summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--00-RELEASENOTES2
-rw-r--r--BUGS2
-rw-r--r--CONTRIBUTING16
-rw-r--r--COPYING2
-rw-r--r--README154
-rw-r--r--README.md446
-rw-r--r--deps/Makefile11
-rw-r--r--deps/README.md66
-rw-r--r--deps/hiredis/.gitignore1
-rw-r--r--deps/hiredis/.travis.yml35
-rw-r--r--deps/hiredis/CHANGELOG.md125
-rw-r--r--deps/hiredis/Makefile107
-rw-r--r--deps/hiredis/README.md203
-rw-r--r--deps/hiredis/adapters/glib.h153
-rw-r--r--deps/hiredis/adapters/ivykis.h81
-rw-r--r--deps/hiredis/adapters/libevent.h24
-rw-r--r--deps/hiredis/adapters/libuv.h7
-rw-r--r--deps/hiredis/adapters/macosx.h114
-rw-r--r--deps/hiredis/adapters/qt.h135
-rw-r--r--deps/hiredis/appveyor.yml36
-rw-r--r--deps/hiredis/async.c52
-rw-r--r--deps/hiredis/async.h3
-rw-r--r--deps/hiredis/dict.c4
-rw-r--r--deps/hiredis/examples/example-glib.c73
-rw-r--r--deps/hiredis/examples/example-ivykis.c58
-rw-r--r--deps/hiredis/examples/example-macosx.c66
-rw-r--r--deps/hiredis/examples/example-qt.cpp46
-rw-r--r--deps/hiredis/examples/example-qt.h32
-rw-r--r--deps/hiredis/examples/example.c2
-rw-r--r--deps/hiredis/fmacros.h15
-rw-r--r--deps/hiredis/hiredis.c719
-rw-r--r--deps/hiredis/hiredis.h153
-rw-r--r--deps/hiredis/net.c133
-rw-r--r--deps/hiredis/net.h6
-rw-r--r--deps/hiredis/read.c525
-rw-r--r--deps/hiredis/read.h111
-rw-r--r--deps/hiredis/sds.c442
-rw-r--r--deps/hiredis/sds.h194
-rw-r--r--deps/hiredis/sdsalloc.h42
-rw-r--r--deps/hiredis/test.c118
-rw-r--r--deps/hiredis/win32.h42
-rw-r--r--deps/hiredis/zmalloc.h13
-rw-r--r--deps/jemalloc/.autom4te.cfg3
-rw-r--r--deps/jemalloc/.gitattributes1
-rw-r--r--deps/jemalloc/.gitignore7
-rw-r--r--deps/jemalloc/COPYING4
-rw-r--r--deps/jemalloc/ChangeLog250
-rw-r--r--deps/jemalloc/INSTALL148
-rw-r--r--deps/jemalloc/Makefile.in112
-rw-r--r--deps/jemalloc/VERSION2
-rw-r--r--deps/jemalloc/bin/jemalloc-config.in79
-rw-r--r--[-rwxr-xr-x]deps/jemalloc/bin/jeprof.in (renamed from deps/jemalloc/bin/pprof)502
-rwxr-xr-xdeps/jemalloc/config.guess192
-rwxr-xr-xdeps/jemalloc/config.sub22
-rwxr-xr-xdeps/jemalloc/configure1390
-rw-r--r--deps/jemalloc/configure.ac590
-rw-r--r--deps/jemalloc/doc/jemalloc.3881
-rw-r--r--deps/jemalloc/doc/jemalloc.html897
-rw-r--r--deps/jemalloc/doc/jemalloc.xml.in1207
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena.h1032
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic.h477
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base.h4
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bitmap.h58
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk.h62
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk_dss.h3
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk_mmap.h7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ckh.h8
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ctl.h14
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent.h217
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hash.h13
-rw-r--r--deps/jemalloc/include/jemalloc/internal/huge.h36
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in874
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h64
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in91
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex.h14
-rw-r--r--deps/jemalloc/include/jemalloc/internal/pages.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/private_symbols.txt336
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prng.h14
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof.h702
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ql.h4
-rw-r--r--deps/jemalloc/include/jemalloc/internal/qr.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/quarantine.h21
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rb.h24
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rtree.h366
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/size_classes.sh290
-rw-r--r--deps/jemalloc/include/jemalloc/internal/stats.h64
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache.h305
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd.h539
-rw-r--r--deps/jemalloc/include/jemalloc/internal/util.h174
-rw-r--r--deps/jemalloc/include/jemalloc/internal/valgrind.h112
-rwxr-xr-xdeps/jemalloc/include/jemalloc/jemalloc.sh4
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_defs.h.in17
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_macros.h.in97
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_protos.h.in86
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in57
-rw-r--r--deps/jemalloc/include/msvc_compat/C99/stdbool.h (renamed from deps/jemalloc/include/msvc_compat/stdbool.h)4
-rw-r--r--deps/jemalloc/include/msvc_compat/C99/stdint.h (renamed from deps/jemalloc/include/msvc_compat/stdint.h)0
-rw-r--r--deps/jemalloc/include/msvc_compat/inttypes.h313
-rw-r--r--deps/jemalloc/include/msvc_compat/strings.h10
-rw-r--r--deps/jemalloc/include/msvc_compat/windows_extra.h26
-rw-r--r--deps/jemalloc/jemalloc.pc.in12
-rw-r--r--deps/jemalloc/src/arena.c2899
-rw-r--r--deps/jemalloc/src/base.c178
-rw-r--r--deps/jemalloc/src/bitmap.c18
-rw-r--r--deps/jemalloc/src/chunk.c792
-rw-r--r--deps/jemalloc/src/chunk_dss.c54
-rw-r--r--deps/jemalloc/src/chunk_mmap.c154
-rw-r--r--deps/jemalloc/src/ckh.c61
-rw-r--r--deps/jemalloc/src/ctl.c1085
-rw-r--r--deps/jemalloc/src/extent.c40
-rw-r--r--deps/jemalloc/src/huge.c546
-rw-r--r--deps/jemalloc/src/jemalloc.c2106
-rw-r--r--deps/jemalloc/src/mutex.c10
-rw-r--r--deps/jemalloc/src/pages.c173
-rw-r--r--deps/jemalloc/src/prof.c1821
-rw-r--r--deps/jemalloc/src/quarantine.c132
-rw-r--r--deps/jemalloc/src/rtree.c138
-rw-r--r--deps/jemalloc/src/stats.c479
-rw-r--r--deps/jemalloc/src/tcache.c320
-rw-r--r--deps/jemalloc/src/tsd.c62
-rw-r--r--deps/jemalloc/src/util.c30
-rw-r--r--deps/jemalloc/src/valgrind.c34
-rw-r--r--deps/jemalloc/src/zone.c34
-rw-r--r--deps/jemalloc/test/include/test/btalloc.h31
-rw-r--r--deps/jemalloc/test/include/test/jemalloc_test.h.in12
-rw-r--r--deps/jemalloc/test/include/test/jemalloc_test_defs.h.in6
-rw-r--r--deps/jemalloc/test/include/test/math.h2
-rw-r--r--deps/jemalloc/test/include/test/mq.h19
-rw-r--r--deps/jemalloc/test/include/test/test.h390
-rw-r--r--deps/jemalloc/test/include/test/thd.h2
-rw-r--r--deps/jemalloc/test/include/test/timer.h26
-rw-r--r--deps/jemalloc/test/integration/MALLOCX_ARENA.c21
-rw-r--r--deps/jemalloc/test/integration/allocm.c107
-rw-r--r--deps/jemalloc/test/integration/chunk.c276
-rw-r--r--deps/jemalloc/test/integration/mallocx.c139
-rw-r--r--deps/jemalloc/test/integration/mremap.c45
-rw-r--r--deps/jemalloc/test/integration/overflow.c49
-rw-r--r--deps/jemalloc/test/integration/rallocm.c111
-rw-r--r--deps/jemalloc/test/integration/rallocx.c15
-rw-r--r--deps/jemalloc/test/integration/sdallocx.c57
-rw-r--r--deps/jemalloc/test/integration/xallocx.c414
-rw-r--r--deps/jemalloc/test/src/SFMT.c22
-rw-r--r--deps/jemalloc/test/src/btalloc.c8
-rw-r--r--deps/jemalloc/test/src/btalloc_0.c3
-rw-r--r--deps/jemalloc/test/src/btalloc_1.c3
-rw-r--r--deps/jemalloc/test/src/mq.c29
-rw-r--r--deps/jemalloc/test/src/mtx.c4
-rw-r--r--deps/jemalloc/test/src/test.c23
-rw-r--r--deps/jemalloc/test/src/thd.c6
-rw-r--r--deps/jemalloc/test/src/timer.c85
-rw-r--r--deps/jemalloc/test/stress/microbench.c181
-rw-r--r--deps/jemalloc/test/unit/SFMT.c2008
-rw-r--r--deps/jemalloc/test/unit/atomic.c122
-rw-r--r--deps/jemalloc/test/unit/bitmap.c24
-rw-r--r--deps/jemalloc/test/unit/ckh.c68
-rw-r--r--deps/jemalloc/test/unit/hash.c4
-rw-r--r--deps/jemalloc/test/unit/junk.c96
-rw-r--r--deps/jemalloc/test/unit/junk_alloc.c3
-rw-r--r--deps/jemalloc/test/unit/junk_free.c3
-rw-r--r--deps/jemalloc/test/unit/lg_chunk.c26
-rw-r--r--deps/jemalloc/test/unit/mallctl.c256
-rw-r--r--deps/jemalloc/test/unit/math.c6
-rw-r--r--deps/jemalloc/test/unit/mq.c3
-rw-r--r--deps/jemalloc/test/unit/prof_accum.c9
-rw-r--r--deps/jemalloc/test/unit/prof_accum.h35
-rw-r--r--deps/jemalloc/test/unit/prof_accum_a.c3
-rw-r--r--deps/jemalloc/test/unit/prof_accum_b.c3
-rw-r--r--deps/jemalloc/test/unit/prof_active.c136
-rw-r--r--deps/jemalloc/test/unit/prof_gdump.c29
-rw-r--r--deps/jemalloc/test/unit/prof_reset.c302
-rw-r--r--deps/jemalloc/test/unit/prof_thread_name.c129
-rw-r--r--deps/jemalloc/test/unit/rb.c7
-rw-r--r--deps/jemalloc/test/unit/rtree.c93
-rw-r--r--deps/jemalloc/test/unit/size_classes.c89
-rw-r--r--deps/jemalloc/test/unit/stats.c171
-rw-r--r--deps/jemalloc/test/unit/tsd.c48
-rw-r--r--deps/jemalloc/test/unit/util.c98
-rw-r--r--deps/jemalloc/test/unit/zero.c4
-rw-r--r--deps/linenoise/.gitignore4
-rw-r--r--deps/linenoise/README.markdown197
-rw-r--r--deps/linenoise/example.c55
-rw-r--r--deps/linenoise/linenoise.c209
-rw-r--r--deps/linenoise/linenoise.h21
-rw-r--r--deps/lua/src/Makefile5
-rw-r--r--deps/lua/src/fpconv.c205
-rw-r--r--deps/lua/src/fpconv.h22
-rw-r--r--deps/lua/src/ldo.c2
-rw-r--r--deps/lua/src/lua_bit.c189
-rw-r--r--deps/lua/src/lua_cjson.c730
-rw-r--r--deps/lua/src/lua_cmsgpack.c526
-rw-r--r--deps/lua/src/lua_struct.c10
-rw-r--r--deps/lua/src/strbuf.c6
-rw-r--r--deps/lua/src/strbuf.h16
-rw-r--r--redis.conf582
-rw-r--r--sentinel.conf41
-rw-r--r--src/Makefile113
-rw-r--r--src/Makefile.dep142
-rw-r--r--src/adlist.c55
-rw-r--r--src/adlist.h2
-rw-r--r--src/ae.c95
-rw-r--r--src/ae.h6
-rw-r--r--src/ae_epoll.c6
-rw-r--r--src/ae_select.c1
-rw-r--r--src/anet.c138
-rw-r--r--src/anet.h14
-rw-r--r--src/aof.c655
-rw-r--r--src/atomicvar.h133
-rw-r--r--src/bio.c86
-rw-r--r--src/bio.h9
-rw-r--r--src/bitops.c749
-rw-r--r--src/blocked.c107
-rw-r--r--src/childinfo.c85
-rw-r--r--src/cluster.c2326
-rw-r--r--src/cluster.h229
-rw-r--r--src/config.c1122
-rw-r--r--src/config.h42
-rw-r--r--src/crc16.c2
-rw-r--r--src/crc64.c8
-rw-r--r--src/crc64.h4
-rw-r--r--src/db.c863
-rw-r--r--src/debug.c634
-rw-r--r--src/debugmacro.h41
-rw-r--r--src/defrag.c579
-rw-r--r--src/dict.c669
-rw-r--r--src/dict.h52
-rw-r--r--src/endianconv.c8
-rw-r--r--src/endianconv.h4
-rw-r--r--src/evict.c567
-rw-r--r--src/expire.c504
-rw-r--r--src/fmacros.h5
-rw-r--r--src/geo.c818
-rw-r--r--src/geo.h22
-rw-r--r--src/geohash.c295
-rw-r--r--src/geohash.h118
-rw-r--r--src/geohash_helper.c235
-rw-r--r--src/geohash_helper.h70
-rw-r--r--src/help.h211
-rw-r--r--src/hyperloglog.c132
-rw-r--r--src/intset.c57
-rw-r--r--src/intset.h6
-rw-r--r--src/latency.c70
-rw-r--r--src/latency.h5
-rw-r--r--src/lazyfree.c135
-rw-r--r--src/lzfP.h56
-rw-r--r--src/lzf_c.c42
-rw-r--r--src/lzf_d.c59
-rw-r--r--src/memtest.c227
-rw-r--r--src/module.c3932
-rw-r--r--src/modules/.gitignore2
-rw-r--r--src/modules/Makefile42
-rw-r--r--src/modules/gendoc.rb51
-rw-r--r--src/modules/helloblock.c196
-rw-r--r--src/modules/hellotype.c286
-rw-r--r--src/modules/helloworld.c621
-rw-r--r--src/modules/testmodule.c237
-rw-r--r--src/multi.c89
-rw-r--r--src/networking.c1188
-rw-r--r--src/notify.c54
-rw-r--r--src/object.c817
-rw-r--r--src/pubsub.c46
-rw-r--r--src/quicklist.c2651
-rw-r--r--src/quicklist.h169
-rw-r--r--src/rand.c4
-rw-r--r--src/rax.c1733
-rw-r--r--src/rax.h160
-rw-r--r--src/rax_malloc.h44
-rw-r--r--src/rdb.c1606
-rw-r--r--src/rdb.h121
-rw-r--r--src/redis-benchmark.c161
-rw-r--r--src/redis-check-aof.c41
-rw-r--r--src/redis-check-dump.c768
-rw-r--r--src/redis-check-rdb.c360
-rw-r--r--src/redis-cli.c1032
-rwxr-xr-xsrc/redis-trib.rb554
-rw-r--r--src/redis.h1530
-rw-r--r--src/redisassert.h6
-rw-r--r--src/redismodule.h358
-rw-r--r--src/replication.c1839
-rw-r--r--src/rio.c182
-rw-r--r--src/rio.h21
-rw-r--r--src/scripting.c1761
-rw-r--r--src/sds.c439
-rw-r--r--src/sds.h194
-rw-r--r--src/sdsalloc.h42
-rw-r--r--src/sentinel.c1427
-rw-r--r--src/server.c (renamed from src/redis.c)2404
-rw-r--r--src/server.h2022
-rw-r--r--src/sha1.c27
-rw-r--r--src/sha1.h15
-rw-r--r--src/siphash.c360
-rw-r--r--src/slowlog.c27
-rw-r--r--src/slowlog.h8
-rw-r--r--src/solarisfixes.h4
-rw-r--r--src/sort.c165
-rw-r--r--src/sparkline.c5
-rw-r--r--src/syncio.c13
-rw-r--r--src/t_hash.c630
-rw-r--r--src/t_list.c694
-rw-r--r--src/t_set.c592
-rw-r--r--src/t_string.c198
-rw-r--r--src/t_zset.c1424
-rw-r--r--src/util.c236
-rw-r--r--src/util.h9
-rw-r--r--src/version.h2
-rw-r--r--src/ziplist.c767
-rw-r--r--src/ziplist.h13
-rw-r--r--src/zipmap.c15
-rw-r--r--src/zipmap.h4
-rw-r--r--src/zmalloc.c171
-rw-r--r--src/zmalloc.h17
-rw-r--r--tests/assets/default.conf1
-rw-r--r--tests/cluster/cluster.tcl2
-rw-r--r--tests/cluster/run.tcl4
-rw-r--r--tests/cluster/tests/03-failover-loop.tcl2
-rw-r--r--tests/cluster/tests/04-resharding.tcl94
-rw-r--r--tests/cluster/tests/05-slave-selection.tcl2
-rw-r--r--tests/cluster/tests/07-replica-migration.tcl56
-rw-r--r--tests/cluster/tests/08-update-msg.tcl90
-rw-r--r--tests/cluster/tests/09-pubsub.tcl40
-rw-r--r--tests/cluster/tests/10-manual-failover.tcl192
-rw-r--r--tests/cluster/tests/11-manual-takeover.tcl59
-rw-r--r--tests/cluster/tests/12-replica-migration-2.tcl64
-rw-r--r--tests/cluster/tests/helpers/onlydots.tcl16
-rw-r--r--tests/cluster/tests/includes/init-tests.tcl9
-rw-r--r--tests/instances.tcl134
-rw-r--r--tests/integration/aof.tcl118
-rw-r--r--tests/integration/logging.tcl24
-rw-r--r--tests/integration/psync2-reg.tcl78
-rw-r--r--tests/integration/psync2.tcl182
-rw-r--r--tests/integration/rdb.tcl32
-rw-r--r--tests/integration/replication-3.tcl12
-rw-r--r--tests/integration/replication-4.tcl19
-rw-r--r--tests/integration/replication-psync.tcl65
-rw-r--r--tests/integration/replication.tcl248
-rw-r--r--tests/sentinel/run.tcl3
-rw-r--r--tests/sentinel/tests/05-manual.tcl3
-rw-r--r--tests/sentinel/tests/06-ckquorum.tcl34
-rw-r--r--tests/sentinel/tests/07-down-conditions.tcl68
-rw-r--r--tests/support/cluster.tcl8
-rw-r--r--tests/support/redis.tcl2
-rw-r--r--tests/support/server.tcl37
-rw-r--r--tests/support/test.tcl15
-rw-r--r--tests/support/util.tcl70
-rw-r--r--tests/test_helper.tcl102
-rw-r--r--tests/unit/aofrw.tcl107
-rw-r--r--tests/unit/auth.tcl2
-rw-r--r--tests/unit/bitfield.tcl201
-rw-r--r--tests/unit/bitops.tcl16
-rw-r--r--tests/unit/dump.tcl95
-rw-r--r--tests/unit/expire.tcl21
-rw-r--r--tests/unit/geo.tcl311
-rw-r--r--tests/unit/hyperloglog.tcl19
-rw-r--r--tests/unit/introspection-2.tcl23
-rw-r--r--tests/unit/introspection.tcl9
-rw-r--r--tests/unit/keyspace.tcl275
-rw-r--r--tests/unit/lazyfree.tcl39
-rw-r--r--tests/unit/maxmemory.tcl10
-rw-r--r--tests/unit/memefficiency.tcl55
-rw-r--r--tests/unit/other.tcl3
-rw-r--r--tests/unit/pubsub.tcl4
-rw-r--r--tests/unit/scan.tcl10
-rw-r--r--tests/unit/scripting.tcl394
-rw-r--r--tests/unit/slowlog.tcl13
-rw-r--r--tests/unit/sort.tcl41
-rw-r--r--tests/unit/type/hash.tcl76
-rw-r--r--tests/unit/type/incr.tcl147
-rw-r--r--tests/unit/type/list-2.tcl9
-rw-r--r--tests/unit/type/list-3.tcl47
-rw-r--r--tests/unit/type/list.tcl173
-rw-r--r--tests/unit/type/set.tcl70
-rw-r--r--tests/unit/type/string.tcl (renamed from tests/unit/basic.tcl)372
-rw-r--r--tests/unit/type/zset.tcl86
-rw-r--r--tests/unit/wait.tcl42
-rw-r--r--utils/cluster_fail_time.tcl50
-rw-r--r--utils/corrupt_rdb.c44
-rw-r--r--utils/create-cluster/.gitignore5
-rw-r--r--utils/create-cluster/README27
-rwxr-xr-xutils/create-cluster/create-cluster102
-rwxr-xr-xutils/generate-command-help.rb6
-rw-r--r--utils/graphs/commits-over-time/README.md16
-rwxr-xr-xutils/graphs/commits-over-time/genhtml.tcl96
-rw-r--r--utils/hashtable/README13
-rw-r--r--utils/hashtable/rehashing.c142
-rw-r--r--utils/hyperloglog/hll-gnuplot-graph.rb2
-rwxr-xr-xutils/install_server.sh99
-rw-r--r--utils/lru/README10
-rw-r--r--utils/lru/lfu-simulation.c158
-rw-r--r--utils/lru/test-lru.rb301
-rwxr-xr-xutils/redis_init_script.tpl5
-rwxr-xr-xutils/releasetools/01_create_tarball.sh (renamed from utils/mkrelease.sh)1
-rwxr-xr-xutils/releasetools/02_upload_tarball.sh6
-rwxr-xr-xutils/releasetools/03_test_release.sh26
-rwxr-xr-xutils/releasetools/04_release_hash.sh8
-rwxr-xr-xutils/releasetools/changelog.tcl30
-rwxr-xr-xutils/whatisdoing.sh8
397 files changed, 66191 insertions, 24013 deletions
diff --git a/.gitignore b/.gitignore
index cf904522d..a188cfc82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
dump.rdb
redis-benchmark
redis-check-aof
+redis-check-rdb
redis-check-dump
redis-cli
redis-sentinel
@@ -19,9 +20,11 @@ src/transfer.sh
src/configs
redis.ds
src/redis.conf
+src/nodes.conf
deps/lua/src/lua
deps/lua/src/luac
deps/lua/src/liblua.a
.make-*
.prerequisites
*.dSYM
+Makefile.dep
diff --git a/00-RELEASENOTES b/00-RELEASENOTES
index 81ff184fe..ce472159e 100644
--- a/00-RELEASENOTES
+++ b/00-RELEASENOTES
@@ -5,7 +5,7 @@ There is no release notes for this branch, it gets forked into another branch
every time there is a partial feature freeze in order to eventually create
a new stable release.
-Usually "unstable" is stable enough for you to use it in development enviromnets
+Usually "unstable" is stable enough for you to use it in development environments
however you should never use it in production environments. It is possible
to download the latest stable release here:
diff --git a/BUGS b/BUGS
index 96d52bf8b..a8e936892 100644
--- a/BUGS
+++ b/BUGS
@@ -1 +1 @@
-Plese check https://github.com/antirez/redis/issues
+Please check https://github.com/antirez/redis/issues
diff --git a/CONTRIBUTING b/CONTRIBUTING
index f7b6836f7..f57de3fd9 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -12,17 +12,19 @@ each source file that you contribute.
PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected
bugs in the Github issues system. We'll be very happy to help you and provide
- all the support in the Redis Google Group.
+ all the support at the Reddit sub:
- Redis Google Group address:
-
- https://groups.google.com/forum/?fromgroups#!forum/redis-db
+ http://reddit.com/r/redis
+
+ There is also an active community of Redis users at Stack Overflow:
+
+ http://stackoverflow.com/questions/tagged/redis
# How to provide a patch for a new feature
-1. Drop a message to the Redis Google Group with a proposal of semantics/API.
+1. If it is a major feature or a semantical change, please post it as a new submission in r/redis on Reddit at http://reddit.com/r/redis. Try to be passionate about why the feature is needed, make users upvote your proposal to gain traction and so forth. Read feedbacks about the community. But in this first step **please don't write code yet**.
-2. If in step 1 you get an acknowledge from the project leaders, use the
+2. If in step 1 you get an acknowledgment from the project leaders, use the
following procedure to submit a patch:
a. Fork Redis on github ( http://help.github.com/fork-a-repo/ )
@@ -31,4 +33,6 @@ each source file that you contribute.
d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
e. Done :)
+For minor fixes just open a pull request on Github.
+
Thanks!
diff --git a/COPYING b/COPYING
index a58de44dd..ac68e012b 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (c) 2006-2014, Salvatore Sanfilippo
+Copyright (c) 2006-2015, Salvatore Sanfilippo
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/README b/README
deleted file mode 100644
index b7a12b828..000000000
--- a/README
+++ /dev/null
@@ -1,154 +0,0 @@
-Where to find complete Redis documentation?
--------------------------------------------
-
-This README is just a fast "quick start" document. You can find more detailed
-documentation at http://redis.io
-
-Building Redis
---------------
-
-Redis can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD.
-We support big endian and little endian architectures.
-
-It may compile on Solaris derived systems (for instance SmartOS) but our
-support for this platform is "best effort" and Redis is not guaranteed to
-work as well as in Linux, OSX, and *BSD there.
-
-It is as simple as:
-
- % make
-
-You can run a 32 bit Redis binary using:
-
- % make 32bit
-
-After building Redis is a good idea to test it, using:
-
- % make test
-
-Fixing problems building 32 bit binaries
----------
-
-If after building Redis with a 32 bit target you need to rebuild it
-with a 64 bit target, or the other way around, you need to perform a
-"make distclean" in the root directory of the Redis distribution.
-
-In case of build errors when trying to build a 32 bit binary of Redis, try
-the following steps:
-
-* Install the packages libc6-dev-i386 (also try g++-multilib).
-* Try using the following command line instead of "make 32bit":
-
- make CFLAGS="-m32 -march=native" LDFLAGS="-m32"
-
-Allocator
----------
-
-Selecting a non-default memory allocator when building Redis is done by setting
-the `MALLOC` environment variable. Redis is compiled and linked against libc
-malloc by default, with the exception of jemalloc being the default on Linux
-systems. This default was picked because jemalloc has proven to have fewer
-fragmentation problems than libc malloc.
-
-To force compiling against libc malloc, use:
-
- % make MALLOC=libc
-
-To compile against jemalloc on Mac OS X systems, use:
-
- % make MALLOC=jemalloc
-
-Verbose build
--------------
-
-Redis will build with a user friendly colorized output by default.
-If you want to see a more verbose output use the following:
-
- % make V=1
-
-Running Redis
--------------
-
-To run Redis with the default configuration just type:
-
- % cd src
- % ./redis-server
-
-If you want to provide your redis.conf, you have to run it using an additional
-parameter (the path of the configuration file):
-
- % cd src
- % ./redis-server /path/to/redis.conf
-
-It is possible to alter the Redis configuration passing parameters directly
-as options using the command line. Examples:
-
- % ./redis-server --port 9999 --slaveof 127.0.0.1 6379
- % ./redis-server /etc/redis/6379.conf --loglevel debug
-
-All the options in redis.conf are also supported as options using the command
-line, with exactly the same name.
-
-Playing with Redis
-------------------
-
-You can use redis-cli to play with Redis. Start a redis-server instance,
-then in another terminal try the following:
-
- % cd src
- % ./redis-cli
- redis> ping
- PONG
- redis> set foo bar
- OK
- redis> get foo
- "bar"
- redis> incr mycounter
- (integer) 1
- redis> incr mycounter
- (integer) 2
- redis>
-
-You can find the list of all the available commands here:
-
- http://redis.io/commands
-
-Installing Redis
------------------
-
-In order to install Redis binaries into /usr/local/bin just use:
-
- % make install
-
-You can use "make PREFIX=/some/other/directory install" if you wish to use a
-different destination.
-
-Make install will just install binaries in your system, but will not configure
-init scripts and configuration files in the appropriate place. This is not
-needed if you want just to play a bit with Redis, but if you are installing
-it the proper way for a production system, we have a script doing this
-for Ubuntu and Debian systems:
-
- % cd utils
- % ./install_server.sh
-
-The script will ask you a few questions and will setup everything you need
-to run Redis properly as a background daemon that will start again on
-system reboots.
-
-You'll be able to stop and start Redis using the script named
-/etc/init.d/redis_<portnumber>, for instance /etc/init.d/redis_6379.
-
-Code contributions
----
-
-Note: by contributing code to the Redis project in any form, including sending
-a pull request via Github, a code fragment or patch via private email or
-public discussion groups, you agree to release your code under the terms
-of the BSD license that you can find in the COPYING file included in the Redis
-source distribution.
-
-Please see the CONTRIBUTING file in this source distribution for more
-information.
-
-Enjoy!
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..70a15790f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,446 @@
+This README is just a fast *quick start* document. You can find more detailed documentation at http://redis.io.
+
+What is Redis?
+--------------
+
+Redis is often referred as a *data structures* server. What this means is that Redis provides access to mutable data structures via a set of commands, which are sent using a *server-client* model with TCP sockets and a simple protocol. So different processes can query and modify the same data structures in a shared way.
+
+Data structures implemented into Redis have a few special properties:
+
+* Redis cares to store them on disk, even if they are always served and modified into the server memory. This means that Redis is fast, but that is also non-volatile.
+* Implementation of data structures stress on memory efficiency, so data structures inside Redis will likely use less memory compared to the same data structure modeled using an high level programming language.
+* Redis offers a number of features that are natural to find in a database, like replication, tunable levels of durability, cluster, high availability.
+
+Another good example is to think of Redis as a more complex version of memcached, where the operations are not just SETs and GETs, but operations to work with complex data types like Lists, Sets, ordered data structures, and so forth.
+
+If you want to know more, this is a list of selected starting points:
+
+* Introduction to Redis data types. http://redis.io/topics/data-types-intro
+* Try Redis directly inside your browser. http://try.redis.io
+* The full list of Redis commands. http://redis.io/commands
+* There is much more inside the Redis official documentation. http://redis.io/documentation
+
+Building Redis
+--------------
+
+Redis can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD.
+We support big endian and little endian architectures, and both 32 bit
+and 64 bit systems.
+
+It may compile on Solaris derived systems (for instance SmartOS) but our
+support for this platform is *best effort* and Redis is not guaranteed to
+work as well as in Linux, OSX, and \*BSD there.
+
+It is as simple as:
+
+ % make
+
+You can run a 32 bit Redis binary using:
+
+ % make 32bit
+
+After building Redis, it is a good idea to test it using:
+
+ % make test
+
+Fixing build problems with dependencies or cached build options
+---------
+
+Redis has some dependencies which are included into the `deps` directory.
+`make` does not automatically rebuild dependencies even if something in
+the source code of dependencies changes.
+
+When you update the source code with `git pull` or when code inside the
+dependencies tree is modified in any other way, make sure to use the following
+command in order to really clean everything and rebuild from scratch:
+
+ make distclean
+
+This will clean: jemalloc, lua, hiredis, linenoise.
+
+Also if you force certain build options like 32bit target, no C compiler
+optimizations (for debugging purposes), and other similar build time options,
+those options are cached indefinitely until you issue a `make distclean`
+command.
+
+Fixing problems building 32 bit binaries
+---------
+
+If after building Redis with a 32 bit target you need to rebuild it
+with a 64 bit target, or the other way around, you need to perform a
+`make distclean` in the root directory of the Redis distribution.
+
+In case of build errors when trying to build a 32 bit binary of Redis, try
+the following steps:
+
+* Install the packages libc6-dev-i386 (also try g++-multilib).
+* Try using the following command line instead of `make 32bit`:
+ `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"`
+
+Allocator
+---------
+
+Selecting a non-default memory allocator when building Redis is done by setting
+the `MALLOC` environment variable. Redis is compiled and linked against libc
+malloc by default, with the exception of jemalloc being the default on Linux
+systems. This default was picked because jemalloc has proven to have fewer
+fragmentation problems than libc malloc.
+
+To force compiling against libc malloc, use:
+
+ % make MALLOC=libc
+
+To compile against jemalloc on Mac OS X systems, use:
+
+ % make MALLOC=jemalloc
+
+Verbose build
+-------------
+
+Redis will build with a user friendly colorized output by default.
+If you want to see a more verbose output use the following:
+
+ % make V=1
+
+Running Redis
+-------------
+
+To run Redis with the default configuration just type:
+
+ % cd src
+ % ./redis-server
+
+If you want to provide your redis.conf, you have to run it using an additional
+parameter (the path of the configuration file):
+
+ % cd src
+ % ./redis-server /path/to/redis.conf
+
+It is possible to alter the Redis configuration by passing parameters directly
+as options using the command line. Examples:
+
+ % ./redis-server --port 9999 --slaveof 127.0.0.1 6379
+ % ./redis-server /etc/redis/6379.conf --loglevel debug
+
+All the options in redis.conf are also supported as options using the command
+line, with exactly the same name.
+
+Playing with Redis
+------------------
+
+You can use redis-cli to play with Redis. Start a redis-server instance,
+then in another terminal try the following:
+
+ % cd src
+ % ./redis-cli
+ redis> ping
+ PONG
+ redis> set foo bar
+ OK
+ redis> get foo
+ "bar"
+ redis> incr mycounter
+ (integer) 1
+ redis> incr mycounter
+ (integer) 2
+ redis>
+
+You can find the list of all the available commands at http://redis.io/commands.
+
+Installing Redis
+-----------------
+
+In order to install Redis binaries into /usr/local/bin just use:
+
+ % make install
+
+You can use `make PREFIX=/some/other/directory install` if you wish to use a
+different destination.
+
+Make install will just install binaries in your system, but will not configure
+init scripts and configuration files in the appropriate place. This is not
+needed if you want just to play a bit with Redis, but if you are installing
+it the proper way for a production system, we have a script doing this
+for Ubuntu and Debian systems:
+
+ % cd utils
+ % ./install_server.sh
+
+The script will ask you a few questions and will setup everything you need
+to run Redis properly as a background daemon that will start again on
+system reboots.
+
+You'll be able to stop and start Redis using the script named
+`/etc/init.d/redis_<portnumber>`, for instance `/etc/init.d/redis_6379`.
+
+Code contributions
+-----------------
+
+Note: by contributing code to the Redis project in any form, including sending
+a pull request via Github, a code fragment or patch via private email or
+public discussion groups, you agree to release your code under the terms
+of the BSD license that you can find in the [COPYING][1] file included in the Redis
+source distribution.
+
+Please see the [CONTRIBUTING][2] file in this source distribution for more
+information.
+
+[1]: https://github.com/antirez/redis/blob/unstable/COPYING
+[2]: https://github.com/antirez/redis/blob/unstable/CONTRIBUTING
+
+Redis internals
+===
+
+If you are reading this README you are likely in front of a Github page
+or you just untarred the Redis distribution tar ball. In both the cases
+you are basically one step away from the source code, so here we explain
+the Redis source code layout, what is in each file as a general idea, the
+most important functions and structures inside the Redis server and so forth.
+We keep all the discussion at a high level without digging into the details
+since this document would be huge otherwise and our code base changes
+continuously, but a general idea should be a good starting point to
+understand more. Moreover most of the code is heavily commented and easy
+to follow.
+
+Source code layout
+---
+
+The Redis root directory just contains this README, the Makefile which
+calls the real Makefile inside the `src` directory and an example
+configuration for Redis and Sentinel. You can find a few shell
+scripts that are used in order to execute the Redis, Redis Cluster and
+Redis Sentinel unit tests, which are implemented inside the `tests`
+directory.
+
+Inside the root are the following important directories:
+
+* `src`: contains the Redis implementation, written in C.
+* `tests`: contains the unit tests, implemented in Tcl.
+* `deps`: contains libraries Redis uses. Everything needed to compile Redis is inside this directory; your system just needs to provide `libc`, a POSIX compatible interface and a C compiler. Notably `deps` contains a copy of `jemalloc`, which is the default allocator of Redis under Linux. Note that under `deps` there are also things which started with the Redis project, but for which the main repository is not `antirez/redis`. An exception to this rule is `deps/geohash-int` which is the low level geocoding library used by Redis: it originated from a different project, but at this point it diverged so much that it is developed as a separated entity directly inside the Redis repository.
+
+There are a few more directories but they are not very important for our goals
+here. We'll focus mostly on `src`, where the Redis implementation is contained,
+exploring what there is inside each file. The order in which files are
+exposed is the logical one to follow in order to disclose different layers
+of complexity incrementally.
+
+Note: lately Redis was refactored quite a bit. Function names and file
+names have been changed, so you may find that this documentation reflects the
+`unstable` branch more closely. For instance in Redis 3.0 the `server.c`
+and `server.h` files were named `redis.c` and `redis.h`. However the overall
+structure is the same. Keep in mind that all the new developments and pull
+requests should be performed against the `unstable` branch.
+
+server.h
+---
+
+The simplest way to understand how a program works is to understand the
+data structures it uses. So we'll start from the main header file of
+Redis, which is `server.h`.
+
+All the server configuration and in general all the shared state is
+defined in a global structure called `server`, of type `struct redisServer`.
+A few important fields in this structure are:
+
+* `server.db` is an array of Redis databases, where data is stored.
+* `server.commands` is the command table.
+* `server.clients` is a linked list of clients connected to the server.
+* `server.master` is a special client, the master, if the instance is a slave.
+
+There are tons of other fields. Most fields are commented directly inside
+the structure definition.
+
+Another important Redis data structure is the one defining a client.
+In the past it was called `redisClient`, now just `client`. The structure
+has many fields, here we'll just show the main ones:
+
+ struct client {
+ int fd;
+ sds querybuf;
+ int argc;
+ robj **argv;
+ redisDb *db;
+ int flags;
+ list *reply;
+ char buf[PROTO_REPLY_CHUNK_BYTES];
+ ... many other fields ...
+ }
+
+The client structure defines a *connected client*:
+
+* The `fd` field is the client socket file descriptor.
+* `argc` and `argv` are populated with the command the client is executing, so that functions implementing a given Redis command can read the arguments.
+* `querybuf` accumulates the requests from the client, which are parsed by the Redis server according to the Redis protocol and executed by calling the implementations of the commands the client is executing.
+* `reply` and `buf` are dynamic and static buffers that accumulate the replies the server sends to the client. These buffers are incrementally written to the socket as soon as the file descriptor is writable.
+
+As you can see in the client structure above, arguments in a command
+are described as `robj` structures. The following is the full `robj`
+structure, which defines a *Redis object*:
+
+ typedef struct redisObject {
+ unsigned type:4;
+ unsigned encoding:4;
+ unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */
+ int refcount;
+ void *ptr;
+ } robj;
+
+Basically this structure can represent all the basic Redis data types like
+strings, lists, sets, sorted sets and so forth. The interesting thing is that
+it has a `type` field, so that it is possible to know what type a given
+object has, and a `refcount`, so that the same object can be referenced
+in multiple places without allocating it multiple times. Finally the `ptr`
+field points to the actual representation of the object, which might vary
+even for the same type, depending on the `encoding` used.
+
+Redis objects are used extensively in the Redis internals, however in order
+to avoid the overhead of indirect accesses, recently in many places
+we just use plain dynamic strings not wrapped inside a Redis object.
+
+server.c
+---
+
+This is the entry point of the Redis server, where the `main()` function
+is defined. The following are the most important steps in order to startup
+the Redis server.
+
+* `initServerConfig()` setups the default values of the `server` structure.
+* `initServer()` allocates the data structures needed to operate, setup the listening socket, and so forth.
+* `aeMain()` starts the event loop which listens for new connections.
+
+There are two special functions called periodically by the event loop:
+
+1. `serverCron()` is called periodically (according to `server.hz` frequency), and performs tasks that must be performed from time to time, like checking for timed out clients.
+2. `beforeSleep()` is called every time the event loop fired, Redis served a few requests, and is returning back into the event loop.
+
+Inside server.c you can find code that handles other vital things of the Redis server:
+
+* `call()` is used in order to call a given command in the context of a given client.
+* `activeExpireCycle()` handles eviction of keys with a time to live set via the `EXPIRE` command.
+* `freeMemoryIfNeeded()` is called when a new write command should be performed but Redis is out of memory according to the `maxmemory` directive.
+* The global variable `redisCommandTable` defines all the Redis commands, specifying the name of the command, the function implementing the command, the number of arguments required, and other properties of each command.
+
+networking.c
+---
+
+This file defines all the I/O functions with clients, masters and slaves
+(which in Redis are just special clients):
+
+* `createClient()` allocates and initializes a new client.
+* the `addReply*()` family of functions are used by commands implementations in order to append data to the client structure, that will be transmitted to the client as a reply for a given command executed.
+* `writeToClient()` transmits the data pending in the output buffers to the client and is called by the *writable event handler* `sendReplyToClient()`.
+* `readQueryFromClient()` is the *readable event handler* and accumulates data read from the client into the query buffer.
+* `processInputBuffer()` is the entry point in order to parse the client query buffer according to the Redis protocol. Once commands are ready to be processed, it calls `processCommand()` which is defined inside `server.c` in order to actually execute the command.
+* `freeClient()` deallocates, disconnects and removes a client.
+
+aof.c and rdb.c
+---
+
+As you can guess from the names these files implement the RDB and AOF
+persistence for Redis. Redis uses a persistence model based on the `fork()`
+system call in order to create a child process with the same (shared) memory
+content of the main Redis process. This child process dumps the content
+of the memory on disk. This is used by `rdb.c` to create the snapshots
+on disk and by `aof.c` in order to perform the AOF rewrite when the
+append only file gets too big.
+
+The implementation inside `aof.c` has additional functions in order to
+implement an API that allows commands to append new commands into the AOF
+file as clients execute them.
+
+The `call()` function defined inside `server.c` is responsible to call
+the functions that in turn will write the commands into the AOF.
+
+db.c
+---
+
+Certain Redis commands operate on specific data types, others are general.
+Examples of generic commands are `DEL` and `EXPIRE`. They operate on keys
+and not on their values specifically. All those generic commands are
+defined inside `db.c`.
+
+Moreover `db.c` implements an API in order to perform certain operations
+on the Redis dataset without directly accessing the internal data structures.
+
+The most important functions inside `db.c` which are used in many commands
+implementations are the following:
+
+* `lookupKeyRead()` and `lookupKeyWrite()` are used in order to get a pointer to the value associated to a given key, or `NULL` if the key does not exist.
+* `dbAdd()` and its higher level counterpart `setKey()` create a new key in a Redis database.
+* `dbDelete()` removes a key and its associated value.
+* `emptyDb()` removes an entire single database or all the databases defined.
+
+The rest of the file implements the generic commands exposed to the client.
+
+object.c
+---
+
+The `robj` structure defining Redis objects was already described. Inside
+`object.c` there are all the functions that operate with Redis objects at
+a basic level, like functions to allocate new objects, handle the reference
+counting and so forth. Notable functions inside this file:
+
+* `incrRefcount()` and `decrRefCount()` are used in order to increment or decrement an object reference count. When it drops to 0 the object is finally freed.
+* `createObject()` allocates a new object. There are also specialized functions to allocate string objects having a specific content, like `createStringObjectFromLongLong()` and similar functions.
+
+This file also implements the `OBJECT` command.
+
+replication.c
+---
+
+This is one of the most complex files inside Redis, it is recommended to
+approach it only after getting a bit familiar with the rest of the code base.
+In this file there is the implementation of both the master and slave role
+of Redis.
+
+One of the most important functions inside this file is `replicationFeedSlaves()` that writes commands to the clients representing slave instances connected
+to our master, so that the slaves can get the writes performed by the clients:
+this way their data set will remain synchronized with the one in the master.
+
+This file also implements both the `SYNC` and `PSYNC` commands that are
+used in order to perform the first synchronization between masters and
+slaves, or to continue the replication after a disconnection.
+
+Other C files
+---
+
+* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c` and `t_zset.c` contains the implementation of the Redis data types. They implement both an API to access a given data type, and the client commands implementations for these data types.
+* `ae.c` implements the Redis event loop, it's a self contained library which is simple to read and understand.
+* `sds.c` is the Redis string library, check http://github.com/antirez/sds for more information.
+* `anet.c` is a library to use POSIX networking in a simpler way compared to the raw interface exposed by the kernel.
+* `dict.c` is an implementation of a non-blocking hash table which rehashes incrementally.
+* `scripting.c` implements Lua scripting. It is completely self contained from the rest of the Redis implementation and is simple enough to understand if you are familiar with the Lua API.
+* `cluster.c` implements the Redis Cluster. Probably a good read only after being very familiar with the rest of the Redis code base. If you want to read `cluster.c` make sure to read the [Redis Cluster specification][3].
+
+[3]: http://redis.io/topics/cluster-spec
+
+Anatomy of a Redis command
+---
+
+All the Redis commands are defined in the following way:
+
+ void foobarCommand(client *c) {
+ printf("%s",c->argv[1]->ptr); /* Do something with the argument. */
+ addReply(c,shared.ok); /* Reply something to the client. */
+ }
+
+The command is then referenced inside `server.c` in the command table:
+
+ {"foobar",foobarCommand,2,"rtF",0,NULL,0,0,0,0,0},
+
+In the above example `2` is the number of arguments the command takes,
+while `"rtF"` are the command flags, as documented in the command table
+top comment inside `server.c`.
+
+After the command operates in some way, it returns a reply to the client,
+usually using `addReply()` or a similar function defined inside `networking.c`.
+
+There are tons of command implementations inside the Redis source code
+that can serve as examples of actual commands implementations. To write
+a few toy commands can be a good exercise to familiarize with the code base.
+
+There are also many other files not described here, but it is useless to
+cover everything. We want to just help you with the first steps.
+Eventually you'll find your way inside the Redis code base :-)
+
+Enjoy!
diff --git a/deps/Makefile b/deps/Makefile
index 5a95545de..e148a331c 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -58,12 +58,17 @@ ifeq ($(uname_S),SunOS)
LUA_CFLAGS= -D__C99FEATURES__=1
endif
-LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI $(CFLAGS)
+LUA_CFLAGS+= -O2 -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' $(CFLAGS)
LUA_LDFLAGS+= $(LDFLAGS)
+# lua's Makefile defines AR="ar rcu", which is unusual, and makes it more
+# challenging to cross-compile lua (and redis). These defines make it easier
+# to fit redis into cross-compilation environments, which typically set AR.
+AR=ar
+ARFLAGS=rcu
lua: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
- cd lua/src && $(MAKE) all CFLAGS="$(LUA_CFLAGS)" MYLDFLAGS="$(LUA_LDFLAGS)"
+ cd lua/src && $(MAKE) all CFLAGS="$(LUA_CFLAGS)" MYLDFLAGS="$(LUA_LDFLAGS)" AR="$(AR) $(ARFLAGS)"
.PHONY: lua
@@ -72,7 +77,7 @@ JEMALLOC_LDFLAGS= $(LDFLAGS)
jemalloc: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
- cd jemalloc && ./configure --with-jemalloc-prefix=je_ --enable-cc-silence CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)"
+ cd jemalloc && ./configure --with-lg-quantum=3 --with-jemalloc-prefix=je_ --enable-cc-silence CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)"
cd jemalloc && $(MAKE) CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)" lib/libjemalloc.a
.PHONY: jemalloc
diff --git a/deps/README.md b/deps/README.md
new file mode 100644
index 000000000..0ce480046
--- /dev/null
+++ b/deps/README.md
@@ -0,0 +1,66 @@
+This directory contains all Redis dependencies, except for the libc that
+should be provided by the operating system.
+
+* **Jemalloc** is our memory allocator, used as replacement for libc malloc on Linux by default. It has good performance and excellent fragmentation behavior. This component is upgraded from time to time.
+* **geohash-int** is inside the dependencies directory but is actually part of the Redis project, since it is our private fork (heavily modified) of a library initially developed for Ardb, which is in turn a fork of Redis.
+* **hiredis** is the official C client library for Redis. It is used by redis-cli, redis-benchmark and Redis Sentinel. It is part of the Redis official ecosystem but is developed externally from the Redis repository, so we just upgrade it as needed.
+* **linenoise** is a readline replacement. It is developed by the same authors of Redis but is managed as a separated project and updated as needed.
+* **lua** is Lua 5.1 with minor changes for security and additional libraries.
+
+How to upgrade the above dependencies
+===
+
+Jemalloc
+---
+
+Jemalloc is unmodified. We only change settings via the `configure` script of Jemalloc using the `--with-lg-quantum` option, setting it to the value of 3 instead of 4. This provides us with more size classes that better suit the Redis data structures, in order to gain memory efficiency.
+
+So in order to upgrade jemalloc:
+
+1. Remove the jemalloc directory.
+2. Substitute it with the new jemalloc source tree.
+
+Geohash
+---
+
+This is never upgraded since it's part of the Redis project. If there are changes to merge from Ardb there is the need to manually check differences, but at this point the source code is pretty different.
+
+Hiredis
+---
+
+Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is advised to take a lot of care:
+
+1. Check with diff if hiredis API changed and what impact it could have in Redis.
+2. Make sure that the SDS library inside Hiredis and inside Redis are compatible.
+3. After the upgrade, run the Redis Sentinel test.
+4. Check manually that redis-cli and redis-benchmark behave as expected, since we have no tests for CLI utilities currently.
+
+Linenoise
+---
+
+Linenoise is rarely upgraded as needed. The upgrade process is trivial since
+Redis uses a non modified version of linenoise, so to upgrade just do the
+following:
+
+1. Remove the linenoise directory.
+2. Substitute it with the new linenoise source tree.
+
+Lua
+---
+
+We use Lua 5.1 and no upgrade is planned currently, since we don't want to break
+Lua scripts for new Lua features: in the context of Redis Lua scripts the
+capabilities of 5.1 are usually more than enough, the release is rock solid,
+and we definitely don't want to break old scripts.
+
+So upgrading of Lua is up to the Redis project maintainers and should be a
+manual procedure performed by taking a diff between the different versions.
+
+Currently we have at least the following differences between official Lua 5.1
+and our version:
+
+1. Makefile is modified to allow a different compiler than GCC.
+2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`.
+3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution.
+
+
diff --git a/deps/hiredis/.gitignore b/deps/hiredis/.gitignore
index 0c166a02e..c44b5c537 100644
--- a/deps/hiredis/.gitignore
+++ b/deps/hiredis/.gitignore
@@ -4,3 +4,4 @@
/*.so
/*.dylib
/*.a
+/*.pc
diff --git a/deps/hiredis/.travis.yml b/deps/hiredis/.travis.yml
index 030427ff4..ad08076d8 100644
--- a/deps/hiredis/.travis.yml
+++ b/deps/hiredis/.travis.yml
@@ -1,6 +1,39 @@
language: c
+sudo: false
compiler:
- gcc
- clang
-script: make && make check
+os:
+ - linux
+ - osx
+
+before_script:
+ - if [ "$TRAVIS_OS_NAME" == "osx" ] ; then brew update; brew install redis; fi
+
+addons:
+ apt:
+ packages:
+ - libc6-dbg
+ - libc6-dev
+ - libc6:i386
+ - libc6-dev-i386
+ - libc6-dbg:i386
+ - gcc-multilib
+ - valgrind
+
+env:
+ - CFLAGS="-Werror"
+ - PRE="valgrind --track-origins=yes --leak-check=full"
+ - TARGET="32bit" TARGET_VARS="32bit-vars" CFLAGS="-Werror"
+ - TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full"
+
+matrix:
+ exclude:
+ - os: osx
+ env: PRE="valgrind --track-origins=yes --leak-check=full"
+
+ - os: osx
+ env: TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full"
+
+script: make $TARGET CFLAGS="$CFLAGS" && make check PRE="$PRE" && make $TARGET_VARS hiredis-example
diff --git a/deps/hiredis/CHANGELOG.md b/deps/hiredis/CHANGELOG.md
index 268b15cd5..f92bcb3c9 100644
--- a/deps/hiredis/CHANGELOG.md
+++ b/deps/hiredis/CHANGELOG.md
@@ -1,3 +1,128 @@
+### 1.0.0 (unreleased)
+
+**Fixes**:
+
+* Catch a buffer overflow when formatting the error message
+* Import latest upstream sds. This breaks applications that are linked against the old hiredis v0.13
+* Fix warnings, when compiled with -Wshadow
+* Make hiredis compile in Cygwin on Windows, now CI-tested
+
+**BREAKING CHANGES**:
+
+* Change `redisReply.len` to `size_t`, as it denotes the size of a string
+
+User code should compare this to `size_t` values as well.
+If it was used to compare to other values, casting might be necessary or can be removed, if casting was applied before.
+
+* Remove backwards compatibility macros
+
+This removes the following old function aliases, use the new name now:
+
+| Old | New |
+| --------------------------- | ---------------------- |
+| redisReplyReaderCreate | redisReaderCreate |
+| redisReplyReaderCreate | redisReaderCreate |
+| redisReplyReaderFree | redisReaderFree |
+| redisReplyReaderFeed | redisReaderFeed |
+| redisReplyReaderGetReply | redisReaderGetReply |
+| redisReplyReaderSetPrivdata | redisReaderSetPrivdata |
+| redisReplyReaderGetObject | redisReaderGetObject |
+| redisReplyReaderGetError | redisReaderGetError |
+
+* The `DEBUG` variable in the Makefile was renamed to `DEBUG_FLAGS`
+
+Previously it broke some builds for people that had `DEBUG` set to some arbitrary value,
+due to debugging other software.
+By renaming we avoid unintentional name clashes.
+
+Simply rename `DEBUG` to `DEBUG_FLAGS` in your environment to make it working again.
+
+### 0.13.3 (2015-09-16)
+
+* Revert "Clear `REDIS_CONNECTED` flag when connection is closed".
+* Make tests pass on FreeBSD (Thanks, Giacomo Olgeni)
+
+
+If the `REDIS_CONNECTED` flag is cleared,
+the async onDisconnect callback function will never be called.
+This causes problems as the disconnect is never reported back to the user.
+
+### 0.13.2 (2015-08-25)
+
+* Prevent crash on pending replies in async code (Thanks, @switch-st)
+* Clear `REDIS_CONNECTED` flag when connection is closed (Thanks, Jerry Jacobs)
+* Add MacOS X adapter (Thanks, @dizzus)
+* Add Qt adapter (Thanks, Pietro Cerutti)
+* Add Ivykis adapter (Thanks, Gergely Nagy)
+
+All adapters are provided as is and are only tested where possible.
+
+### 0.13.1 (2015-05-03)
+
+This is a bug fix release.
+The new `reconnect` method introduced new struct members, which clashed with pre-defined names in pre-C99 code.
+Another commit forced C99 compilation just to make it work, but of course this is not desirable for outside projects.
+Other non-C99 code can now use hiredis as usual again.
+Sorry for the inconvenience.
+
+* Fix memory leak in async reply handling (Salvatore Sanfilippo)
+* Rename struct member to avoid name clash with pre-c99 code (Alex Balashov, ncopa)
+
+### 0.13.0 (2015-04-16)
+
+This release adds a minimal Windows compatibility layer.
+The parser, standalone since v0.12.0, can now be compiled on Windows
+(and thus used in other client libraries as well)
+
+* Windows compatibility layer for parser code (tzickel)
+* Properly escape data printed to PKGCONF file (Dan Skorupski)
+* Fix tests when assert() undefined (Keith Bennett, Matt Stancliff)
+* Implement a reconnect method for the client context, this changes the structure of `redisContext` (Aaron Bedra)
+
+### 0.12.1 (2015-01-26)
+
+* Fix `make install`: DESTDIR support, install all required files, install PKGCONF in proper location
+* Fix `make test` as 32 bit build on 64 bit platform
+
+### 0.12.0 (2015-01-22)
+
+* Add optional KeepAlive support
+
+* Try again on EINTR errors
+
+* Add libuv adapter
+
+* Add IPv6 support
+
+* Remove possibility of multiple close on same fd
+
+* Add ability to bind source address on connect
+
+* Add redisConnectFd() and redisFreeKeepFd()
+
+* Fix getaddrinfo() memory leak
+
+* Free string if it is unused (fixes memory leak)
+
+* Improve redisAppendCommandArgv performance 2.5x
+
+* Add support for SO_REUSEADDR
+
+* Fix redisvFormatCommand format parsing
+
+* Add GLib 2.0 adapter
+
+* Refactor reading code into read.c
+
+* Fix errno error buffers to not clobber errors
+
+* Generate pkgconf during build
+
+* Silence _BSD_SOURCE warnings
+
+* Improve digit counting for multibulk creation
+
+
### 0.11.0
* Increase the maximum multi-bulk reply depth to 7.
diff --git a/deps/hiredis/Makefile b/deps/hiredis/Makefile
index ddcc4e4f6..9a4de8360 100644
--- a/deps/hiredis/Makefile
+++ b/deps/hiredis/Makefile
@@ -3,13 +3,25 @@
# Copyright (C) 2010-2011 Pieter Noordhuis <pcnoordhuis at gmail dot com>
# This file is released under the BSD license, see the COPYING file
-OBJ=net.o hiredis.o sds.o async.o
-EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev
+OBJ=net.o hiredis.o sds.o async.o read.o
+EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev hiredis-example-glib
TESTS=hiredis-test
LIBNAME=libhiredis
+PKGCONFNAME=hiredis.pc
-HIREDIS_MAJOR=0
-HIREDIS_MINOR=11
+HIREDIS_MAJOR=$(shell grep HIREDIS_MAJOR hiredis.h | awk '{print $$3}')
+HIREDIS_MINOR=$(shell grep HIREDIS_MINOR hiredis.h | awk '{print $$3}')
+HIREDIS_PATCH=$(shell grep HIREDIS_PATCH hiredis.h | awk '{print $$3}')
+HIREDIS_SONAME=$(shell grep HIREDIS_SONAME hiredis.h | awk '{print $$3}')
+
+# Installation related variables and target
+PREFIX?=/usr/local
+INCLUDE_PATH?=include/hiredis
+LIBRARY_PATH?=lib
+PKGCONF_PATH?=pkgconfig
+INSTALL_INCLUDE_PATH= $(DESTDIR)$(PREFIX)/$(INCLUDE_PATH)
+INSTALL_LIBRARY_PATH= $(DESTDIR)$(PREFIX)/$(LIBRARY_PATH)
+INSTALL_PKGCONF_PATH= $(INSTALL_LIBRARY_PATH)/$(PKGCONF_PATH)
# redis-server configuration used for testing
REDIS_PORT=56379
@@ -25,15 +37,16 @@ export REDIS_TEST_CONFIG
# Fallback to gcc when $CC is not in $PATH.
CC:=$(shell sh -c 'type $(CC) >/dev/null 2>/dev/null && echo $(CC) || echo gcc')
+CXX:=$(shell sh -c 'type $(CXX) >/dev/null 2>/dev/null && echo $(CXX) || echo g++')
OPTIMIZATION?=-O3
WARNINGS=-Wall -W -Wstrict-prototypes -Wwrite-strings
-DEBUG?= -g -ggdb
-REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG) $(ARCH)
+DEBUG_FLAGS?= -g -ggdb
+REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG_FLAGS) $(ARCH)
REAL_LDFLAGS=$(LDFLAGS) $(ARCH)
DYLIBSUFFIX=so
STLIBSUFFIX=a
-DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR).$(HIREDIS_MINOR)
+DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_SONAME)
DYLIB_MAJOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR)
DYLIBNAME=$(LIBNAME).$(DYLIBSUFFIX)
DYLIB_MAKE_CMD=$(CC) -shared -Wl,-soname,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS)
@@ -49,19 +62,20 @@ ifeq ($(uname_S),SunOS)
endif
ifeq ($(uname_S),Darwin)
DYLIBSUFFIX=dylib
- DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(DYLIBSUFFIX)
- DYLIB_MAJOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(DYLIBSUFFIX)
+ DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_SONAME).$(DYLIBSUFFIX)
DYLIB_MAKE_CMD=$(CC) -shared -Wl,-install_name,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS)
endif
-all: $(DYLIBNAME)
+all: $(DYLIBNAME) $(STLIBNAME) hiredis-test $(PKGCONFNAME)
# Deps (use make dep to generate this)
-net.o: net.c fmacros.h net.h hiredis.h
-async.o: async.c async.h hiredis.h sds.h dict.c dict.h
-hiredis.o: hiredis.c fmacros.h hiredis.h net.h sds.h
+async.o: async.c fmacros.h async.h hiredis.h read.h sds.h net.h dict.c dict.h
+dict.o: dict.c fmacros.h dict.h
+hiredis.o: hiredis.c fmacros.h hiredis.h read.h sds.h net.h
+net.o: net.c fmacros.h net.h hiredis.h read.h sds.h
+read.o: read.c fmacros.h read.h sds.h
sds.o: sds.c sds.h
-test.o: test.c hiredis.h
+test.o: test.c fmacros.h hiredis.h read.h sds.h
$(DYLIBNAME): $(OBJ)
$(DYLIB_MAKE_CMD) $(OBJ)
@@ -79,6 +93,15 @@ hiredis-example-libevent: examples/example-libevent.c adapters/libevent.h $(STLI
hiredis-example-libev: examples/example-libev.c adapters/libev.h $(STLIBNAME)
$(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -lev $(STLIBNAME)
+hiredis-example-glib: examples/example-glib.c adapters/glib.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) $(shell pkg-config --cflags --libs glib-2.0) -I. $< $(STLIBNAME)
+
+hiredis-example-ivykis: examples/example-ivykis.c adapters/ivykis.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -livykis $(STLIBNAME)
+
+hiredis-example-macosx: examples/example-macosx.c adapters/macosx.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -framework CoreFoundation $(STLIBNAME)
+
ifndef AE_DIR
hiredis-example-ae:
@echo "Please specify AE_DIR (e.g. <redis repository>/src)"
@@ -94,7 +117,20 @@ hiredis-example-libuv:
@false
else
hiredis-example-libuv: examples/example-libuv.c adapters/libuv.h $(STLIBNAME)
- $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread -lrt $(STLIBNAME)
+endif
+
+ifeq ($(and $(QT_MOC),$(QT_INCLUDE_DIR),$(QT_LIBRARY_DIR)),)
+hiredis-example-qt:
+ @echo "Please specify QT_MOC, QT_INCLUDE_DIR AND QT_LIBRARY_DIR"
+ @false
+else
+hiredis-example-qt: examples/example-qt.cpp adapters/qt.h $(STLIBNAME)
+ $(QT_MOC) adapters/qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \
+ $(CXX) -x c++ -o qt-adapter-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore
+ $(QT_MOC) examples/example-qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \
+ $(CXX) -x c++ -o qt-example-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore
+ $(CXX) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore -L$(QT_LIBRARY_DIR) qt-adapter-moc.o qt-example-moc.o $< -pthread $(STLIBNAME) -lQtCore
endif
hiredis-example: examples/example.c $(STLIBNAME)
@@ -103,14 +139,16 @@ hiredis-example: examples/example.c $(STLIBNAME)
examples: $(EXAMPLES)
hiredis-test: test.o $(STLIBNAME)
- $(CC) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME)
+
+hiredis-%: %.o $(STLIBNAME)
+ $(CC) $(REAL_CFLAGS) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME)
test: hiredis-test
./hiredis-test
check: hiredis-test
@echo "$$REDIS_TEST_CONFIG" | $(REDIS_SERVER) -
- ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \
+ $(PRE) ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \
( kill `cat /tmp/hiredis-test-redis.pid` && false )
kill `cat /tmp/hiredis-test-redis.pid`
@@ -118,29 +156,38 @@ check: hiredis-test
$(CC) -std=c99 -pedantic -c $(REAL_CFLAGS) $<
clean:
- rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) examples/hiredis-example* *.o *.gcda *.gcno *.gcov
+ rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) $(PKGCONFNAME) examples/hiredis-example* *.o *.gcda *.gcno *.gcov
dep:
$(CC) -MM *.c
-# Installation related variables and target
-PREFIX?=/usr/local
-INSTALL_INCLUDE_PATH= $(PREFIX)/include/hiredis
-INSTALL_LIBRARY_PATH= $(PREFIX)/lib
-
ifeq ($(uname_S),SunOS)
INSTALL?= cp -r
endif
INSTALL?= cp -a
-install: $(DYLIBNAME) $(STLIBNAME)
+$(PKGCONFNAME): hiredis.h
+ @echo "Generating $@ for pkgconfig..."
+ @echo prefix=$(PREFIX) > $@
+ @echo exec_prefix=\$${prefix} >> $@
+ @echo libdir=$(PREFIX)/$(LIBRARY_PATH) >> $@
+ @echo includedir=$(PREFIX)/$(INCLUDE_PATH) >> $@
+ @echo >> $@
+ @echo Name: hiredis >> $@
+ @echo Description: Minimalistic C client library for Redis. >> $@
+ @echo Version: $(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(HIREDIS_PATCH) >> $@
+ @echo Libs: -L\$${libdir} -lhiredis >> $@
+ @echo Cflags: -I\$${includedir} -D_FILE_OFFSET_BITS=64 >> $@
+
+install: $(DYLIBNAME) $(STLIBNAME) $(PKGCONFNAME)
mkdir -p $(INSTALL_INCLUDE_PATH) $(INSTALL_LIBRARY_PATH)
- $(INSTALL) hiredis.h async.h adapters $(INSTALL_INCLUDE_PATH)
+ $(INSTALL) hiredis.h async.h read.h sds.h adapters $(INSTALL_INCLUDE_PATH)
$(INSTALL) $(DYLIBNAME) $(INSTALL_LIBRARY_PATH)/$(DYLIB_MINOR_NAME)
- cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIB_MAJOR_NAME)
- cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MAJOR_NAME) $(DYLIBNAME)
+ cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIBNAME)
$(INSTALL) $(STLIBNAME) $(INSTALL_LIBRARY_PATH)
+ mkdir -p $(INSTALL_PKGCONF_PATH)
+ $(INSTALL) $(PKGCONFNAME) $(INSTALL_PKGCONF_PATH)
32bit:
@echo ""
@@ -148,6 +195,10 @@ install: $(DYLIBNAME) $(STLIBNAME)
@echo ""
$(MAKE) CFLAGS="-m32" LDFLAGS="-m32"
+32bit-vars:
+ $(eval CFLAGS=-m32)
+ $(eval LDFLAGS=-m32)
+
gprof:
$(MAKE) CFLAGS="-pg" LDFLAGS="-pg"
@@ -163,4 +214,4 @@ coverage: gcov
noopt:
$(MAKE) OPTIMIZATION=""
-.PHONY: all test check clean dep install 32bit gprof gcov noopt
+.PHONY: all test check clean dep install 32bit 32bit-vars gprof gcov noopt
diff --git a/deps/hiredis/README.md b/deps/hiredis/README.md
index dba4a8c8e..01223ea59 100644
--- a/deps/hiredis/README.md
+++ b/deps/hiredis/README.md
@@ -1,11 +1,13 @@
[![Build Status](https://travis-ci.org/redis/hiredis.png)](https://travis-ci.org/redis/hiredis)
+**This Readme reflects the latest changes in the master branch. See [v0.13.3](https://github.com/redis/hiredis/tree/v0.13.3) for the Readme and documentation for the latest release.**
+
# HIREDIS
Hiredis is a minimalistic C client library for the [Redis](http://redis.io/) database.
It is minimalistic because it just adds minimal support for the protocol, but
-at the same time it uses an high level printf-alike API in order to make it
+at the same time it uses a high level printf-alike API in order to make it
much higher level than otherwise suggested by its minimal code base and the
lack of explicit bindings for every Redis command.
@@ -20,7 +22,15 @@ Redis version >= 1.2.0.
The library comes with multiple APIs. There is the
*synchronous API*, the *asynchronous API* and the *reply parsing API*.
-## UPGRADING
+## Upgrading to `1.0.0`
+
+Version 1.0.0 marks a stable release of hiredis.
+It includes some minor breaking changes, mostly to make the exposed API more uniform and self-explanatory.
+It also bundles the updated `sds` library, to sync up with upstream and Redis.
+For most applications a recompile against the new hiredis should be enough.
+For code changes see the [Changelog](CHANGELOG.md).
+
+## Upgrading from `<0.9.0`
Version 0.9.0 is a major overhaul of hiredis in every aspect. However, upgrading existing
code using hiredis should not be a big pain. The key thing to keep in mind when
@@ -31,51 +41,62 @@ the stateless 0.0.1 that only has a file descriptor to work with.
To consume the synchronous API, there are only a few function calls that need to be introduced:
- redisContext *redisConnect(const char *ip, int port);
- void *redisCommand(redisContext *c, const char *format, ...);
- void freeReplyObject(void *reply);
+```c
+redisContext *redisConnect(const char *ip, int port);
+void *redisCommand(redisContext *c, const char *format, ...);
+void freeReplyObject(void *reply);
+```
### Connecting
The function `redisConnect` is used to create a so-called `redisContext`. The
context is where Hiredis holds state for a connection. The `redisContext`
-struct has an integer `err` field that is non-zero when an the connection is in
+struct has an integer `err` field that is non-zero when the connection is in
an error state. The field `errstr` will contain a string with a description of
the error. More information on errors can be found in the **Errors** section.
After trying to connect to Redis using `redisConnect` you should
check the `err` field to see if establishing the connection was successful:
-
- redisContext *c = redisConnect("127.0.0.1", 6379);
- if (c != NULL && c->err) {
+```c
+redisContext *c = redisConnect("127.0.0.1", 6379);
+if (c == NULL || c->err) {
+ if (c) {
printf("Error: %s\n", c->errstr);
// handle error
+ } else {
+ printf("Can't allocate redis context\n");
}
+}
+```
+
+*Note: A `redisContext` is not thread-safe.*
### Sending commands
There are several ways to issue commands to Redis. The first that will be introduced is
`redisCommand`. This function takes a format similar to printf. In the simplest form,
it is used like this:
-
- reply = redisCommand(context, "SET foo bar");
+```c
+reply = redisCommand(context, "SET foo bar");
+```
The specifier `%s` interpolates a string in the command, and uses `strlen` to
determine the length of the string:
-
- reply = redisCommand(context, "SET foo %s", value);
-
+```c
+reply = redisCommand(context, "SET foo %s", value);
+```
When you need to pass binary safe strings in a command, the `%b` specifier can be
used. Together with a pointer to the string, it requires a `size_t` length argument
of the string:
-
- reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen);
-
+```c
+reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen);
+```
Internally, Hiredis splits the command in different arguments and will
convert it to the protocol used to communicate with Redis.
One or more spaces separates arguments, so you can use the specifiers
anywhere in an argument:
-
- reply = redisCommand(context, "SET key:%s %s", myid, value);
+```c
+reply = redisCommand(context, "SET key:%s %s", myid, value);
+```
### Using replies
@@ -114,11 +135,11 @@ was received:
Redis may reply with nested arrays but this is fully supported.
Replies should be freed using the `freeReplyObject()` function.
-Note that this function will take care of freeing sub-replies objects
+Note that this function will take care of freeing sub-reply objects
contained in arrays and nested arrays, so there is no need for the user to
free the sub replies (it is actually harmful and will corrupt the memory).
-**Important:** the current version of hiredis (0.10.0) free's replies when the
+**Important:** the current version of hiredis (0.10.0) frees replies when the
asynchronous API is used. This means you should not call `freeReplyObject` when
you use this API. The reply is cleaned up by hiredis _after_ the callback
returns. This behavior will probably change in future releases, so make sure to
@@ -127,19 +148,19 @@ keep an eye on the changelog when upgrading (see issue #39).
### Cleaning up
To disconnect and free the context the following function can be used:
-
- void redisFree(redisContext *c);
-
-This function immediately closes the socket and then free's the allocations done in
+```c
+void redisFree(redisContext *c);
+```
+This function immediately closes the socket and then frees the allocations done in
creating the context.
### Sending commands (cont'd)
Together with `redisCommand`, the function `redisCommandArgv` can be used to issue commands.
It has the following prototype:
-
- void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
-
+```c
+void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
+```
It takes the number of arguments `argc`, an array of strings `argv` and the lengths of the
arguments `argvlen`. For convenience, `argvlen` may be set to `NULL` and the function will
use `strlen(3)` on every argument to determine its length. Obviously, when any of the arguments
@@ -169,10 +190,10 @@ The function `redisGetReply` is exported as part of the Hiredis API and can be u
is expected on the socket. To pipeline commands, the only things that needs to be done is
filling up the output buffer. For this cause, two commands can be used that are identical
to the `redisCommand` family, apart from not returning a reply:
-
- void redisAppendCommand(redisContext *c, const char *format, ...);
- void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
-
+```c
+void redisAppendCommand(redisContext *c, const char *format, ...);
+void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
+```
After calling either function one or more times, `redisGetReply` can be used to receive the
subsequent replies. The return value for this function is either `REDIS_OK` or `REDIS_ERR`, where
the latter means an error occurred while reading a reply. Just as with the other commands,
@@ -180,24 +201,24 @@ the `err` field in the context can be used to find out what the cause of this er
The following examples shows a simple pipeline (resulting in only a single call to `write(2)` and
a single call to `read(2)`):
-
- redisReply *reply;
- redisAppendCommand(context,"SET foo bar");
- redisAppendCommand(context,"GET foo");
- redisGetReply(context,&reply); // reply for SET
- freeReplyObject(reply);
- redisGetReply(context,&reply); // reply for GET
- freeReplyObject(reply);
-
+```c
+redisReply *reply;
+redisAppendCommand(context,"SET foo bar");
+redisAppendCommand(context,"GET foo");
+redisGetReply(context,&reply); // reply for SET
+freeReplyObject(reply);
+redisGetReply(context,&reply); // reply for GET
+freeReplyObject(reply);
+```
This API can also be used to implement a blocking subscriber:
-
- reply = redisCommand(context,"SUBSCRIBE foo");
+```c
+reply = redisCommand(context,"SUBSCRIBE foo");
+freeReplyObject(reply);
+while(redisGetReply(context,&reply) == REDIS_OK) {
+ // consume message
freeReplyObject(reply);
- while(redisGetReply(context,&reply) == REDIS_OK) {
- // consume message
- freeReplyObject(reply);
- }
-
+}
+```
### Errors
When a function call is not successful, depending on the function either `NULL` or `REDIS_ERR` is
@@ -237,58 +258,62 @@ should be checked after creation to see if there were errors creating the connec
Because the connection that will be created is non-blocking, the kernel is not able to
instantly return if the specified host and port is able to accept a connection.
- redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
- if (c->err) {
- printf("Error: %s\n", c->errstr);
- // handle error
- }
+*Note: A `redisAsyncContext` is not thread-safe.*
+
+```c
+redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+if (c->err) {
+ printf("Error: %s\n", c->errstr);
+ // handle error
+}
+```
The asynchronous context can hold a disconnect callback function that is called when the
connection is disconnected (either because of an error or per user request). This function should
have the following prototype:
-
- void(const redisAsyncContext *c, int status);
-
+```c
+void(const redisAsyncContext *c, int status);
+```
On a disconnect, the `status` argument is set to `REDIS_OK` when disconnection was initiated by the
user, or `REDIS_ERR` when the disconnection was caused by an error. When it is `REDIS_ERR`, the `err`
field in the context can be accessed to find out the cause of the error.
-The context object is always free'd after the disconnect callback fired. When a reconnect is needed,
+The context object is always freed after the disconnect callback fired. When a reconnect is needed,
the disconnect callback is a good point to do so.
Setting the disconnect callback can only be done once per context. For subsequent calls it will
return `REDIS_ERR`. The function to set the disconnect callback has the following prototype:
-
- int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
-
+```c
+int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
+```
### Sending commands and their callbacks
In an asynchronous context, commands are automatically pipelined due to the nature of an event loop.
Therefore, unlike the synchronous API, there is only a single way to send commands.
Because commands are sent to Redis asynchronously, issuing a command requires a callback function
that is called when the reply is received. Reply callbacks should have the following prototype:
-
- void(redisAsyncContext *c, void *reply, void *privdata);
-
+```c
+void(redisAsyncContext *c, void *reply, void *privdata);
+```
The `privdata` argument can be used to curry arbitrary data to the callback from the point where
the command is initially queued for execution.
The functions that can be used to issue commands in an asynchronous context are:
-
- int redisAsyncCommand(
- redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
- const char *format, ...);
- int redisAsyncCommandArgv(
- redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
- int argc, const char **argv, const size_t *argvlen);
-
+```c
+int redisAsyncCommand(
+ redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
+ const char *format, ...);
+int redisAsyncCommandArgv(
+ redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
+ int argc, const char **argv, const size_t *argvlen);
+```
Both functions work like their blocking counterparts. The return value is `REDIS_OK` when the command
was successfully added to the output buffer and `REDIS_ERR` otherwise. Example: when the connection
is being disconnected per user-request, no new commands may be added to the output buffer and `REDIS_ERR` is
returned on calls to the `redisAsyncCommand` family.
-If the reply for a command with a `NULL` callback is read, it is immediately free'd. When the callback
-for a command is non-`NULL`, the memory is free'd immediately following the callback: the reply is only
+If the reply for a command with a `NULL` callback is read, it is immediately freed. When the callback
+for a command is non-`NULL`, the memory is freed immediately following the callback: the reply is only
valid for the duration of the callback.
All pending callbacks are called with a `NULL` reply when the context encountered an error.
@@ -296,14 +321,14 @@ All pending callbacks are called with a `NULL` reply when the context encountere
### Disconnecting
An asynchronous connection can be terminated using:
-
- void redisAsyncDisconnect(redisAsyncContext *ac);
-
+```c
+void redisAsyncDisconnect(redisAsyncContext *ac);
+```
When this function is called, the connection is **not** immediately terminated. Instead, new
commands are no longer accepted and the connection is only terminated when all pending commands
have been written to the socket, their respective replies have been read and their respective
callbacks have been executed. After this, the disconnection callback is executed with the
-`REDIS_OK` status and the context object is free'd.
+`REDIS_OK` status and the context object is freed.
### Hooking it up to event library *X*
@@ -316,12 +341,12 @@ Hiredis comes with a reply parsing API that makes it easy for writing higher
level language bindings.
The reply parsing API consists of the following functions:
-
- redisReader *redisReaderCreate(void);
- void redisReaderFree(redisReader *reader);
- int redisReaderFeed(redisReader *reader, const char *buf, size_t len);
- int redisReaderGetReply(redisReader *reader, void **reply);
-
+```c
+redisReader *redisReaderCreate(void);
+void redisReaderFree(redisReader *reader);
+int redisReaderFeed(redisReader *reader, const char *buf, size_t len);
+int redisReaderGetReply(redisReader *reader, void **reply);
+```
The same set of functions are used internally by hiredis when creating a
normal Redis context, the above API just exposes it to the user for a direct
usage.
@@ -361,7 +386,7 @@ Both when using the Reader API directly or when using it indirectly via a
normal Redis context, the redisReader structure uses a buffer in order to
accumulate data from the server.
Usually this buffer is destroyed when it is empty and is larger than 16
-kb in order to avoid wasting memory in unused buffers
+KiB in order to avoid wasting memory in unused buffers.
However when working with very big payloads destroying the buffer may slow
down performances considerably, so it is possible to modify the max size of
@@ -371,9 +396,9 @@ value for an idle buffer, so the buffer will never get freed.
For instance if you have a normal Redis context you can set the maximum idle
buffer to zero (unlimited) just with:
-
- context->reader->maxbuf = 0;
-
+```c
+context->reader->maxbuf = 0;
+```
This should be done only in order to maximize performances when working with
large payloads. The context should be set back to `REDIS_READER_MAX_BUF` again
as soon as possible in order to prevent allocation of useless memory.
@@ -381,4 +406,6 @@ as soon as possible in order to prevent allocation of useless memory.
## AUTHORS
Hiredis was written by Salvatore Sanfilippo (antirez at gmail) and
-Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license.
+Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license.
+Hiredis is currently maintained by Matt Stancliff (matt at genges dot com) and
+Jan-Erik Rediger (janerik at fnordig dot com)
diff --git a/deps/hiredis/adapters/glib.h b/deps/hiredis/adapters/glib.h
new file mode 100644
index 000000000..e0a6411d3
--- /dev/null
+++ b/deps/hiredis/adapters/glib.h
@@ -0,0 +1,153 @@
+#ifndef __HIREDIS_GLIB_H__
+#define __HIREDIS_GLIB_H__
+
+#include <glib.h>
+
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct
+{
+ GSource source;
+ redisAsyncContext *ac;
+ GPollFD poll_fd;
+} RedisSource;
+
+static void
+redis_source_add_read (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events |= G_IO_IN;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_del_read (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events &= ~G_IO_IN;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_add_write (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events |= G_IO_OUT;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_del_write (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events &= ~G_IO_OUT;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_cleanup (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+
+ g_return_if_fail(source);
+
+ redis_source_del_read(source);
+ redis_source_del_write(source);
+ /*
+ * It is not our responsibility to remove ourselves from the
+ * current main loop. However, we will remove the GPollFD.
+ */
+ if (source->poll_fd.fd >= 0) {
+ g_source_remove_poll((GSource *)data, &source->poll_fd);
+ source->poll_fd.fd = -1;
+ }
+}
+
+static gboolean
+redis_source_prepare (GSource *source,
+ gint *timeout_)
+{
+ RedisSource *redis = (RedisSource *)source;
+ *timeout_ = -1;
+ return !!(redis->poll_fd.events & redis->poll_fd.revents);
+}
+
+static gboolean
+redis_source_check (GSource *source)
+{
+ RedisSource *redis = (RedisSource *)source;
+ return !!(redis->poll_fd.events & redis->poll_fd.revents);
+}
+
+static gboolean
+redis_source_dispatch (GSource *source,
+ GSourceFunc callback,
+ gpointer user_data)
+{
+ RedisSource *redis = (RedisSource *)source;
+
+ if ((redis->poll_fd.revents & G_IO_OUT)) {
+ redisAsyncHandleWrite(redis->ac);
+ redis->poll_fd.revents &= ~G_IO_OUT;
+ }
+
+ if ((redis->poll_fd.revents & G_IO_IN)) {
+ redisAsyncHandleRead(redis->ac);
+ redis->poll_fd.revents &= ~G_IO_IN;
+ }
+
+ if (callback) {
+ return callback(user_data);
+ }
+
+ return TRUE;
+}
+
+static void
+redis_source_finalize (GSource *source)
+{
+ RedisSource *redis = (RedisSource *)source;
+
+ if (redis->poll_fd.fd >= 0) {
+ g_source_remove_poll(source, &redis->poll_fd);
+ redis->poll_fd.fd = -1;
+ }
+}
+
+static GSource *
+redis_source_new (redisAsyncContext *ac)
+{
+ static GSourceFuncs source_funcs = {
+ .prepare = redis_source_prepare,
+ .check = redis_source_check,
+ .dispatch = redis_source_dispatch,
+ .finalize = redis_source_finalize,
+ };
+ redisContext *c = &ac->c;
+ RedisSource *source;
+
+ g_return_val_if_fail(ac != NULL, NULL);
+
+ source = (RedisSource *)g_source_new(&source_funcs, sizeof *source);
+ source->ac = ac;
+ source->poll_fd.fd = c->fd;
+ source->poll_fd.events = 0;
+ source->poll_fd.revents = 0;
+ g_source_add_poll((GSource *)source, &source->poll_fd);
+
+ ac->ev.addRead = redis_source_add_read;
+ ac->ev.delRead = redis_source_del_read;
+ ac->ev.addWrite = redis_source_add_write;
+ ac->ev.delWrite = redis_source_del_write;
+ ac->ev.cleanup = redis_source_cleanup;
+ ac->ev.data = source;
+
+ return (GSource *)source;
+}
+
+#endif /* __HIREDIS_GLIB_H__ */
diff --git a/deps/hiredis/adapters/ivykis.h b/deps/hiredis/adapters/ivykis.h
new file mode 100644
index 000000000..6a12a868a
--- /dev/null
+++ b/deps/hiredis/adapters/ivykis.h
@@ -0,0 +1,81 @@
+#ifndef __HIREDIS_IVYKIS_H__
+#define __HIREDIS_IVYKIS_H__
+#include <iv.h>
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct redisIvykisEvents {
+ redisAsyncContext *context;
+ struct iv_fd fd;
+} redisIvykisEvents;
+
+static void redisIvykisReadEvent(void *arg) {
+ redisAsyncContext *context = (redisAsyncContext *)arg;
+ redisAsyncHandleRead(context);
+}
+
+static void redisIvykisWriteEvent(void *arg) {
+ redisAsyncContext *context = (redisAsyncContext *)arg;
+ redisAsyncHandleWrite(context);
+}
+
+static void redisIvykisAddRead(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_in(&e->fd, redisIvykisReadEvent);
+}
+
+static void redisIvykisDelRead(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_in(&e->fd, NULL);
+}
+
+static void redisIvykisAddWrite(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_out(&e->fd, redisIvykisWriteEvent);
+}
+
+static void redisIvykisDelWrite(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_out(&e->fd, NULL);
+}
+
+static void redisIvykisCleanup(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+
+ iv_fd_unregister(&e->fd);
+ free(e);
+}
+
+static int redisIvykisAttach(redisAsyncContext *ac) {
+ redisContext *c = &(ac->c);
+ redisIvykisEvents *e;
+
+ /* Nothing should be attached when something is already attached */
+ if (ac->ev.data != NULL)
+ return REDIS_ERR;
+
+ /* Create container for context and r/w events */
+ e = (redisIvykisEvents*)malloc(sizeof(*e));
+ e->context = ac;
+
+ /* Register functions to start/stop listening for events */
+ ac->ev.addRead = redisIvykisAddRead;
+ ac->ev.delRead = redisIvykisDelRead;
+ ac->ev.addWrite = redisIvykisAddWrite;
+ ac->ev.delWrite = redisIvykisDelWrite;
+ ac->ev.cleanup = redisIvykisCleanup;
+ ac->ev.data = e;
+
+ /* Initialize and install read/write events */
+ IV_FD_INIT(&e->fd);
+ e->fd.fd = c->fd;
+ e->fd.handler_in = redisIvykisReadEvent;
+ e->fd.handler_out = redisIvykisWriteEvent;
+ e->fd.handler_err = NULL;
+ e->fd.cookie = e->context;
+
+ iv_fd_register(&e->fd);
+
+ return REDIS_OK;
+}
+#endif
diff --git a/deps/hiredis/adapters/libevent.h b/deps/hiredis/adapters/libevent.h
index 1c2b271bb..273d8b2dd 100644
--- a/deps/hiredis/adapters/libevent.h
+++ b/deps/hiredis/adapters/libevent.h
@@ -30,13 +30,13 @@
#ifndef __HIREDIS_LIBEVENT_H__
#define __HIREDIS_LIBEVENT_H__
-#include <event.h>
+#include <event2/event.h>
#include "../hiredis.h"
#include "../async.h"
typedef struct redisLibeventEvents {
redisAsyncContext *context;
- struct event rev, wev;
+ struct event *rev, *wev;
} redisLibeventEvents;
static void redisLibeventReadEvent(int fd, short event, void *arg) {
@@ -53,28 +53,28 @@ static void redisLibeventWriteEvent(int fd, short event, void *arg) {
static void redisLibeventAddRead(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_add(&e->rev,NULL);
+ event_add(e->rev,NULL);
}
static void redisLibeventDelRead(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->rev);
+ event_del(e->rev);
}
static void redisLibeventAddWrite(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_add(&e->wev,NULL);
+ event_add(e->wev,NULL);
}
static void redisLibeventDelWrite(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->wev);
+ event_del(e->wev);
}
static void redisLibeventCleanup(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->rev);
- event_del(&e->wev);
+ event_del(e->rev);
+ event_del(e->wev);
free(e);
}
@@ -99,10 +99,10 @@ static int redisLibeventAttach(redisAsyncContext *ac, struct event_base *base) {
ac->ev.data = e;
/* Initialize and install read/write events */
- event_set(&e->rev,c->fd,EV_READ,redisLibeventReadEvent,e);
- event_set(&e->wev,c->fd,EV_WRITE,redisLibeventWriteEvent,e);
- event_base_set(base,&e->rev);
- event_base_set(base,&e->wev);
+ e->rev = event_new(base, c->fd, EV_READ, redisLibeventReadEvent, e);
+ e->wev = event_new(base, c->fd, EV_WRITE, redisLibeventWriteEvent, e);
+ event_add(e->rev, NULL);
+ event_add(e->wev, NULL);
return REDIS_OK;
}
#endif
diff --git a/deps/hiredis/adapters/libuv.h b/deps/hiredis/adapters/libuv.h
index a1967f4fd..ff08c25e1 100644
--- a/deps/hiredis/adapters/libuv.h
+++ b/deps/hiredis/adapters/libuv.h
@@ -1,5 +1,6 @@
#ifndef __HIREDIS_LIBUV_H__
#define __HIREDIS_LIBUV_H__
+#include <stdlib.h>
#include <uv.h>
#include "../hiredis.h"
#include "../async.h"
@@ -11,7 +12,6 @@ typedef struct redisLibuvEvents {
int events;
} redisLibuvEvents;
-int redisLibuvAttach(redisAsyncContext*, uv_loop_t*);
static void redisLibuvPoll(uv_poll_t* handle, int status, int events) {
redisLibuvEvents* p = (redisLibuvEvents*)handle->data;
@@ -20,10 +20,10 @@ static void redisLibuvPoll(uv_poll_t* handle, int status, int events) {
return;
}
- if (events & UV_READABLE) {
+ if (p->context != NULL && (events & UV_READABLE)) {
redisAsyncHandleRead(p->context);
}
- if (events & UV_WRITABLE) {
+ if (p->context != NULL && (events & UV_WRITABLE)) {
redisAsyncHandleWrite(p->context);
}
}
@@ -83,6 +83,7 @@ static void on_close(uv_handle_t* handle) {
static void redisLibuvCleanup(void *privdata) {
redisLibuvEvents* p = (redisLibuvEvents*)privdata;
+ p->context = NULL; // indicate that context might no longer exist
uv_close((uv_handle_t*)&p->handle, on_close);
}
diff --git a/deps/hiredis/adapters/macosx.h b/deps/hiredis/adapters/macosx.h
new file mode 100644
index 000000000..72121f606
--- /dev/null
+++ b/deps/hiredis/adapters/macosx.h
@@ -0,0 +1,114 @@
+//
+// Created by Дмитрий Бахвалов on 13.07.15.
+// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved.
+//
+
+#ifndef __HIREDIS_MACOSX_H__
+#define __HIREDIS_MACOSX_H__
+
+#include <CoreFoundation/CoreFoundation.h>
+
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct {
+ redisAsyncContext *context;
+ CFSocketRef socketRef;
+ CFRunLoopSourceRef sourceRef;
+} RedisRunLoop;
+
+static int freeRedisRunLoop(RedisRunLoop* redisRunLoop) {
+ if( redisRunLoop != NULL ) {
+ if( redisRunLoop->sourceRef != NULL ) {
+ CFRunLoopSourceInvalidate(redisRunLoop->sourceRef);
+ CFRelease(redisRunLoop->sourceRef);
+ }
+ if( redisRunLoop->socketRef != NULL ) {
+ CFSocketInvalidate(redisRunLoop->socketRef);
+ CFRelease(redisRunLoop->socketRef);
+ }
+ free(redisRunLoop);
+ }
+ return REDIS_ERR;
+}
+
+static void redisMacOSAddRead(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack);
+}
+
+static void redisMacOSDelRead(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack);
+}
+
+static void redisMacOSAddWrite(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack);
+}
+
+static void redisMacOSDelWrite(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack);
+}
+
+static void redisMacOSCleanup(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ freeRedisRunLoop(redisRunLoop);
+}
+
+static void redisMacOSAsyncCallback(CFSocketRef __unused s, CFSocketCallBackType callbackType, CFDataRef __unused address, const void __unused *data, void *info) {
+ redisAsyncContext* context = (redisAsyncContext*) info;
+
+ switch (callbackType) {
+ case kCFSocketReadCallBack:
+ redisAsyncHandleRead(context);
+ break;
+
+ case kCFSocketWriteCallBack:
+ redisAsyncHandleWrite(context);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int redisMacOSAttach(redisAsyncContext *redisAsyncCtx, CFRunLoopRef runLoop) {
+ redisContext *redisCtx = &(redisAsyncCtx->c);
+
+ /* Nothing should be attached when something is already attached */
+ if( redisAsyncCtx->ev.data != NULL ) return REDIS_ERR;
+
+ RedisRunLoop* redisRunLoop = (RedisRunLoop*) calloc(1, sizeof(RedisRunLoop));
+ if( !redisRunLoop ) return REDIS_ERR;
+
+ /* Setup redis stuff */
+ redisRunLoop->context = redisAsyncCtx;
+
+ redisAsyncCtx->ev.addRead = redisMacOSAddRead;
+ redisAsyncCtx->ev.delRead = redisMacOSDelRead;
+ redisAsyncCtx->ev.addWrite = redisMacOSAddWrite;
+ redisAsyncCtx->ev.delWrite = redisMacOSDelWrite;
+ redisAsyncCtx->ev.cleanup = redisMacOSCleanup;
+ redisAsyncCtx->ev.data = redisRunLoop;
+
+ /* Initialize and install read/write events */
+ CFSocketContext socketCtx = { 0, redisAsyncCtx, NULL, NULL, NULL };
+
+ redisRunLoop->socketRef = CFSocketCreateWithNative(NULL, redisCtx->fd,
+ kCFSocketReadCallBack | kCFSocketWriteCallBack,
+ redisMacOSAsyncCallback,
+ &socketCtx);
+ if( !redisRunLoop->socketRef ) return freeRedisRunLoop(redisRunLoop);
+
+ redisRunLoop->sourceRef = CFSocketCreateRunLoopSource(NULL, redisRunLoop->socketRef, 0);
+ if( !redisRunLoop->sourceRef ) return freeRedisRunLoop(redisRunLoop);
+
+ CFRunLoopAddSource(runLoop, redisRunLoop->sourceRef, kCFRunLoopDefaultMode);
+
+ return REDIS_OK;
+}
+
+#endif
+
diff --git a/deps/hiredis/adapters/qt.h b/deps/hiredis/adapters/qt.h
new file mode 100644
index 000000000..5cc02e6ce
--- /dev/null
+++ b/deps/hiredis/adapters/qt.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (C) 2014 Pietro Cerutti <gahr@gahr.ch>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __HIREDIS_QT_H__
+#define __HIREDIS_QT_H__
+#include <QSocketNotifier>
+#include "../async.h"
+
+static void RedisQtAddRead(void *);
+static void RedisQtDelRead(void *);
+static void RedisQtAddWrite(void *);
+static void RedisQtDelWrite(void *);
+static void RedisQtCleanup(void *);
+
+class RedisQtAdapter : public QObject {
+
+ Q_OBJECT
+
+ friend
+ void RedisQtAddRead(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->addRead();
+ }
+
+ friend
+ void RedisQtDelRead(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->delRead();
+ }
+
+ friend
+ void RedisQtAddWrite(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->addWrite();
+ }
+
+ friend
+ void RedisQtDelWrite(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->delWrite();
+ }
+
+ friend
+ void RedisQtCleanup(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->cleanup();
+ }
+
+ public:
+ RedisQtAdapter(QObject * parent = 0)
+ : QObject(parent), m_ctx(0), m_read(0), m_write(0) { }
+
+ ~RedisQtAdapter() {
+ if (m_ctx != 0) {
+ m_ctx->ev.data = NULL;
+ }
+ }
+
+ int setContext(redisAsyncContext * ac) {
+ if (ac->ev.data != NULL) {
+ return REDIS_ERR;
+ }
+ m_ctx = ac;
+ m_ctx->ev.data = this;
+ m_ctx->ev.addRead = RedisQtAddRead;
+ m_ctx->ev.delRead = RedisQtDelRead;
+ m_ctx->ev.addWrite = RedisQtAddWrite;
+ m_ctx->ev.delWrite = RedisQtDelWrite;
+ m_ctx->ev.cleanup = RedisQtCleanup;
+ return REDIS_OK;
+ }
+
+ private:
+ void addRead() {
+ if (m_read) return;
+ m_read = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Read, 0);
+ connect(m_read, SIGNAL(activated(int)), this, SLOT(read()));
+ }
+
+ void delRead() {
+ if (!m_read) return;
+ delete m_read;
+ m_read = 0;
+ }
+
+ void addWrite() {
+ if (m_write) return;
+ m_write = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Write, 0);
+ connect(m_write, SIGNAL(activated(int)), this, SLOT(write()));
+ }
+
+ void delWrite() {
+ if (!m_write) return;
+ delete m_write;
+ m_write = 0;
+ }
+
+ void cleanup() {
+ delRead();
+ delWrite();
+ }
+
+ private slots:
+ void read() { redisAsyncHandleRead(m_ctx); }
+ void write() { redisAsyncHandleWrite(m_ctx); }
+
+ private:
+ redisAsyncContext * m_ctx;
+ QSocketNotifier * m_read;
+ QSocketNotifier * m_write;
+};
+
+#endif /* !__HIREDIS_QT_H__ */
diff --git a/deps/hiredis/appveyor.yml b/deps/hiredis/appveyor.yml
new file mode 100644
index 000000000..06bbef117
--- /dev/null
+++ b/deps/hiredis/appveyor.yml
@@ -0,0 +1,36 @@
+# Appveyor configuration file for CI build of hiredis on Windows (under Cygwin)
+environment:
+ matrix:
+ - CYG_ROOT: C:\cygwin64
+ CYG_SETUP: setup-x86_64.exe
+ CYG_MIRROR: http://cygwin.mirror.constant.com
+ CYG_CACHE: C:\cygwin64\var\cache\setup
+ CYG_BASH: C:\cygwin64\bin\bash
+ CC: gcc
+ - CYG_ROOT: C:\cygwin
+ CYG_SETUP: setup-x86.exe
+ CYG_MIRROR: http://cygwin.mirror.constant.com
+ CYG_CACHE: C:\cygwin\var\cache\setup
+ CYG_BASH: C:\cygwin\bin\bash
+ CC: gcc
+ TARGET: 32bit
+ TARGET_VARS: 32bit-vars
+
+# Cache Cygwin files to speed up build
+cache:
+ - '%CYG_CACHE%'
+clone_depth: 1
+
+# Attempt to ensure we don't try to convert line endings to Win32 CRLF as this will cause build to fail
+init:
+ - git config --global core.autocrlf input
+
+# Install needed build dependencies
+install:
+ - ps: 'Start-FileDownload "http://cygwin.com/$env:CYG_SETUP" -FileName "$env:CYG_SETUP"'
+ - '%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages automake,bison,gcc-core,libtool,make,gettext-devel,gettext,intltool,pkg-config,clang,llvm > NUL 2>&1'
+ - '%CYG_BASH% -lc "cygcheck -dc cygwin"'
+
+build_script:
+ - 'echo building...'
+ - '%CYG_BASH% -lc "cd $APPVEYOR_BUILD_FOLDER; exec 0</dev/null; make LDFLAGS=$LDFLAGS CC=$CC $TARGET CFLAGS=$CFLAGS && make LDFLAGS=$LDFLAGS CC=$CC $TARGET_VARS hiredis-example"'
diff --git a/deps/hiredis/async.c b/deps/hiredis/async.c
index f7f343bef..d955203f8 100644
--- a/deps/hiredis/async.c
+++ b/deps/hiredis/async.c
@@ -58,7 +58,7 @@
} while(0);
/* Forward declaration of function in hiredis.c */
-void __redisAppendCommand(redisContext *c, char *cmd, size_t len);
+int __redisAppendCommand(redisContext *c, const char *cmd, size_t len);
/* Functions managing dictionary of callbacks for pub/sub. */
static unsigned int callbackHash(const void *key) {
@@ -142,6 +142,9 @@ static redisAsyncContext *redisAsyncInitialize(redisContext *c) {
/* We want the error field to be accessible directly instead of requiring
* an indirection to the redisContext struct. */
static void __redisAsyncCopyError(redisAsyncContext *ac) {
+ if (!ac)
+ return;
+
redisContext *c = &(ac->c);
ac->err = c->err;
ac->errstr = c->errstr;
@@ -173,6 +176,14 @@ redisAsyncContext *redisAsyncConnectBind(const char *ip, int port,
return ac;
}
+redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port,
+ const char *source_addr) {
+ redisContext *c = redisConnectBindNonBlockWithReuse(ip,port,source_addr);
+ redisAsyncContext *ac = redisAsyncInitialize(c);
+ __redisAsyncCopyError(ac);
+ return ac;
+}
+
redisAsyncContext *redisAsyncConnectUnix(const char *path) {
redisContext *c;
redisAsyncContext *ac;
@@ -407,7 +418,8 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
if (reply == NULL) {
/* When the connection is being disconnected and there are
* no more replies, this is the cue to really disconnect. */
- if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0) {
+ if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0
+ && ac->replies.head == NULL) {
__redisAsyncDisconnect(ac);
return;
}
@@ -443,6 +455,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
if (((redisReply*)reply)->type == REDIS_REPLY_ERROR) {
c->err = REDIS_ERR_OTHER;
snprintf(c->errstr,sizeof(c->errstr),"%s",((redisReply*)reply)->str);
+ c->reader->fn->freeObject(reply);
__redisAsyncDisconnect(ac);
return;
}
@@ -476,7 +489,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
}
/* Internal helper function to detect socket status the first time a read or
- * write event fires. When connecting was not succesful, the connect callback
+ * write event fires. When connecting was not successful, the connect callback
* is called with a REDIS_ERR status and the context is free'd. */
static int __redisAsyncHandleConnect(redisAsyncContext *ac) {
redisContext *c = &(ac->c);
@@ -550,8 +563,8 @@ void redisAsyncHandleWrite(redisAsyncContext *ac) {
/* Sets a pointer to the first argument and its length starting at p. Returns
* the number of bytes to skip to get to the following argument. */
-static char *nextArgument(char *start, char **str, size_t *len) {
- char *p = start;
+static const char *nextArgument(const char *start, const char **str, size_t *len) {
+ const char *p = start;
if (p[0] != '$') {
p = strchr(p,'$');
if (p == NULL) return NULL;
@@ -567,14 +580,15 @@ static char *nextArgument(char *start, char **str, size_t *len) {
/* Helper function for the redisAsyncCommand* family of functions. Writes a
* formatted command to the output buffer and registers the provided callback
* function with the context. */
-static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, char *cmd, size_t len) {
+static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) {
redisContext *c = &(ac->c);
redisCallback cb;
int pvariant, hasnext;
- char *cstr, *astr;
+ const char *cstr, *astr;
size_t clen, alen;
- char *p;
+ const char *p;
sds sname;
+ int ret;
/* Don't accept new commands when the connection is about to be closed. */
if (c->flags & (REDIS_DISCONNECTING | REDIS_FREEING)) return REDIS_ERR;
@@ -598,9 +612,11 @@ static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void
while ((p = nextArgument(p,&astr,&alen)) != NULL) {
sname = sdsnewlen(astr,alen);
if (pvariant)
- dictReplace(ac->sub.patterns,sname,&cb);
+ ret = dictReplace(ac->sub.patterns,sname,&cb);
else
- dictReplace(ac->sub.channels,sname,&cb);
+ ret = dictReplace(ac->sub.channels,sname,&cb);
+
+ if (ret == 0) sdsfree(sname);
}
} else if (strncasecmp(cstr,"unsubscribe\r\n",13) == 0) {
/* It is only useful to call (P)UNSUBSCRIBE when the context is
@@ -636,6 +652,11 @@ int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdat
int len;
int status;
len = redisvFormatCommand(&cmd,format,ap);
+
+ /* We don't want to pass -1 or -2 to future functions as a length. */
+ if (len < 0)
+ return REDIS_ERR;
+
status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
free(cmd);
return status;
@@ -651,11 +672,16 @@ int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata
}
int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen) {
- char *cmd;
+ sds cmd;
int len;
int status;
- len = redisFormatCommandArgv(&cmd,argc,argv,argvlen);
+ len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen);
status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
- free(cmd);
+ sdsfree(cmd);
+ return status;
+}
+
+int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) {
+ int status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
return status;
}
diff --git a/deps/hiredis/async.h b/deps/hiredis/async.h
index 8a2cf1ecd..59cbf469b 100644
--- a/deps/hiredis/async.h
+++ b/deps/hiredis/async.h
@@ -103,6 +103,8 @@ typedef struct redisAsyncContext {
/* Functions that proxy to hiredis */
redisAsyncContext *redisAsyncConnect(const char *ip, int port);
redisAsyncContext *redisAsyncConnectBind(const char *ip, int port, const char *source_addr);
+redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port,
+ const char *source_addr);
redisAsyncContext *redisAsyncConnectUnix(const char *path);
int redisAsyncSetConnectCallback(redisAsyncContext *ac, redisConnectCallback *fn);
int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
@@ -118,6 +120,7 @@ void redisAsyncHandleWrite(redisAsyncContext *ac);
int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, va_list ap);
int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, ...);
int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen);
+int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len);
#ifdef __cplusplus
}
diff --git a/deps/hiredis/dict.c b/deps/hiredis/dict.c
index 79b1041ca..e17a62546 100644
--- a/deps/hiredis/dict.c
+++ b/deps/hiredis/dict.c
@@ -161,7 +161,7 @@ static int dictReplace(dict *ht, void *key, void *val) {
dictEntry *entry, auxentry;
/* Try to add the element. If the key
- * does not exists dictAdd will suceed. */
+ * does not exists dictAdd will succeed. */
if (dictAdd(ht, key, val) == DICT_OK)
return 1;
/* It already exists, get the entry */
@@ -293,7 +293,7 @@ static void dictReleaseIterator(dictIterator *iter) {
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *ht) {
- /* If the hash table is empty expand it to the intial size,
+ /* If the hash table is empty expand it to the initial size,
* if the table is "full" dobule its size. */
if (ht->size == 0)
return dictExpand(ht, DICT_HT_INITIAL_SIZE);
diff --git a/deps/hiredis/examples/example-glib.c b/deps/hiredis/examples/example-glib.c
new file mode 100644
index 000000000..d6e10f8e8
--- /dev/null
+++ b/deps/hiredis/examples/example-glib.c
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/glib.h>
+
+static GMainLoop *mainloop;
+
+static void
+connect_cb (const redisAsyncContext *ac G_GNUC_UNUSED,
+ int status)
+{
+ if (status != REDIS_OK) {
+ g_printerr("Failed to connect: %s\n", ac->errstr);
+ g_main_loop_quit(mainloop);
+ } else {
+ g_printerr("Connected...\n");
+ }
+}
+
+static void
+disconnect_cb (const redisAsyncContext *ac G_GNUC_UNUSED,
+ int status)
+{
+ if (status != REDIS_OK) {
+ g_error("Failed to disconnect: %s", ac->errstr);
+ } else {
+ g_printerr("Disconnected...\n");
+ g_main_loop_quit(mainloop);
+ }
+}
+
+static void
+command_cb(redisAsyncContext *ac,
+ gpointer r,
+ gpointer user_data G_GNUC_UNUSED)
+{
+ redisReply *reply = r;
+
+ if (reply) {
+ g_print("REPLY: %s\n", reply->str);
+ }
+
+ redisAsyncDisconnect(ac);
+}
+
+gint
+main (gint argc G_GNUC_UNUSED,
+ gchar *argv[] G_GNUC_UNUSED)
+{
+ redisAsyncContext *ac;
+ GMainContext *context = NULL;
+ GSource *source;
+
+ ac = redisAsyncConnect("127.0.0.1", 6379);
+ if (ac->err) {
+ g_printerr("%s\n", ac->errstr);
+ exit(EXIT_FAILURE);
+ }
+
+ source = redis_source_new(ac);
+ mainloop = g_main_loop_new(context, FALSE);
+ g_source_attach(source, context);
+
+ redisAsyncSetConnectCallback(ac, connect_cb);
+ redisAsyncSetDisconnectCallback(ac, disconnect_cb);
+ redisAsyncCommand(ac, command_cb, NULL, "SET key 1234");
+ redisAsyncCommand(ac, command_cb, NULL, "GET key");
+
+ g_main_loop_run(mainloop);
+
+ return EXIT_SUCCESS;
+}
diff --git a/deps/hiredis/examples/example-ivykis.c b/deps/hiredis/examples/example-ivykis.c
new file mode 100644
index 000000000..67affcef3
--- /dev/null
+++ b/deps/hiredis/examples/example-ivykis.c
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/ivykis.h>
+
+void getCallback(redisAsyncContext *c, void *r, void *privdata) {
+ redisReply *reply = r;
+ if (reply == NULL) return;
+ printf("argv[%s]: %s\n", (char*)privdata, reply->str);
+
+ /* Disconnect after receiving the reply to GET */
+ redisAsyncDisconnect(c);
+}
+
+void connectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Connected...\n");
+}
+
+void disconnectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Disconnected...\n");
+}
+
+int main (int argc, char **argv) {
+ signal(SIGPIPE, SIG_IGN);
+
+ iv_init();
+
+ redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+ if (c->err) {
+ /* Let *c leak for now... */
+ printf("Error: %s\n", c->errstr);
+ return 1;
+ }
+
+ redisIvykisAttach(c);
+ redisAsyncSetConnectCallback(c,connectCallback);
+ redisAsyncSetDisconnectCallback(c,disconnectCallback);
+ redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1]));
+ redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key");
+
+ iv_main();
+
+ iv_deinit();
+
+ return 0;
+}
diff --git a/deps/hiredis/examples/example-macosx.c b/deps/hiredis/examples/example-macosx.c
new file mode 100644
index 000000000..bc84ed5ba
--- /dev/null
+++ b/deps/hiredis/examples/example-macosx.c
@@ -0,0 +1,66 @@
+//
+// Created by Дмитрий Бахвалов on 13.07.15.
+// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved.
+//
+
+#include <stdio.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/macosx.h>
+
+void getCallback(redisAsyncContext *c, void *r, void *privdata) {
+ redisReply *reply = r;
+ if (reply == NULL) return;
+ printf("argv[%s]: %s\n", (char*)privdata, reply->str);
+
+ /* Disconnect after receiving the reply to GET */
+ redisAsyncDisconnect(c);
+}
+
+void connectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Connected...\n");
+}
+
+void disconnectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ CFRunLoopStop(CFRunLoopGetCurrent());
+ printf("Disconnected...\n");
+}
+
+int main (int argc, char **argv) {
+ signal(SIGPIPE, SIG_IGN);
+
+ CFRunLoopRef loop = CFRunLoopGetCurrent();
+ if( !loop ) {
+ printf("Error: Cannot get current run loop\n");
+ return 1;
+ }
+
+ redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+ if (c->err) {
+ /* Let *c leak for now... */
+ printf("Error: %s\n", c->errstr);
+ return 1;
+ }
+
+ redisMacOSAttach(c, loop);
+
+ redisAsyncSetConnectCallback(c,connectCallback);
+ redisAsyncSetDisconnectCallback(c,disconnectCallback);
+
+ redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1]));
+ redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key");
+
+ CFRunLoopRun();
+
+ return 0;
+}
+
diff --git a/deps/hiredis/examples/example-qt.cpp b/deps/hiredis/examples/example-qt.cpp
new file mode 100644
index 000000000..f524c3f3d
--- /dev/null
+++ b/deps/hiredis/examples/example-qt.cpp
@@ -0,0 +1,46 @@
+#include <iostream>
+using namespace std;
+
+#include <QCoreApplication>
+#include <QTimer>
+
+#include "example-qt.h"
+
+void getCallback(redisAsyncContext *, void * r, void * privdata) {
+
+ redisReply * reply = static_cast<redisReply *>(r);
+ ExampleQt * ex = static_cast<ExampleQt *>(privdata);
+ if (reply == nullptr || ex == nullptr) return;
+
+ cout << "key: " << reply->str << endl;
+
+ ex->finish();
+}
+
+void ExampleQt::run() {
+
+ m_ctx = redisAsyncConnect("localhost", 6379);
+
+ if (m_ctx->err) {
+ cerr << "Error: " << m_ctx->errstr << endl;
+ redisAsyncFree(m_ctx);
+ emit finished();
+ }
+
+ m_adapter.setContext(m_ctx);
+
+ redisAsyncCommand(m_ctx, NULL, NULL, "SET key %s", m_value);
+ redisAsyncCommand(m_ctx, getCallback, this, "GET key");
+}
+
+int main (int argc, char **argv) {
+
+ QCoreApplication app(argc, argv);
+
+ ExampleQt example(argv[argc-1]);
+
+ QObject::connect(&example, SIGNAL(finished()), &app, SLOT(quit()));
+ QTimer::singleShot(0, &example, SLOT(run()));
+
+ return app.exec();
+}
diff --git a/deps/hiredis/examples/example-qt.h b/deps/hiredis/examples/example-qt.h
new file mode 100644
index 000000000..374f47666
--- /dev/null
+++ b/deps/hiredis/examples/example-qt.h
@@ -0,0 +1,32 @@
+#ifndef __HIREDIS_EXAMPLE_QT_H
+#define __HIREDIS_EXAMPLE_QT_H
+
+#include <adapters/qt.h>
+
+class ExampleQt : public QObject {
+
+ Q_OBJECT
+
+ public:
+ ExampleQt(const char * value, QObject * parent = 0)
+ : QObject(parent), m_value(value) {}
+
+ signals:
+ void finished();
+
+ public slots:
+ void run();
+
+ private:
+ void finish() { emit finished(); }
+
+ private:
+ const char * m_value;
+ redisAsyncContext * m_ctx;
+ RedisQtAdapter m_adapter;
+
+ friend
+ void getCallback(redisAsyncContext *, void *, void *);
+};
+
+#endif /* !__HIREDIS_EXAMPLE_QT_H */
diff --git a/deps/hiredis/examples/example.c b/deps/hiredis/examples/example.c
index 25226a807..4d494c55a 100644
--- a/deps/hiredis/examples/example.c
+++ b/deps/hiredis/examples/example.c
@@ -57,7 +57,7 @@ int main(int argc, char **argv) {
for (j = 0; j < 10; j++) {
char buf[64];
- snprintf(buf,64,"%d",j);
+ snprintf(buf,64,"%u",j);
reply = redisCommand(c,"LPUSH mylist element-%s", buf);
freeReplyObject(reply);
}
diff --git a/deps/hiredis/fmacros.h b/deps/hiredis/fmacros.h
index 9e5fec0ce..14fed6060 100644
--- a/deps/hiredis/fmacros.h
+++ b/deps/hiredis/fmacros.h
@@ -1,19 +1,24 @@
#ifndef __HIREDIS_FMACRO_H
#define __HIREDIS_FMACRO_H
-#if !defined(_BSD_SOURCE)
+#if defined(__linux__)
#define _BSD_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#if defined(__CYGWIN__)
+#include <sys/cdefs.h>
#endif
#if defined(__sun__)
#define _POSIX_C_SOURCE 200112L
-#elif defined(__linux__) || defined(__OpenBSD__) || defined(__NetBSD__)
-#define _XOPEN_SOURCE 600
#else
-#define _XOPEN_SOURCE
+#if !(defined(__APPLE__) && defined(__MACH__))
+#define _XOPEN_SOURCE 600
+#endif
#endif
-#if __APPLE__ && __MACH__
+#if defined(__APPLE__) && defined(__MACH__)
#define _OSX
#endif
diff --git a/deps/hiredis/hiredis.c b/deps/hiredis/hiredis.c
index 2afee5666..18bdfc99c 100644
--- a/deps/hiredis/hiredis.c
+++ b/deps/hiredis/hiredis.c
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -73,6 +75,9 @@ void freeReplyObject(void *reply) {
redisReply *r = reply;
size_t j;
+ if (r == NULL)
+ return;
+
switch(r->type) {
case REDIS_REPLY_INTEGER:
break; /* Nothing to free */
@@ -183,504 +188,23 @@ static void *createNilObject(const redisReadTask *task) {
return r;
}
-static void __redisReaderSetError(redisReader *r, int type, const char *str) {
- size_t len;
-
- if (r->reply != NULL && r->fn && r->fn->freeObject) {
- r->fn->freeObject(r->reply);
- r->reply = NULL;
- }
-
- /* Clear input buffer on errors. */
- if (r->buf != NULL) {
- sdsfree(r->buf);
- r->buf = NULL;
- r->pos = r->len = 0;
- }
-
- /* Reset task stack. */
- r->ridx = -1;
-
- /* Set error. */
- r->err = type;
- len = strlen(str);
- len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1);
- memcpy(r->errstr,str,len);
- r->errstr[len] = '\0';
-}
-
-static size_t chrtos(char *buf, size_t size, char byte) {
- size_t len = 0;
-
- switch(byte) {
- case '\\':
- case '"':
- len = snprintf(buf,size,"\"\\%c\"",byte);
- break;
- case '\n': len = snprintf(buf,size,"\"\\n\""); break;
- case '\r': len = snprintf(buf,size,"\"\\r\""); break;
- case '\t': len = snprintf(buf,size,"\"\\t\""); break;
- case '\a': len = snprintf(buf,size,"\"\\a\""); break;
- case '\b': len = snprintf(buf,size,"\"\\b\""); break;
- default:
- if (isprint(byte))
- len = snprintf(buf,size,"\"%c\"",byte);
- else
- len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte);
- break;
- }
-
- return len;
-}
-
-static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) {
- char cbuf[8], sbuf[128];
-
- chrtos(cbuf,sizeof(cbuf),byte);
- snprintf(sbuf,sizeof(sbuf),
- "Protocol error, got %s as reply type byte", cbuf);
- __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf);
-}
-
-static void __redisReaderSetErrorOOM(redisReader *r) {
- __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory");
-}
-
-static char *readBytes(redisReader *r, unsigned int bytes) {
- char *p;
- if (r->len-r->pos >= bytes) {
- p = r->buf+r->pos;
- r->pos += bytes;
- return p;
- }
- return NULL;
-}
-
-/* Find pointer to \r\n. */
-static char *seekNewline(char *s, size_t len) {
- int pos = 0;
- int _len = len-1;
-
- /* Position should be < len-1 because the character at "pos" should be
- * followed by a \n. Note that strchr cannot be used because it doesn't
- * allow to search a limited length and the buffer that is being searched
- * might not have a trailing NULL character. */
- while (pos < _len) {
- while(pos < _len && s[pos] != '\r') pos++;
- if (s[pos] != '\r') {
- /* Not found. */
- return NULL;
- } else {
- if (s[pos+1] == '\n') {
- /* Found. */
- return s+pos;
- } else {
- /* Continue searching. */
- pos++;
- }
- }
- }
- return NULL;
-}
-
-/* Read a long long value starting at *s, under the assumption that it will be
- * terminated by \r\n. Ambiguously returns -1 for unexpected input. */
-static long long readLongLong(char *s) {
- long long v = 0;
- int dec, mult = 1;
- char c;
-
- if (*s == '-') {
- mult = -1;
- s++;
- } else if (*s == '+') {
- mult = 1;
- s++;
- }
-
- while ((c = *(s++)) != '\r') {
- dec = c - '0';
- if (dec >= 0 && dec < 10) {
- v *= 10;
- v += dec;
- } else {
- /* Should not happen... */
- return -1;
- }
- }
-
- return mult*v;
-}
-
-static char *readLine(redisReader *r, int *_len) {
- char *p, *s;
- int len;
-
- p = r->buf+r->pos;
- s = seekNewline(p,(r->len-r->pos));
- if (s != NULL) {
- len = s-(r->buf+r->pos);
- r->pos += len+2; /* skip \r\n */
- if (_len) *_len = len;
- return p;
- }
- return NULL;
-}
-
-static void moveToNextTask(redisReader *r) {
- redisReadTask *cur, *prv;
- while (r->ridx >= 0) {
- /* Return a.s.a.p. when the stack is now empty. */
- if (r->ridx == 0) {
- r->ridx--;
- return;
- }
-
- cur = &(r->rstack[r->ridx]);
- prv = &(r->rstack[r->ridx-1]);
- assert(prv->type == REDIS_REPLY_ARRAY);
- if (cur->idx == prv->elements-1) {
- r->ridx--;
- } else {
- /* Reset the type because the next item can be anything */
- assert(cur->idx < prv->elements);
- cur->type = -1;
- cur->elements = -1;
- cur->idx++;
- return;
- }
- }
-}
-
-static int processLineItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj;
- char *p;
- int len;
-
- if ((p = readLine(r,&len)) != NULL) {
- if (cur->type == REDIS_REPLY_INTEGER) {
- if (r->fn && r->fn->createInteger)
- obj = r->fn->createInteger(cur,readLongLong(p));
- else
- obj = (void*)REDIS_REPLY_INTEGER;
- } else {
- /* Type will be error or status. */
- if (r->fn && r->fn->createString)
- obj = r->fn->createString(cur,p,len);
- else
- obj = (void*)(size_t)(cur->type);
- }
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- /* Set reply if this is the root object. */
- if (r->ridx == 0) r->reply = obj;
- moveToNextTask(r);
- return REDIS_OK;
- }
-
- return REDIS_ERR;
-}
-
-static int processBulkItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj = NULL;
- char *p, *s;
- long len;
- unsigned long bytelen;
- int success = 0;
-
- p = r->buf+r->pos;
- s = seekNewline(p,r->len-r->pos);
- if (s != NULL) {
- p = r->buf+r->pos;
- bytelen = s-(r->buf+r->pos)+2; /* include \r\n */
- len = readLongLong(p);
-
- if (len < 0) {
- /* The nil object can always be created. */
- if (r->fn && r->fn->createNil)
- obj = r->fn->createNil(cur);
- else
- obj = (void*)REDIS_REPLY_NIL;
- success = 1;
- } else {
- /* Only continue when the buffer contains the entire bulk item. */
- bytelen += len+2; /* include \r\n */
- if (r->pos+bytelen <= r->len) {
- if (r->fn && r->fn->createString)
- obj = r->fn->createString(cur,s+2,len);
- else
- obj = (void*)REDIS_REPLY_STRING;
- success = 1;
- }
- }
-
- /* Proceed when obj was created. */
- if (success) {
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- r->pos += bytelen;
-
- /* Set reply if this is the root object. */
- if (r->ridx == 0) r->reply = obj;
- moveToNextTask(r);
- return REDIS_OK;
- }
- }
-
- return REDIS_ERR;
-}
-
-static int processMultiBulkItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj;
- char *p;
- long elements;
- int root = 0;
-
- /* Set error for nested multi bulks with depth > 7 */
- if (r->ridx == 8) {
- __redisReaderSetError(r,REDIS_ERR_PROTOCOL,
- "No support for nested multi bulk replies with depth > 7");
- return REDIS_ERR;
- }
-
- if ((p = readLine(r,NULL)) != NULL) {
- elements = readLongLong(p);
- root = (r->ridx == 0);
-
- if (elements == -1) {
- if (r->fn && r->fn->createNil)
- obj = r->fn->createNil(cur);
- else
- obj = (void*)REDIS_REPLY_NIL;
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- moveToNextTask(r);
- } else {
- if (r->fn && r->fn->createArray)
- obj = r->fn->createArray(cur,elements);
- else
- obj = (void*)REDIS_REPLY_ARRAY;
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- /* Modify task stack when there are more than 0 elements. */
- if (elements > 0) {
- cur->elements = elements;
- cur->obj = obj;
- r->ridx++;
- r->rstack[r->ridx].type = -1;
- r->rstack[r->ridx].elements = -1;
- r->rstack[r->ridx].idx = 0;
- r->rstack[r->ridx].obj = NULL;
- r->rstack[r->ridx].parent = cur;
- r->rstack[r->ridx].privdata = r->privdata;
- } else {
- moveToNextTask(r);
- }
- }
-
- /* Set reply if this is the root object. */
- if (root) r->reply = obj;
- return REDIS_OK;
- }
-
- return REDIS_ERR;
-}
-
-static int processItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- char *p;
-
- /* check if we need to read type */
- if (cur->type < 0) {
- if ((p = readBytes(r,1)) != NULL) {
- switch (p[0]) {
- case '-':
- cur->type = REDIS_REPLY_ERROR;
- break;
- case '+':
- cur->type = REDIS_REPLY_STATUS;
- break;
- case ':':
- cur->type = REDIS_REPLY_INTEGER;
- break;
- case '$':
- cur->type = REDIS_REPLY_STRING;
- break;
- case '*':
- cur->type = REDIS_REPLY_ARRAY;
- break;
- default:
- __redisReaderSetErrorProtocolByte(r,*p);
- return REDIS_ERR;
- }
- } else {
- /* could not consume 1 byte */
- return REDIS_ERR;
- }
- }
-
- /* process typed item */
- switch(cur->type) {
- case REDIS_REPLY_ERROR:
- case REDIS_REPLY_STATUS:
- case REDIS_REPLY_INTEGER:
- return processLineItem(r);
- case REDIS_REPLY_STRING:
- return processBulkItem(r);
- case REDIS_REPLY_ARRAY:
- return processMultiBulkItem(r);
- default:
- assert(NULL);
- return REDIS_ERR; /* Avoid warning. */
- }
-}
-
-redisReader *redisReaderCreate(void) {
- redisReader *r;
-
- r = calloc(sizeof(redisReader),1);
- if (r == NULL)
- return NULL;
-
- r->err = 0;
- r->errstr[0] = '\0';
- r->fn = &defaultFunctions;
- r->buf = sdsempty();
- r->maxbuf = REDIS_READER_MAX_BUF;
- if (r->buf == NULL) {
- free(r);
- return NULL;
- }
-
- r->ridx = -1;
- return r;
-}
-
-void redisReaderFree(redisReader *r) {
- if (r->reply != NULL && r->fn && r->fn->freeObject)
- r->fn->freeObject(r->reply);
- if (r->buf != NULL)
- sdsfree(r->buf);
- free(r);
-}
-
-int redisReaderFeed(redisReader *r, const char *buf, size_t len) {
- sds newbuf;
-
- /* Return early when this reader is in an erroneous state. */
- if (r->err)
- return REDIS_ERR;
-
- /* Copy the provided buffer. */
- if (buf != NULL && len >= 1) {
- /* Destroy internal buffer when it is empty and is quite large. */
- if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) {
- sdsfree(r->buf);
- r->buf = sdsempty();
- r->pos = 0;
-
- /* r->buf should not be NULL since we just free'd a larger one. */
- assert(r->buf != NULL);
- }
-
- newbuf = sdscatlen(r->buf,buf,len);
- if (newbuf == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- r->buf = newbuf;
- r->len = sdslen(r->buf);
- }
-
- return REDIS_OK;
-}
-
-int redisReaderGetReply(redisReader *r, void **reply) {
- /* Default target pointer to NULL. */
- if (reply != NULL)
- *reply = NULL;
-
- /* Return early when this reader is in an erroneous state. */
- if (r->err)
- return REDIS_ERR;
-
- /* When the buffer is empty, there will never be a reply. */
- if (r->len == 0)
- return REDIS_OK;
-
- /* Set first item to process when the stack is empty. */
- if (r->ridx == -1) {
- r->rstack[0].type = -1;
- r->rstack[0].elements = -1;
- r->rstack[0].idx = -1;
- r->rstack[0].obj = NULL;
- r->rstack[0].parent = NULL;
- r->rstack[0].privdata = r->privdata;
- r->ridx = 0;
- }
-
- /* Process items in reply. */
- while (r->ridx >= 0)
- if (processItem(r) != REDIS_OK)
- break;
-
- /* Return ASAP when an error occurred. */
- if (r->err)
- return REDIS_ERR;
-
- /* Discard part of the buffer when we've consumed at least 1k, to avoid
- * doing unnecessary calls to memmove() in sds.c. */
- if (r->pos >= 1024) {
- sdsrange(r->buf,r->pos,-1);
- r->pos = 0;
- r->len = sdslen(r->buf);
- }
-
- /* Emit a reply when there is one. */
- if (r->ridx == -1) {
- if (reply != NULL)
- *reply = r->reply;
- r->reply = NULL;
- }
- return REDIS_OK;
-}
-
-/* Calculate the number of bytes needed to represent an integer as string. */
-static int intlen(int i) {
- int len = 0;
- if (i < 0) {
- len++;
- i = -i;
- }
- do {
- len++;
- i /= 10;
- } while(i);
- return len;
+/* Return the number of digits of 'v' when converted to string in radix 10.
+ * Implementation borrowed from link in redis/src/util.c:string2ll(). */
+static uint32_t countDigits(uint64_t v) {
+ uint32_t result = 1;
+ for (;;) {
+ if (v < 10) return result;
+ if (v < 100) return result + 1;
+ if (v < 1000) return result + 2;
+ if (v < 10000) return result + 3;
+ v /= 10000U;
+ result += 4;
+ }
}
/* Helper that calculates the bulk length given a certain string length. */
static size_t bulklen(size_t len) {
- return 1+intlen(len)+2+len+2;
+ return 1+countDigits(len)+2+len+2;
}
int redisvFormatCommand(char **target, const char *format, va_list ap) {
@@ -692,6 +216,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
char **curargv = NULL, **newargv = NULL;
int argc = 0;
int totlen = 0;
+ int error_type = 0; /* 0 = no error; -1 = memory error; -2 = format error */
int j;
/* Abort if there is not target to set */
@@ -708,19 +233,19 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
if (*c == ' ') {
if (touched) {
newargv = realloc(curargv,sizeof(char*)*(argc+1));
- if (newargv == NULL) goto err;
+ if (newargv == NULL) goto memory_err;
curargv = newargv;
curargv[argc++] = curarg;
totlen += bulklen(sdslen(curarg));
/* curarg is put in argv so it can be overwritten. */
curarg = sdsempty();
- if (curarg == NULL) goto err;
+ if (curarg == NULL) goto memory_err;
touched = 0;
}
} else {
newarg = sdscatlen(curarg,c,1);
- if (newarg == NULL) goto err;
+ if (newarg == NULL) goto memory_err;
curarg = newarg;
touched = 1;
}
@@ -751,17 +276,14 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
/* Try to detect printf format */
{
static const char intfmts[] = "diouxX";
+ static const char flags[] = "#0-+ ";
char _format[16];
const char *_p = c+1;
size_t _l = 0;
va_list _cpy;
/* Flags */
- if (*_p != '\0' && *_p == '#') _p++;
- if (*_p != '\0' && *_p == '0') _p++;
- if (*_p != '\0' && *_p == '-') _p++;
- if (*_p != '\0' && *_p == ' ') _p++;
- if (*_p != '\0' && *_p == '+') _p++;
+ while (*_p != '\0' && strchr(flags,*_p) != NULL) _p++;
/* Field width */
while (*_p != '\0' && isdigit(*_p)) _p++;
@@ -829,7 +351,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
fmt_invalid:
va_end(_cpy);
- goto err;
+ goto format_err;
fmt_valid:
_l = (_p+1)-c;
@@ -848,7 +370,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
}
}
- if (newarg == NULL) goto err;
+ if (newarg == NULL) goto memory_err;
curarg = newarg;
touched = 1;
@@ -860,7 +382,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
/* Add the last argument if needed */
if (touched) {
newargv = realloc(curargv,sizeof(char*)*(argc+1));
- if (newargv == NULL) goto err;
+ if (newargv == NULL) goto memory_err;
curargv = newargv;
curargv[argc++] = curarg;
totlen += bulklen(sdslen(curarg));
@@ -872,11 +394,11 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
curarg = NULL;
/* Add bytes needed to hold multi bulk count */
- totlen += 1+intlen(argc)+2;
+ totlen += 1+countDigits(argc)+2;
/* Build the command at protocol level */
cmd = malloc(totlen+1);
- if (cmd == NULL) goto err;
+ if (cmd == NULL) goto memory_err;
pos = sprintf(cmd,"*%d\r\n",argc);
for (j = 0; j < argc; j++) {
@@ -894,20 +416,29 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
*target = cmd;
return totlen;
-err:
- while(argc--)
- sdsfree(curargv[argc]);
- free(curargv);
+format_err:
+ error_type = -2;
+ goto cleanup;
- if (curarg != NULL)
- sdsfree(curarg);
+memory_err:
+ error_type = -1;
+ goto cleanup;
+
+cleanup:
+ if (curargv) {
+ while(argc--)
+ sdsfree(curargv[argc]);
+ free(curargv);
+ }
+
+ sdsfree(curarg);
/* No need to check cmd since it is the last statement that can fail,
* but do it anyway to be as defensive as possible. */
if (cmd != NULL)
free(cmd);
- return -1;
+ return error_type;
}
/* Format a command according to the Redis protocol. This function
@@ -928,9 +459,69 @@ int redisFormatCommand(char **target, const char *format, ...) {
va_start(ap,format);
len = redisvFormatCommand(target,format,ap);
va_end(ap);
+
+ /* The API says "-1" means bad result, but we now also return "-2" in some
+ * cases. Force the return value to always be -1. */
+ if (len < 0)
+ len = -1;
+
return len;
}
+/* Format a command according to the Redis protocol using an sds string and
+ * sdscatfmt for the processing of arguments. This function takes the
+ * number of arguments, an array with arguments and an array with their
+ * lengths. If the latter is set to NULL, strlen will be used to compute the
+ * argument lengths.
+ */
+int redisFormatSdsCommandArgv(sds *target, int argc, const char **argv,
+ const size_t *argvlen)
+{
+ sds cmd;
+ unsigned long long totlen;
+ int j;
+ size_t len;
+
+ /* Abort on a NULL target */
+ if (target == NULL)
+ return -1;
+
+ /* Calculate our total size */
+ totlen = 1+countDigits(argc)+2;
+ for (j = 0; j < argc; j++) {
+ len = argvlen ? argvlen[j] : strlen(argv[j]);
+ totlen += bulklen(len);
+ }
+
+ /* Use an SDS string for command construction */
+ cmd = sdsempty();
+ if (cmd == NULL)
+ return -1;
+
+ /* We already know how much storage we need */
+ cmd = sdsMakeRoomFor(cmd, totlen);
+ if (cmd == NULL)
+ return -1;
+
+ /* Construct command */
+ cmd = sdscatfmt(cmd, "*%i\r\n", argc);
+ for (j=0; j < argc; j++) {
+ len = argvlen ? argvlen[j] : strlen(argv[j]);
+ cmd = sdscatfmt(cmd, "$%u\r\n", len);
+ cmd = sdscatlen(cmd, argv[j], len);
+ cmd = sdscatlen(cmd, "\r\n", sizeof("\r\n")-1);
+ }
+
+ assert(sdslen(cmd)==totlen);
+
+ *target = cmd;
+ return totlen;
+}
+
+void redisFreeSdsCommand(sds cmd) {
+ sdsfree(cmd);
+}
+
/* Format a command according to the Redis protocol. This function takes the
* number of arguments, an array with arguments and an array with their
* lengths. If the latter is set to NULL, strlen will be used to compute the
@@ -942,8 +533,12 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz
size_t len;
int totlen, j;
+ /* Abort on a NULL target */
+ if (target == NULL)
+ return -1;
+
/* Calculate number of bytes needed for the command */
- totlen = 1+intlen(argc)+2;
+ totlen = 1+countDigits(argc)+2;
for (j = 0; j < argc; j++) {
len = argvlen ? argvlen[j] : strlen(argv[j]);
totlen += bulklen(len);
@@ -970,6 +565,10 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz
return totlen;
}
+void redisFreeCommand(char *cmd) {
+ free(cmd);
+}
+
void __redisSetError(redisContext *c, int type, const char *str) {
size_t len;
@@ -982,10 +581,14 @@ void __redisSetError(redisContext *c, int type, const char *str) {
} else {
/* Only REDIS_ERR_IO may lack a description! */
assert(type == REDIS_ERR_IO);
- strerror_r(errno,c->errstr,sizeof(c->errstr));
+ __redis_strerror_r(errno, c->errstr, sizeof(c->errstr));
}
}
+redisReader *redisReaderCreate(void) {
+ return redisReaderCreateWithFunctions(&defaultFunctions);
+}
+
static redisContext *redisContextInit(void) {
redisContext *c;
@@ -997,24 +600,72 @@ static redisContext *redisContextInit(void) {
c->errstr[0] = '\0';
c->obuf = sdsempty();
c->reader = redisReaderCreate();
+ c->tcp.host = NULL;
+ c->tcp.source_addr = NULL;
+ c->unix_sock.path = NULL;
+ c->timeout = NULL;
+
+ if (c->obuf == NULL || c->reader == NULL) {
+ redisFree(c);
+ return NULL;
+ }
+
return c;
}
void redisFree(redisContext *c) {
+ if (c == NULL)
+ return;
if (c->fd > 0)
close(c->fd);
if (c->obuf != NULL)
sdsfree(c->obuf);
if (c->reader != NULL)
redisReaderFree(c->reader);
+ if (c->tcp.host)
+ free(c->tcp.host);
+ if (c->tcp.source_addr)
+ free(c->tcp.source_addr);
+ if (c->unix_sock.path)
+ free(c->unix_sock.path);
+ if (c->timeout)
+ free(c->timeout);
free(c);
}
int redisFreeKeepFd(redisContext *c) {
- int fd = c->fd;
- c->fd = -1;
- redisFree(c);
- return fd;
+ int fd = c->fd;
+ c->fd = -1;
+ redisFree(c);
+ return fd;
+}
+
+int redisReconnect(redisContext *c) {
+ c->err = 0;
+ memset(c->errstr, '\0', strlen(c->errstr));
+
+ if (c->fd > 0) {
+ close(c->fd);
+ }
+
+ sdsfree(c->obuf);
+ redisReaderFree(c->reader);
+
+ c->obuf = sdsempty();
+ c->reader = redisReaderCreate();
+
+ if (c->connection_type == REDIS_CONN_TCP) {
+ return redisContextConnectBindTcp(c, c->tcp.host, c->tcp.port,
+ c->timeout, c->tcp.source_addr);
+ } else if (c->connection_type == REDIS_CONN_UNIX) {
+ return redisContextConnectUnix(c, c->unix_sock.path, c->timeout);
+ } else {
+ /* Something bad happened here and shouldn't have. There isn't
+ enough information in the context to reconnect. */
+ __redisSetError(c,REDIS_ERR_OTHER,"Not enough information to reconnect");
+ }
+
+ return REDIS_ERR;
}
/* Connect to a Redis instance. On error the field error in the returned
@@ -1064,6 +715,15 @@ redisContext *redisConnectBindNonBlock(const char *ip, int port,
return c;
}
+redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port,
+ const char *source_addr) {
+ redisContext *c = redisContextInit();
+ c->flags &= ~REDIS_BLOCK;
+ c->flags |= REDIS_REUSEADDR;
+ redisContextConnectBindTcp(c,ip,port,NULL,source_addr);
+ return c;
+}
+
redisContext *redisConnectUnix(const char *path) {
redisContext *c;
@@ -1162,10 +822,10 @@ int redisBufferRead(redisContext *c) {
/* Write the output buffer to the socket.
*
* Returns REDIS_OK when the buffer is empty, or (a part of) the buffer was
- * succesfully written to the socket. When the buffer is empty after the
+ * successfully written to the socket. When the buffer is empty after the
* write operation, "done" is set to 1 (if given).
*
- * Returns REDIS_ERR if an error occured trying to write and sets
+ * Returns REDIS_ERR if an error occurred trying to write and sets
* c->errstr to hold the appropriate error string.
*/
int redisBufferWrite(redisContext *c, int *done) {
@@ -1274,6 +934,9 @@ int redisvAppendCommand(redisContext *c, const char *format, va_list ap) {
if (len == -1) {
__redisSetError(c,REDIS_ERR_OOM,"Out of memory");
return REDIS_ERR;
+ } else if (len == -2) {
+ __redisSetError(c,REDIS_ERR_OTHER,"Invalid format string");
+ return REDIS_ERR;
}
if (__redisAppendCommand(c,cmd,len) != REDIS_OK) {
@@ -1296,21 +959,21 @@ int redisAppendCommand(redisContext *c, const char *format, ...) {
}
int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen) {
- char *cmd;
+ sds cmd;
int len;
- len = redisFormatCommandArgv(&cmd,argc,argv,argvlen);
+ len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen);
if (len == -1) {
__redisSetError(c,REDIS_ERR_OOM,"Out of memory");
return REDIS_ERR;
}
if (__redisAppendCommand(c,cmd,len) != REDIS_OK) {
- free(cmd);
+ sdsfree(cmd);
return REDIS_ERR;
}
- free(cmd);
+ sdsfree(cmd);
return REDIS_OK;
}
@@ -1321,7 +984,7 @@ int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const s
* context is non-blocking, the "reply" pointer will not be used and the
* command is simply appended to the write buffer.
*
- * Returns the reply when a reply was succesfully retrieved. Returns NULL
+ * Returns the reply when a reply was successfully retrieved. Returns NULL
* otherwise. When NULL is returned in a blocking context, the error field
* in the context will be set.
*/
diff --git a/deps/hiredis/hiredis.h b/deps/hiredis/hiredis.h
index 7700f4b89..423d5e504 100644
--- a/deps/hiredis/hiredis.h
+++ b/deps/hiredis/hiredis.h
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -31,26 +33,16 @@
#ifndef __HIREDIS_H
#define __HIREDIS_H
-#include <stdio.h> /* for size_t */
+#include "read.h"
#include <stdarg.h> /* for va_list */
#include <sys/time.h> /* for struct timeval */
+#include <stdint.h> /* uintXX_t, etc */
+#include "sds.h" /* for sds */
#define HIREDIS_MAJOR 0
-#define HIREDIS_MINOR 11
-#define HIREDIS_PATCH 0
-
-#define REDIS_ERR -1
-#define REDIS_OK 0
-
-/* When an error occurs, the err flag in a context is set to hold the type of
- * error that occured. REDIS_ERR_IO means there was an I/O error and you
- * should use the "errno" variable to find out what is wrong.
- * For other values, the "errstr" field will hold a description. */
-#define REDIS_ERR_IO 1 /* Error in read or write */
-#define REDIS_ERR_EOF 3 /* End of file */
-#define REDIS_ERR_PROTOCOL 4 /* Protocol error */
-#define REDIS_ERR_OOM 5 /* Out of memory */
-#define REDIS_ERR_OTHER 2 /* Everything else... */
+#define HIREDIS_MINOR 13
+#define HIREDIS_PATCH 3
+#define HIREDIS_SONAME 0.13
/* Connection type can be blocking or non-blocking and is set in the
* least significant bit of the flags field in redisContext. */
@@ -79,17 +71,39 @@
/* Flag that is set when monitor mode is active */
#define REDIS_MONITORING 0x40
-#define REDIS_REPLY_STRING 1
-#define REDIS_REPLY_ARRAY 2
-#define REDIS_REPLY_INTEGER 3
-#define REDIS_REPLY_NIL 4
-#define REDIS_REPLY_STATUS 5
-#define REDIS_REPLY_ERROR 6
-
-#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */
+/* Flag that is set when we should set SO_REUSEADDR before calling bind() */
+#define REDIS_REUSEADDR 0x80
#define REDIS_KEEPALIVE_INTERVAL 15 /* seconds */
+/* number of times we retry to connect in the case of EADDRNOTAVAIL and
+ * SO_REUSEADDR is being used. */
+#define REDIS_CONNECT_RETRIES 10
+
+/* strerror_r has two completely different prototypes and behaviors
+ * depending on system issues, so we need to operate on the error buffer
+ * differently depending on which strerror_r we're using. */
+#ifndef _GNU_SOURCE
+/* "regular" POSIX strerror_r that does the right thing. */
+#define __redis_strerror_r(errno, buf, len) \
+ do { \
+ strerror_r((errno), (buf), (len)); \
+ } while (0)
+#else
+/* "bad" GNU strerror_r we need to clean up after. */
+#define __redis_strerror_r(errno, buf, len) \
+ do { \
+ char *err_str = strerror_r((errno), (buf), (len)); \
+ /* If return value _isn't_ the start of the buffer we passed in, \
+ * then GNU strerror_r returned an internal static buffer and we \
+ * need to copy the result into our private buffer. */ \
+ if (err_str != (buf)) { \
+ strncpy((buf), err_str, ((len) - 1)); \
+ buf[(len)-1] = '\0'; \
+ } \
+ } while (0)
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -98,61 +112,13 @@ extern "C" {
typedef struct redisReply {
int type; /* REDIS_REPLY_* */
long long integer; /* The integer when type is REDIS_REPLY_INTEGER */
- int len; /* Length of string */
+ size_t len; /* Length of string */
char *str; /* Used for both REDIS_REPLY_ERROR and REDIS_REPLY_STRING */
size_t elements; /* number of elements, for REDIS_REPLY_ARRAY */
struct redisReply **element; /* elements vector for REDIS_REPLY_ARRAY */
} redisReply;
-typedef struct redisReadTask {
- int type;
- int elements; /* number of elements in multibulk container */
- int idx; /* index in parent (array) object */
- void *obj; /* holds user-generated value for a read task */
- struct redisReadTask *parent; /* parent task */
- void *privdata; /* user-settable arbitrary field */
-} redisReadTask;
-
-typedef struct redisReplyObjectFunctions {
- void *(*createString)(const redisReadTask*, char*, size_t);
- void *(*createArray)(const redisReadTask*, int);
- void *(*createInteger)(const redisReadTask*, long long);
- void *(*createNil)(const redisReadTask*);
- void (*freeObject)(void*);
-} redisReplyObjectFunctions;
-
-/* State for the protocol parser */
-typedef struct redisReader {
- int err; /* Error flags, 0 when there is no error */
- char errstr[128]; /* String representation of error when applicable */
-
- char *buf; /* Read buffer */
- size_t pos; /* Buffer cursor */
- size_t len; /* Buffer length */
- size_t maxbuf; /* Max length of unused buffer */
-
- redisReadTask rstack[9];
- int ridx; /* Index of current read task */
- void *reply; /* Temporary reply pointer */
-
- redisReplyObjectFunctions *fn;
- void *privdata;
-} redisReader;
-
-/* Public API for the protocol parser. */
redisReader *redisReaderCreate(void);
-void redisReaderFree(redisReader *r);
-int redisReaderFeed(redisReader *r, const char *buf, size_t len);
-int redisReaderGetReply(redisReader *r, void **reply);
-
-/* Backwards compatibility, can be removed on big version bump. */
-#define redisReplyReaderCreate redisReaderCreate
-#define redisReplyReaderFree redisReaderFree
-#define redisReplyReaderFeed redisReaderFeed
-#define redisReplyReaderGetReply redisReaderGetReply
-#define redisReplyReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p))
-#define redisReplyReaderGetObject(_r) (((redisReader*)(_r))->reply)
-#define redisReplyReaderGetError(_r) (((redisReader*)(_r))->errstr)
/* Function to free the reply objects hiredis returns by default. */
void freeReplyObject(void *reply);
@@ -161,6 +127,14 @@ void freeReplyObject(void *reply);
int redisvFormatCommand(char **target, const char *format, va_list ap);
int redisFormatCommand(char **target, const char *format, ...);
int redisFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen);
+int redisFormatSdsCommandArgv(sds *target, int argc, const char ** argv, const size_t *argvlen);
+void redisFreeCommand(char *cmd);
+void redisFreeSdsCommand(sds cmd);
+
+enum redisConnectionType {
+ REDIS_CONN_TCP,
+ REDIS_CONN_UNIX
+};
/* Context for a connection to Redis */
typedef struct redisContext {
@@ -170,16 +144,45 @@ typedef struct redisContext {
int flags;
char *obuf; /* Write buffer */
redisReader *reader; /* Protocol reader */
+
+ enum redisConnectionType connection_type;
+ struct timeval *timeout;
+
+ struct {
+ char *host;
+ char *source_addr;
+ int port;
+ } tcp;
+
+ struct {
+ char *path;
+ } unix_sock;
+
} redisContext;
redisContext *redisConnect(const char *ip, int port);
redisContext *redisConnectWithTimeout(const char *ip, int port, const struct timeval tv);
redisContext *redisConnectNonBlock(const char *ip, int port);
-redisContext *redisConnectBindNonBlock(const char *ip, int port, const char *source_addr);
+redisContext *redisConnectBindNonBlock(const char *ip, int port,
+ const char *source_addr);
+redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port,
+ const char *source_addr);
redisContext *redisConnectUnix(const char *path);
redisContext *redisConnectUnixWithTimeout(const char *path, const struct timeval tv);
redisContext *redisConnectUnixNonBlock(const char *path);
redisContext *redisConnectFd(int fd);
+
+/**
+ * Reconnect the given context using the saved information.
+ *
+ * This re-uses the exact same connect options as in the initial connection.
+ * host, ip (or path), timeout and bind address are reused,
+ * flags are used unmodified from the existing context.
+ *
+ * Returns REDIS_OK on successful connect or REDIS_ERR otherwise.
+ */
+int redisReconnect(redisContext *c);
+
int redisSetTimeout(redisContext *c, const struct timeval tv);
int redisEnableKeepAlive(redisContext *c);
void redisFree(redisContext *c);
diff --git a/deps/hiredis/net.c b/deps/hiredis/net.c
index 9fe80bba7..7d4120985 100644
--- a/deps/hiredis/net.c
+++ b/deps/hiredis/net.c
@@ -1,7 +1,9 @@
/* Extracted from anet.c to work properly with Hiredis error reporting.
*
- * Copyright (c) 2006-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -47,6 +49,7 @@
#include <stdio.h>
#include <poll.h>
#include <limits.h>
+#include <stdlib.h>
#include "net.h"
#include "sds.h"
@@ -67,7 +70,7 @@ static void __redisSetErrorFromErrno(redisContext *c, int type, const char *pref
if (prefix != NULL)
len = snprintf(buf,sizeof(buf),"%s: ",prefix);
- strerror_r(errno,buf+len,sizeof(buf)-len);
+ __redis_strerror_r(errno, (char *)(buf + len), sizeof(buf) - len);
__redisSetError(c,type,buf);
}
@@ -138,7 +141,7 @@ int redisKeepAlive(redisContext *c, int interval) {
return REDIS_ERR;
}
#else
-#ifndef __sun
+#if defined(__GLIBC__) && !defined(__FreeBSD_kernel__)
val = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
__redisSetError(c,REDIS_ERR_OTHER,strerror(errno));
@@ -175,19 +178,15 @@ static int redisSetTcpNoDelay(redisContext *c) {
#define __MAX_MSEC (((LONG_MAX) - 999) / 1000)
-static int redisContextWaitReady(redisContext *c, const struct timeval *timeout) {
- struct pollfd wfd[1];
- long msec;
-
- msec = -1;
- wfd[0].fd = c->fd;
- wfd[0].events = POLLOUT;
+static int redisContextTimeoutMsec(redisContext *c, long *result)
+{
+ const struct timeval *timeout = c->timeout;
+ long msec = -1;
/* Only use timeout when not NULL. */
if (timeout != NULL) {
if (timeout->tv_usec > 1000000 || timeout->tv_sec > __MAX_MSEC) {
- __redisSetErrorFromErrno(c, REDIS_ERR_IO, NULL);
- redisContextCloseFd(c);
+ *result = msec;
return REDIS_ERR;
}
@@ -198,6 +197,16 @@ static int redisContextWaitReady(redisContext *c, const struct timeval *timeout)
}
}
+ *result = msec;
+ return REDIS_OK;
+}
+
+static int redisContextWaitReady(redisContext *c, long msec) {
+ struct pollfd wfd[1];
+
+ wfd[0].fd = c->fd;
+ wfd[0].events = POLLOUT;
+
if (errno == EINPROGRESS) {
int res;
@@ -256,10 +265,57 @@ int redisContextSetTimeout(redisContext *c, const struct timeval tv) {
static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
const struct timeval *timeout,
const char *source_addr) {
- int s, rv;
+ int s, rv, n;
char _port[6]; /* strlen("65535"); */
struct addrinfo hints, *servinfo, *bservinfo, *p, *b;
int blocking = (c->flags & REDIS_BLOCK);
+ int reuseaddr = (c->flags & REDIS_REUSEADDR);
+ int reuses = 0;
+ long timeout_msec = -1;
+
+ servinfo = NULL;
+ c->connection_type = REDIS_CONN_TCP;
+ c->tcp.port = port;
+
+ /* We need to take possession of the passed parameters
+ * to make them reusable for a reconnect.
+ * We also carefully check we don't free data we already own,
+ * as in the case of the reconnect method.
+ *
+ * This is a bit ugly, but atleast it works and doesn't leak memory.
+ **/
+ if (c->tcp.host != addr) {
+ if (c->tcp.host)
+ free(c->tcp.host);
+
+ c->tcp.host = strdup(addr);
+ }
+
+ if (timeout) {
+ if (c->timeout != timeout) {
+ if (c->timeout == NULL)
+ c->timeout = malloc(sizeof(struct timeval));
+
+ memcpy(c->timeout, timeout, sizeof(struct timeval));
+ }
+ } else {
+ if (c->timeout)
+ free(c->timeout);
+ c->timeout = NULL;
+ }
+
+ if (redisContextTimeoutMsec(c, &timeout_msec) != REDIS_OK) {
+ __redisSetError(c, REDIS_ERR_IO, "Invalid timeout specified");
+ goto error;
+ }
+
+ if (source_addr == NULL) {
+ free(c->tcp.source_addr);
+ c->tcp.source_addr = NULL;
+ } else if (c->tcp.source_addr != source_addr) {
+ free(c->tcp.source_addr);
+ c->tcp.source_addr = strdup(source_addr);
+ }
snprintf(_port, 6, "%d", port);
memset(&hints,0,sizeof(hints));
@@ -271,7 +327,7 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
* as this would add latency to every connect. Otherwise a more sensible
* route could be: Use IPv6 if both addresses are available and there is IPv6
* connectivity. */
- if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) {
+ if ((rv = getaddrinfo(c->tcp.host,_port,&hints,&servinfo)) != 0) {
hints.ai_family = AF_INET6;
if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) {
__redisSetError(c,REDIS_ERR_OTHER,gai_strerror(rv));
@@ -279,27 +335,38 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
}
}
for (p = servinfo; p != NULL; p = p->ai_next) {
+addrretry:
if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
continue;
c->fd = s;
if (redisSetBlocking(c,0) != REDIS_OK)
goto error;
- if (source_addr) {
+ if (c->tcp.source_addr) {
int bound = 0;
/* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
- if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0) {
+ if ((rv = getaddrinfo(c->tcp.source_addr, NULL, &hints, &bservinfo)) != 0) {
char buf[128];
snprintf(buf,sizeof(buf),"Can't get addr: %s",gai_strerror(rv));
__redisSetError(c,REDIS_ERR_OTHER,buf);
goto error;
}
+
+ if (reuseaddr) {
+ n = 1;
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char*) &n,
+ sizeof(n)) < 0) {
+ goto error;
+ }
+ }
+
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
bound = 1;
break;
}
}
+ freeaddrinfo(bservinfo);
if (!bound) {
char buf[128];
snprintf(buf,sizeof(buf),"Can't bind socket: %s",strerror(errno));
@@ -313,8 +380,15 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
continue;
} else if (errno == EINPROGRESS && !blocking) {
/* This is ok. */
+ } else if (errno == EADDRNOTAVAIL && reuseaddr) {
+ if (++reuses >= REDIS_CONNECT_RETRIES) {
+ goto error;
+ } else {
+ redisContextCloseFd(c);
+ goto addrretry;
+ }
} else {
- if (redisContextWaitReady(c,timeout) != REDIS_OK)
+ if (redisContextWaitReady(c,timeout_msec) != REDIS_OK)
goto error;
}
}
@@ -355,19 +429,40 @@ int redisContextConnectBindTcp(redisContext *c, const char *addr, int port,
int redisContextConnectUnix(redisContext *c, const char *path, const struct timeval *timeout) {
int blocking = (c->flags & REDIS_BLOCK);
struct sockaddr_un sa;
+ long timeout_msec = -1;
if (redisCreateSocket(c,AF_LOCAL) < 0)
return REDIS_ERR;
if (redisSetBlocking(c,0) != REDIS_OK)
return REDIS_ERR;
+ c->connection_type = REDIS_CONN_UNIX;
+ if (c->unix_sock.path != path)
+ c->unix_sock.path = strdup(path);
+
+ if (timeout) {
+ if (c->timeout != timeout) {
+ if (c->timeout == NULL)
+ c->timeout = malloc(sizeof(struct timeval));
+
+ memcpy(c->timeout, timeout, sizeof(struct timeval));
+ }
+ } else {
+ if (c->timeout)
+ free(c->timeout);
+ c->timeout = NULL;
+ }
+
+ if (redisContextTimeoutMsec(c,&timeout_msec) != REDIS_OK)
+ return REDIS_ERR;
+
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (connect(c->fd, (struct sockaddr*)&sa, sizeof(sa)) == -1) {
if (errno == EINPROGRESS && !blocking) {
/* This is ok. */
} else {
- if (redisContextWaitReady(c,timeout) != REDIS_OK)
+ if (redisContextWaitReady(c,timeout_msec) != REDIS_OK)
return REDIS_ERR;
}
}
diff --git a/deps/hiredis/net.h b/deps/hiredis/net.h
index 5e742f577..2f1a0bf85 100644
--- a/deps/hiredis/net.h
+++ b/deps/hiredis/net.h
@@ -1,7 +1,9 @@
/* Extracted from anet.c to work properly with Hiredis error reporting.
*
- * Copyright (c) 2006-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c
new file mode 100644
index 000000000..50333b534
--- /dev/null
+++ b/deps/hiredis/read.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "fmacros.h"
+#include <string.h>
+#include <stdlib.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h>
+
+#include "read.h"
+#include "sds.h"
+
+static void __redisReaderSetError(redisReader *r, int type, const char *str) {
+ size_t len;
+
+ if (r->reply != NULL && r->fn && r->fn->freeObject) {
+ r->fn->freeObject(r->reply);
+ r->reply = NULL;
+ }
+
+ /* Clear input buffer on errors. */
+ if (r->buf != NULL) {
+ sdsfree(r->buf);
+ r->buf = NULL;
+ r->pos = r->len = 0;
+ }
+
+ /* Reset task stack. */
+ r->ridx = -1;
+
+ /* Set error. */
+ r->err = type;
+ len = strlen(str);
+ len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1);
+ memcpy(r->errstr,str,len);
+ r->errstr[len] = '\0';
+}
+
+static size_t chrtos(char *buf, size_t size, char byte) {
+ size_t len = 0;
+
+ switch(byte) {
+ case '\\':
+ case '"':
+ len = snprintf(buf,size,"\"\\%c\"",byte);
+ break;
+ case '\n': len = snprintf(buf,size,"\"\\n\""); break;
+ case '\r': len = snprintf(buf,size,"\"\\r\""); break;
+ case '\t': len = snprintf(buf,size,"\"\\t\""); break;
+ case '\a': len = snprintf(buf,size,"\"\\a\""); break;
+ case '\b': len = snprintf(buf,size,"\"\\b\""); break;
+ default:
+ if (isprint(byte))
+ len = snprintf(buf,size,"\"%c\"",byte);
+ else
+ len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte);
+ break;
+ }
+
+ return len;
+}
+
+static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) {
+ char cbuf[8], sbuf[128];
+
+ chrtos(cbuf,sizeof(cbuf),byte);
+ snprintf(sbuf,sizeof(sbuf),
+ "Protocol error, got %s as reply type byte", cbuf);
+ __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf);
+}
+
+static void __redisReaderSetErrorOOM(redisReader *r) {
+ __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory");
+}
+
+static char *readBytes(redisReader *r, unsigned int bytes) {
+ char *p;
+ if (r->len-r->pos >= bytes) {
+ p = r->buf+r->pos;
+ r->pos += bytes;
+ return p;
+ }
+ return NULL;
+}
+
+/* Find pointer to \r\n. */
+static char *seekNewline(char *s, size_t len) {
+ int pos = 0;
+ int _len = len-1;
+
+ /* Position should be < len-1 because the character at "pos" should be
+ * followed by a \n. Note that strchr cannot be used because it doesn't
+ * allow to search a limited length and the buffer that is being searched
+ * might not have a trailing NULL character. */
+ while (pos < _len) {
+ while(pos < _len && s[pos] != '\r') pos++;
+ if (pos==_len) {
+ /* Not found. */
+ return NULL;
+ } else {
+ if (s[pos+1] == '\n') {
+ /* Found. */
+ return s+pos;
+ } else {
+ /* Continue searching. */
+ pos++;
+ }
+ }
+ }
+ return NULL;
+}
+
+/* Read a long long value starting at *s, under the assumption that it will be
+ * terminated by \r\n. Ambiguously returns -1 for unexpected input. */
+static long long readLongLong(char *s) {
+ long long v = 0;
+ int dec, mult = 1;
+ char c;
+
+ if (*s == '-') {
+ mult = -1;
+ s++;
+ } else if (*s == '+') {
+ mult = 1;
+ s++;
+ }
+
+ while ((c = *(s++)) != '\r') {
+ dec = c - '0';
+ if (dec >= 0 && dec < 10) {
+ v *= 10;
+ v += dec;
+ } else {
+ /* Should not happen... */
+ return -1;
+ }
+ }
+
+ return mult*v;
+}
+
+static char *readLine(redisReader *r, int *_len) {
+ char *p, *s;
+ int len;
+
+ p = r->buf+r->pos;
+ s = seekNewline(p,(r->len-r->pos));
+ if (s != NULL) {
+ len = s-(r->buf+r->pos);
+ r->pos += len+2; /* skip \r\n */
+ if (_len) *_len = len;
+ return p;
+ }
+ return NULL;
+}
+
+static void moveToNextTask(redisReader *r) {
+ redisReadTask *cur, *prv;
+ while (r->ridx >= 0) {
+ /* Return a.s.a.p. when the stack is now empty. */
+ if (r->ridx == 0) {
+ r->ridx--;
+ return;
+ }
+
+ cur = &(r->rstack[r->ridx]);
+ prv = &(r->rstack[r->ridx-1]);
+ assert(prv->type == REDIS_REPLY_ARRAY);
+ if (cur->idx == prv->elements-1) {
+ r->ridx--;
+ } else {
+ /* Reset the type because the next item can be anything */
+ assert(cur->idx < prv->elements);
+ cur->type = -1;
+ cur->elements = -1;
+ cur->idx++;
+ return;
+ }
+ }
+}
+
+static int processLineItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj;
+ char *p;
+ int len;
+
+ if ((p = readLine(r,&len)) != NULL) {
+ if (cur->type == REDIS_REPLY_INTEGER) {
+ if (r->fn && r->fn->createInteger)
+ obj = r->fn->createInteger(cur,readLongLong(p));
+ else
+ obj = (void*)REDIS_REPLY_INTEGER;
+ } else {
+ /* Type will be error or status. */
+ if (r->fn && r->fn->createString)
+ obj = r->fn->createString(cur,p,len);
+ else
+ obj = (void*)(size_t)(cur->type);
+ }
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ /* Set reply if this is the root object. */
+ if (r->ridx == 0) r->reply = obj;
+ moveToNextTask(r);
+ return REDIS_OK;
+ }
+
+ return REDIS_ERR;
+}
+
+static int processBulkItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj = NULL;
+ char *p, *s;
+ long len;
+ unsigned long bytelen;
+ int success = 0;
+
+ p = r->buf+r->pos;
+ s = seekNewline(p,r->len-r->pos);
+ if (s != NULL) {
+ p = r->buf+r->pos;
+ bytelen = s-(r->buf+r->pos)+2; /* include \r\n */
+ len = readLongLong(p);
+
+ if (len < 0) {
+ /* The nil object can always be created. */
+ if (r->fn && r->fn->createNil)
+ obj = r->fn->createNil(cur);
+ else
+ obj = (void*)REDIS_REPLY_NIL;
+ success = 1;
+ } else {
+ /* Only continue when the buffer contains the entire bulk item. */
+ bytelen += len+2; /* include \r\n */
+ if (r->pos+bytelen <= r->len) {
+ if (r->fn && r->fn->createString)
+ obj = r->fn->createString(cur,s+2,len);
+ else
+ obj = (void*)REDIS_REPLY_STRING;
+ success = 1;
+ }
+ }
+
+ /* Proceed when obj was created. */
+ if (success) {
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ r->pos += bytelen;
+
+ /* Set reply if this is the root object. */
+ if (r->ridx == 0) r->reply = obj;
+ moveToNextTask(r);
+ return REDIS_OK;
+ }
+ }
+
+ return REDIS_ERR;
+}
+
+static int processMultiBulkItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj;
+ char *p;
+ long elements;
+ int root = 0;
+
+ /* Set error for nested multi bulks with depth > 7 */
+ if (r->ridx == 8) {
+ __redisReaderSetError(r,REDIS_ERR_PROTOCOL,
+ "No support for nested multi bulk replies with depth > 7");
+ return REDIS_ERR;
+ }
+
+ if ((p = readLine(r,NULL)) != NULL) {
+ elements = readLongLong(p);
+ root = (r->ridx == 0);
+
+ if (elements == -1) {
+ if (r->fn && r->fn->createNil)
+ obj = r->fn->createNil(cur);
+ else
+ obj = (void*)REDIS_REPLY_NIL;
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ moveToNextTask(r);
+ } else {
+ if (r->fn && r->fn->createArray)
+ obj = r->fn->createArray(cur,elements);
+ else
+ obj = (void*)REDIS_REPLY_ARRAY;
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ /* Modify task stack when there are more than 0 elements. */
+ if (elements > 0) {
+ cur->elements = elements;
+ cur->obj = obj;
+ r->ridx++;
+ r->rstack[r->ridx].type = -1;
+ r->rstack[r->ridx].elements = -1;
+ r->rstack[r->ridx].idx = 0;
+ r->rstack[r->ridx].obj = NULL;
+ r->rstack[r->ridx].parent = cur;
+ r->rstack[r->ridx].privdata = r->privdata;
+ } else {
+ moveToNextTask(r);
+ }
+ }
+
+ /* Set reply if this is the root object. */
+ if (root) r->reply = obj;
+ return REDIS_OK;
+ }
+
+ return REDIS_ERR;
+}
+
+static int processItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ char *p;
+
+ /* check if we need to read type */
+ if (cur->type < 0) {
+ if ((p = readBytes(r,1)) != NULL) {
+ switch (p[0]) {
+ case '-':
+ cur->type = REDIS_REPLY_ERROR;
+ break;
+ case '+':
+ cur->type = REDIS_REPLY_STATUS;
+ break;
+ case ':':
+ cur->type = REDIS_REPLY_INTEGER;
+ break;
+ case '$':
+ cur->type = REDIS_REPLY_STRING;
+ break;
+ case '*':
+ cur->type = REDIS_REPLY_ARRAY;
+ break;
+ default:
+ __redisReaderSetErrorProtocolByte(r,*p);
+ return REDIS_ERR;
+ }
+ } else {
+ /* could not consume 1 byte */
+ return REDIS_ERR;
+ }
+ }
+
+ /* process typed item */
+ switch(cur->type) {
+ case REDIS_REPLY_ERROR:
+ case REDIS_REPLY_STATUS:
+ case REDIS_REPLY_INTEGER:
+ return processLineItem(r);
+ case REDIS_REPLY_STRING:
+ return processBulkItem(r);
+ case REDIS_REPLY_ARRAY:
+ return processMultiBulkItem(r);
+ default:
+ assert(NULL);
+ return REDIS_ERR; /* Avoid warning. */
+ }
+}
+
+redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn) {
+ redisReader *r;
+
+ r = calloc(sizeof(redisReader),1);
+ if (r == NULL)
+ return NULL;
+
+ r->err = 0;
+ r->errstr[0] = '\0';
+ r->fn = fn;
+ r->buf = sdsempty();
+ r->maxbuf = REDIS_READER_MAX_BUF;
+ if (r->buf == NULL) {
+ free(r);
+ return NULL;
+ }
+
+ r->ridx = -1;
+ return r;
+}
+
+void redisReaderFree(redisReader *r) {
+ if (r->reply != NULL && r->fn && r->fn->freeObject)
+ r->fn->freeObject(r->reply);
+ if (r->buf != NULL)
+ sdsfree(r->buf);
+ free(r);
+}
+
+int redisReaderFeed(redisReader *r, const char *buf, size_t len) {
+ sds newbuf;
+
+ /* Return early when this reader is in an erroneous state. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* Copy the provided buffer. */
+ if (buf != NULL && len >= 1) {
+ /* Destroy internal buffer when it is empty and is quite large. */
+ if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) {
+ sdsfree(r->buf);
+ r->buf = sdsempty();
+ r->pos = 0;
+
+ /* r->buf should not be NULL since we just free'd a larger one. */
+ assert(r->buf != NULL);
+ }
+
+ newbuf = sdscatlen(r->buf,buf,len);
+ if (newbuf == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ r->buf = newbuf;
+ r->len = sdslen(r->buf);
+ }
+
+ return REDIS_OK;
+}
+
+int redisReaderGetReply(redisReader *r, void **reply) {
+ /* Default target pointer to NULL. */
+ if (reply != NULL)
+ *reply = NULL;
+
+ /* Return early when this reader is in an erroneous state. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* When the buffer is empty, there will never be a reply. */
+ if (r->len == 0)
+ return REDIS_OK;
+
+ /* Set first item to process when the stack is empty. */
+ if (r->ridx == -1) {
+ r->rstack[0].type = -1;
+ r->rstack[0].elements = -1;
+ r->rstack[0].idx = -1;
+ r->rstack[0].obj = NULL;
+ r->rstack[0].parent = NULL;
+ r->rstack[0].privdata = r->privdata;
+ r->ridx = 0;
+ }
+
+ /* Process items in reply. */
+ while (r->ridx >= 0)
+ if (processItem(r) != REDIS_OK)
+ break;
+
+ /* Return ASAP when an error occurred. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* Discard part of the buffer when we've consumed at least 1k, to avoid
+ * doing unnecessary calls to memmove() in sds.c. */
+ if (r->pos >= 1024) {
+ sdsrange(r->buf,r->pos,-1);
+ r->pos = 0;
+ r->len = sdslen(r->buf);
+ }
+
+ /* Emit a reply when there is one. */
+ if (r->ridx == -1) {
+ if (reply != NULL)
+ *reply = r->reply;
+ r->reply = NULL;
+ }
+ return REDIS_OK;
+}
diff --git a/deps/hiredis/read.h b/deps/hiredis/read.h
new file mode 100644
index 000000000..2988aa453
--- /dev/null
+++ b/deps/hiredis/read.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef __HIREDIS_READ_H
+#define __HIREDIS_READ_H
+#include <stdio.h> /* for size_t */
+
+#define REDIS_ERR -1
+#define REDIS_OK 0
+
+/* When an error occurs, the err flag in a context is set to hold the type of
+ * error that occurred. REDIS_ERR_IO means there was an I/O error and you
+ * should use the "errno" variable to find out what is wrong.
+ * For other values, the "errstr" field will hold a description. */
+#define REDIS_ERR_IO 1 /* Error in read or write */
+#define REDIS_ERR_EOF 3 /* End of file */
+#define REDIS_ERR_PROTOCOL 4 /* Protocol error */
+#define REDIS_ERR_OOM 5 /* Out of memory */
+#define REDIS_ERR_OTHER 2 /* Everything else... */
+
+#define REDIS_REPLY_STRING 1
+#define REDIS_REPLY_ARRAY 2
+#define REDIS_REPLY_INTEGER 3
+#define REDIS_REPLY_NIL 4
+#define REDIS_REPLY_STATUS 5
+#define REDIS_REPLY_ERROR 6
+
+#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct redisReadTask {
+ int type;
+ int elements; /* number of elements in multibulk container */
+ int idx; /* index in parent (array) object */
+ void *obj; /* holds user-generated value for a read task */
+ struct redisReadTask *parent; /* parent task */
+ void *privdata; /* user-settable arbitrary field */
+} redisReadTask;
+
+typedef struct redisReplyObjectFunctions {
+ void *(*createString)(const redisReadTask*, char*, size_t);
+ void *(*createArray)(const redisReadTask*, int);
+ void *(*createInteger)(const redisReadTask*, long long);
+ void *(*createNil)(const redisReadTask*);
+ void (*freeObject)(void*);
+} redisReplyObjectFunctions;
+
+typedef struct redisReader {
+ int err; /* Error flags, 0 when there is no error */
+ char errstr[128]; /* String representation of error when applicable */
+
+ char *buf; /* Read buffer */
+ size_t pos; /* Buffer cursor */
+ size_t len; /* Buffer length */
+ size_t maxbuf; /* Max length of unused buffer */
+
+ redisReadTask rstack[9];
+ int ridx; /* Index of current read task */
+ void *reply; /* Temporary reply pointer */
+
+ redisReplyObjectFunctions *fn;
+ void *privdata;
+} redisReader;
+
+/* Public API for the protocol parser. */
+redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn);
+void redisReaderFree(redisReader *r);
+int redisReaderFeed(redisReader *r, const char *buf, size_t len);
+int redisReaderGetReply(redisReader *r, void **reply);
+
+#define redisReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p))
+#define redisReaderGetObject(_r) (((redisReader*)(_r))->reply)
+#define redisReaderGetError(_r) (((redisReader*)(_r))->errstr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/deps/hiredis/sds.c b/deps/hiredis/sds.c
index 47b9823ea..923ffd82f 100644
--- a/deps/hiredis/sds.c
+++ b/deps/hiredis/sds.c
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,7 +36,35 @@
#include <ctype.h>
#include <assert.h>
#include "sds.h"
-#include "zmalloc.h"
+#include "sdsalloc.h"
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 32)
+ return SDS_TYPE_5;
+ if (string_size < 0xff)
+ return SDS_TYPE_8;
+ if (string_size < 0xffff)
+ return SDS_TYPE_16;
+ if (string_size < 0xffffffff)
+ return SDS_TYPE_32;
+ return SDS_TYPE_64;
+}
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
@@ -43,26 +73,65 @@
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
*
- * mystring = sdsnewlen("abc",3");
+ * mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
- struct sdshdr *sh;
-
- if (init) {
- sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
- } else {
- sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
- }
+ void *sh;
+ sds s;
+ char type = sdsReqType(initlen);
+ /* Empty strings are usually created in order to append. Use type 8
+ * since type 5 is not good at this. */
+ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+ int hdrlen = sdsHdrSize(type);
+ unsigned char *fp; /* flags pointer. */
+
+ sh = s_malloc(hdrlen+initlen+1);
if (sh == NULL) return NULL;
- sh->len = initlen;
- sh->free = 0;
+ if (!init)
+ memset(sh, 0, hdrlen+initlen+1);
+ s = (char*)sh+hdrlen;
+ fp = ((unsigned char*)s)-1;
+ switch(type) {
+ case SDS_TYPE_5: {
+ *fp = type | (initlen << SDS_TYPE_BITS);
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ }
if (initlen && init)
- memcpy(sh->buf, init, initlen);
- sh->buf[initlen] = '\0';
- return (char*)sh->buf;
+ memcpy(s, init, initlen);
+ s[initlen] = '\0';
+ return s;
}
/* Create an empty (zero length) sds string. Even in this case the string
@@ -71,7 +140,7 @@ sds sdsempty(void) {
return sdsnewlen("",0);
}
-/* Create a new sds string starting from a null termined C string. */
+/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
@@ -85,7 +154,7 @@ sds sdsdup(const sds s) {
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
- zfree(s-sizeof(struct sdshdr));
+ s_free((char*)s-sdsHdrSize(s[-1]));
}
/* Set the sds string length to the length as obtained with strlen(), so
@@ -103,47 +172,68 @@ void sdsfree(sds s) {
* the output will be "6" as the string was modified but the logical length
* remains 6 bytes. */
void sdsupdatelen(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
int reallen = strlen(s);
- sh->free += (sh->len-reallen);
- sh->len = reallen;
+ sdssetlen(s, reallen);
}
-/* Modify an sds string on-place to make it empty (zero length).
+/* Modify an sds string in-place to make it empty (zero length).
* However all the existing buffer is not discarded but set as free space
* so that next append operations will not require allocations up to the
* number of bytes previously available. */
void sdsclear(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- sh->free += sh->len;
- sh->len = 0;
- sh->buf[0] = '\0';
+ sdssetlen(s, 0);
+ s[0] = '\0';
}
/* Enlarge the free space at the end of the sds string so that the caller
* is sure that after calling this function can overwrite up to addlen
* bytes after the end of the string, plus one more byte for nul term.
- *
+ *
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
- struct sdshdr *sh, *newsh;
- size_t free = sdsavail(s);
+ void *sh, *newsh;
+ size_t avail = sdsavail(s);
size_t len, newlen;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+
+ /* Return ASAP if there is enough space left. */
+ if (avail >= addlen) return s;
- if (free >= addlen) return s;
len = sdslen(s);
- sh = (void*) (s-(sizeof(struct sdshdr)));
+ sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
- newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
- if (newsh == NULL) return NULL;
- newsh->free = newlen - len;
- return newsh->buf;
+ type = sdsReqType(newlen);
+
+ /* Don't use type 5: the user is appending to the string and type 5 is
+ * not able to remember empty space, so sdsMakeRoomFor() must be called
+ * at every appending operation. */
+ if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ /* Since the header size changes, need to move the string forward,
+ * and can't use realloc */
+ newsh = s_malloc(hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, newlen);
+ return s;
}
/* Reallocate the sds string so that it has no free space at the end. The
@@ -153,12 +243,29 @@ sds sdsMakeRoomFor(sds s, size_t addlen) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
- struct sdshdr *sh;
-
- sh = (void*) (s-(sizeof(struct sdshdr)));
- sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1);
- sh->free = 0;
- return sh->buf;
+ void *sh, *newsh;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+ size_t len = sdslen(s);
+ sh = (char*)s-sdsHdrSize(oldtype);
+
+ type = sdsReqType(len);
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ newsh = s_malloc(hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, len);
+ return s;
}
/* Return the total size of the allocation of the specifed sds string,
@@ -169,9 +276,14 @@ sds sdsRemoveFreeSpace(sds s) {
* 4) The implicit null term.
*/
size_t sdsAllocSize(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
- return sizeof(*sh)+sh->len+sh->free+1;
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
}
/* Increment the sds length and decrements the left free space at the
@@ -198,13 +310,44 @@ size_t sdsAllocSize(sds s) {
* sdsIncrLen(s, nread);
*/
void sdsIncrLen(sds s, int incr) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
-
- assert(sh->free >= incr);
- sh->len += incr;
- sh->free -= incr;
- assert(sh->free >= 0);
- s[sh->len] = '\0';
+ unsigned char flags = s[-1];
+ size_t len;
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+ assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+ *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+ len = oldlen+incr;
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ default: len = 0; /* Just to avoid compilation warnings. */
+ }
+ s[len] = '\0';
}
/* Grow the sds to have the specified length. Bytes that were not part of
@@ -213,19 +356,15 @@ void sdsIncrLen(sds s, int incr) {
* if the specified length is smaller than the current length, no operation
* is performed. */
sds sdsgrowzero(sds s, size_t len) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- size_t totlen, curlen = sh->len;
+ size_t curlen = sdslen(s);
if (len <= curlen) return s;
s = sdsMakeRoomFor(s,len-curlen);
if (s == NULL) return NULL;
/* Make sure added region doesn't contain garbage */
- sh = (void*)(s-(sizeof(struct sdshdr)));
memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
- totlen = sh->len+sh->free;
- sh->len = len;
- sh->free = totlen-sh->len;
+ sdssetlen(s, len);
return s;
}
@@ -235,15 +374,12 @@ sds sdsgrowzero(sds s, size_t len) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
- struct sdshdr *sh;
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
memcpy(s+curlen, t, len);
- sh->len = curlen+len;
- sh->free = sh->free-len;
+ sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
@@ -267,19 +403,13 @@ sds sdscatsds(sds s, const sds t) {
/* Destructively modify the sds string 's' to hold the specified binary
* safe string pointed by 't' of length 'len' bytes. */
sds sdscpylen(sds s, const char *t, size_t len) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t totlen = sh->free+sh->len;
-
- if (totlen < len) {
- s = sdsMakeRoomFor(s,len-sh->len);
+ if (sdsalloc(s) < len) {
+ s = sdsMakeRoomFor(s,len-sdslen(s));
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
- totlen = sh->free+sh->len;
}
memcpy(s, t, len);
s[len] = '\0';
- sh->len = len;
- sh->free = totlen-len;
+ sdssetlen(s, len);
return s;
}
@@ -293,7 +423,7 @@ sds sdscpy(sds s, const char *t) {
* conversion. 's' must point to a string with room for at least
* SDS_LLSTR_SIZE bytes.
*
- * The function returns the lenght of the null-terminated string
+ * The function returns the length of the null-terminated string
* representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) {
@@ -367,7 +497,7 @@ sds sdsfromlonglong(long long value) {
return sdsnewlen(buf,len);
}
-/* Like sdscatpritf() but gets va_list instead of being variadic. */
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char staticbuf[1024], *buf = staticbuf, *t;
@@ -376,7 +506,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* We try to start using a static buffer for speed.
* If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) {
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
} else {
buflen = sizeof(staticbuf);
@@ -388,10 +518,11 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
buf[buflen-2] = '\0';
va_copy(cpy,ap);
vsnprintf(buf, buflen, fmt, cpy);
+ va_end(cpy);
if (buf[buflen-2] != '\0') {
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
buflen *= 2;
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
continue;
}
@@ -400,7 +531,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* Finally concat the obtained string to the SDS string and return it. */
t = sdscat(s, buf);
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
return t;
}
@@ -412,7 +543,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
*
* Example:
*
- * s = sdsempty("Sum is: ");
+ * s = sdsnew("Sum is: ");
* s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
*
* Often you need to create a string from scratch with the printf-alike
@@ -446,25 +577,21 @@ sds sdscatprintf(sds s, const char *fmt, ...) {
* %% - Verbatim "%" character.
*/
sds sdscatfmt(sds s, char const *fmt, ...) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t initlen = sdslen(s);
const char *f = fmt;
int i;
va_list ap;
va_start(ap,fmt);
- f = fmt; /* Next format specifier byte to process. */
- i = initlen; /* Position of the next byte to write to dest str. */
+ i = sdslen(s); /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
- int l;
+ size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
- if (sh->free == 0) {
+ if (sdsavail(s)==0) {
s = sdsMakeRoomFor(s,1);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
@@ -476,13 +603,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
break;
case 'i':
@@ -494,13 +619,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
@@ -513,27 +636,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
break;
default:
s[i++] = *f;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
f++;
@@ -554,25 +673,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
- * s = sdstrim(s,"A. :");
+ * s = sdstrim(s,"Aa. :");
* printf("%s\n", s);
*
* Output will be just "Hello World".
*/
sds sdstrim(sds s, const char *cset) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
char *start, *end, *sp, *ep;
size_t len;
sp = start = s;
ep = end = s+sdslen(s)-1;
while(sp <= end && strchr(cset, *sp)) sp++;
- while(ep > start && strchr(cset, *ep)) ep--;
+ while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
- if (sh->buf != sp) memmove(sh->buf, sp, len);
- sh->buf[len] = '\0';
- sh->free = sh->free+(sh->len-len);
- sh->len = len;
+ if (s != sp) memmove(s, sp, len);
+ s[len] = '\0';
+ sdssetlen(s,len);
return s;
}
@@ -593,7 +710,6 @@ sds sdstrim(sds s, const char *cset) {
* sdsrange(s,1,-1); => "ello World"
*/
void sdsrange(sds s, int start, int end) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
if (len == 0) return;
@@ -616,10 +732,9 @@ void sdsrange(sds s, int start, int end) {
} else {
start = 0;
}
- if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
- sh->buf[newlen] = 0;
- sh->free = sh->free+(sh->len-newlen);
- sh->len = newlen;
+ if (start && newlen) memmove(s, s+start, newlen);
+ s[newlen] = 0;
+ sdssetlen(s,newlen);
}
/* Apply tolower() to every character of the sds string 's'. */
@@ -640,8 +755,8 @@ void sdstoupper(sds s) {
*
* Return value:
*
- * 1 if s1 > s2.
- * -1 if s1 < s2.
+ * positive if s1 > s2.
+ * negative if s1 < s2.
* 0 if s1 and s2 are exactly the same binary string.
*
* If two strings share exactly the same prefix, but one of the two has
@@ -681,7 +796,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
if (seplen < 1 || len < 0) return NULL;
- tokens = zmalloc(sizeof(sds)*slots);
+ tokens = s_malloc(sizeof(sds)*slots);
if (tokens == NULL) return NULL;
if (len == 0) {
@@ -694,7 +809,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
sds *newtokens;
slots *= 2;
- newtokens = zrealloc(tokens,sizeof(sds)*slots);
+ newtokens = s_realloc(tokens,sizeof(sds)*slots);
if (newtokens == NULL) goto cleanup;
tokens = newtokens;
}
@@ -718,7 +833,7 @@ cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
- zfree(tokens);
+ s_free(tokens);
*count = 0;
return NULL;
}
@@ -729,7 +844,7 @@ void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
- zfree(tokens);
+ s_free(tokens);
}
/* Append to the sds string "s" an escaped string representation where
@@ -903,13 +1018,13 @@ sds *sdssplitargs(const char *line, int *argc) {
if (*p) p++;
}
/* add the token to the vector */
- vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
+ vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
/* Even on empty input string return something not NULL. */
- if (vector == NULL) vector = zmalloc(sizeof(void*));
+ if (vector == NULL) vector = s_malloc(sizeof(void*));
return vector;
}
}
@@ -917,7 +1032,7 @@ sds *sdssplitargs(const char *line, int *argc) {
err:
while((*argc)--)
sdsfree(vector[*argc]);
- zfree(vector);
+ s_free(vector);
if (current) sdsfree(current);
*argc = 0;
return NULL;
@@ -959,14 +1074,35 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#ifdef SDS_TEST_MAIN
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscatsds(join, argv[j]);
+ if (j != argc-1) join = sdscatlen(join,sep,seplen);
+ }
+ return join;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined into sdsalloc.h in order to avoid to pay
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, if they want to touch the SDS internals
+ * even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
-int main(void) {
+#define UNUSED(x) (void)(x)
+int sdsTest(void) {
{
- struct sdshdr *sh;
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
@@ -1002,6 +1138,7 @@ int main(void) {
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
"9223372036854775807--",60) == 0)
+ printf("[%s]\n",x);
sdsfree(x);
x = sdsnew("--");
@@ -1011,6 +1148,18 @@ int main(void) {
memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," x");
+ test_cond("sdstrim() works when all chars match",
+ sdslen(x) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," ");
+ test_cond("sdstrim() works when a single char remains",
+ sdslen(x) == 1 && x[0] == 'x')
+
+ sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
@@ -1077,24 +1226,47 @@ int main(void) {
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
- int oldfree;
+ unsigned int oldfree;
+ char *p;
+ int step = 10, j, i;
sdsfree(x);
+ sdsfree(y);
x = sdsnew("0");
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
- x = sdsMakeRoomFor(x,1);
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
- oldfree = sh->free;
- x[1] = '1';
- sdsIncrLen(x,1);
- test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
- test_cond("sdsIncrLen() -- len", sh->len == 2);
- test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
+ test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+ /* Run the test a few times in order to hit the first two
+ * SDS header types. */
+ for (i = 0; i < 10; i++) {
+ int oldlen = sdslen(x);
+ x = sdsMakeRoomFor(x,step);
+ int type = x[-1]&SDS_TYPE_MASK;
+
+ test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+ if (type != SDS_TYPE_5) {
+ test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+ oldfree = sdsavail(x);
+ }
+ p = x+oldlen;
+ for (j = 0; j < step; j++) {
+ p[j] = 'A'+j;
+ }
+ sdsIncrLen(x,step);
+ }
+ test_cond("sdsMakeRoomFor() content",
+ memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+ test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+ sdsfree(x);
}
}
test_report()
return 0;
}
#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+ return sdsTest();
+}
+#endif
diff --git a/deps/hiredis/sds.h b/deps/hiredis/sds.h
index 9a604021c..13be75a9f 100644
--- a/deps/hiredis/sds.h
+++ b/deps/hiredis/sds.h
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,32 +37,188 @@
#include <sys/types.h>
#include <stdarg.h>
+#include <stdint.h>
typedef char *sds;
-struct sdshdr {
- int len;
- int free;
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+ unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+ uint8_t len; /* used */
+ uint8_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+ uint16_t len; /* used */
+ uint16_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+ uint32_t len; /* used */
+ uint32_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+ uint64_t len; /* used */
+ uint64_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
+#define SDS_TYPE_5 0
+#define SDS_TYPE_8 1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
static inline size_t sdslen(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->len;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->len;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->len;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->len;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->len;
+ }
+ return 0;
}
static inline size_t sdsavail(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->free;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ return 0;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ return sh->alloc - sh->len;
+ }
+ }
+ return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len = newlen;
+ break;
+ }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len += inc;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len += inc;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len += inc;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len += inc;
+ break;
+ }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->alloc;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->alloc;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->alloc;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->alloc;
+ }
+ return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ /* Nothing to do, this type has no total allocation info. */
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->alloc = newlen;
+ break;
+ }
}
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
-size_t sdslen(const sds s);
sds sdsdup(const sds s);
void sdsfree(sds s);
-size_t sdsavail(const sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
@@ -91,11 +249,25 @@ sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to, may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
#endif
diff --git a/deps/hiredis/sdsalloc.h b/deps/hiredis/sdsalloc.h
new file mode 100644
index 000000000..f43023c48
--- /dev/null
+++ b/deps/hiredis/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#define s_malloc malloc
+#define s_realloc realloc
+#define s_free free
diff --git a/deps/hiredis/test.c b/deps/hiredis/test.c
index 713cc06c5..a23d60676 100644
--- a/deps/hiredis/test.c
+++ b/deps/hiredis/test.c
@@ -11,6 +11,7 @@
#include <limits.h>
#include "hiredis.h"
+#include "net.h"
enum connection_type {
CONN_TCP,
@@ -29,7 +30,7 @@ struct config {
struct {
const char *path;
- } unix;
+ } unix_sock;
};
/* The following lines make up our testing "framework" :) */
@@ -43,6 +44,13 @@ static long long usec(void) {
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
+/* The assert() calls below have side effects, so we need assert()
+ * even if we are compiling without asserts (-DNDEBUG). */
+#ifdef NDEBUG
+#undef assert
+#define assert(e) (void)(e)
+#endif
+
static redisContext *select_database(redisContext *c) {
redisReply *reply;
@@ -89,10 +97,10 @@ static redisContext *connect(struct config config) {
if (config.type == CONN_TCP) {
c = redisConnect(config.tcp.host, config.tcp.port);
} else if (config.type == CONN_UNIX) {
- c = redisConnectUnix(config.unix.path);
+ c = redisConnectUnix(config.unix_sock.path);
} else if (config.type == CONN_FD) {
/* Create a dummy connection just to get an fd to inherit */
- redisContext *dummy_ctx = redisConnectUnix(config.unix.path);
+ redisContext *dummy_ctx = redisConnectUnix(config.unix_sock.path);
if (dummy_ctx) {
int fd = disconnect(dummy_ctx, 1);
printf("Connecting to inherited fd %d\n", fd);
@@ -107,6 +115,7 @@ static redisContext *connect(struct config config) {
exit(1);
} else if (c->err) {
printf("Connection error: %s\n", c->errstr);
+ redisFree(c);
exit(1);
}
@@ -215,6 +224,22 @@ static void test_format_commands(void) {
test_cond(strncmp(cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 &&
len == 4+4+(3+2)+4+(7+2)+4+(3+2));
free(cmd);
+
+ sds sds_cmd;
+
+ sds_cmd = sdsempty();
+ test("Format command into sds by passing argc/argv without lengths: ");
+ len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,NULL);
+ test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\nbar\r\n",len) == 0 &&
+ len == 4+4+(3+2)+4+(3+2)+4+(3+2));
+ sdsfree(sds_cmd);
+
+ sds_cmd = sdsempty();
+ test("Format command into sds by passing argc/argv with lengths: ");
+ len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,lens);
+ test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 &&
+ len == 4+4+(3+2)+4+(7+2)+4+(3+2));
+ sdsfree(sds_cmd);
}
static void test_append_formatted_commands(struct config config) {
@@ -318,16 +343,31 @@ static void test_reply_reader(void) {
redisReaderFree(reader);
}
+static void test_free_null(void) {
+ void *redisCtx = NULL;
+ void *reply = NULL;
+
+ test("Don't fail when redisFree is passed a NULL value: ");
+ redisFree(redisCtx);
+ test_cond(redisCtx == NULL);
+
+ test("Don't fail when freeReplyObject is passed a NULL value: ");
+ freeReplyObject(reply);
+ test_cond(reply == NULL);
+}
+
static void test_blocking_connection_errors(void) {
redisContext *c;
test("Returns error when host cannot be resolved: ");
- c = redisConnect((char*)"idontexist.local", 6379);
+ c = redisConnect((char*)"idontexist.test", 6379);
test_cond(c->err == REDIS_ERR_OTHER &&
(strcmp(c->errstr,"Name or service not known") == 0 ||
- strcmp(c->errstr,"Can't resolve: idontexist.local") == 0 ||
+ strcmp(c->errstr,"Can't resolve: idontexist.test") == 0 ||
strcmp(c->errstr,"nodename nor servname provided, or not known") == 0 ||
strcmp(c->errstr,"No address associated with hostname") == 0 ||
+ strcmp(c->errstr,"Temporary failure in name resolution") == 0 ||
+ strcmp(c->errstr,"hostname nor servname provided, or not known") == 0 ||
strcmp(c->errstr,"no address associated with name") == 0));
redisFree(c);
@@ -337,7 +377,7 @@ static void test_blocking_connection_errors(void) {
strcmp(c->errstr,"Connection refused") == 0);
redisFree(c);
- test("Returns error when the unix socket path doesn't accept connections: ");
+ test("Returns error when the unix_sock socket path doesn't accept connections: ");
c = redisConnectUnix((char*)"/tmp/idontexist.sock");
test_cond(c->err == REDIS_ERR_IO); /* Don't care about the message... */
redisFree(c);
@@ -421,6 +461,52 @@ static void test_blocking_connection(struct config config) {
disconnect(c, 0);
}
+static void test_blocking_connection_timeouts(struct config config) {
+ redisContext *c;
+ redisReply *reply;
+ ssize_t s;
+ const char *cmd = "DEBUG SLEEP 3\r\n";
+ struct timeval tv;
+
+ c = connect(config);
+ test("Successfully completes a command when the timeout is not exceeded: ");
+ reply = redisCommand(c,"SET foo fast");
+ freeReplyObject(reply);
+ tv.tv_sec = 0;
+ tv.tv_usec = 10000;
+ redisSetTimeout(c, tv);
+ reply = redisCommand(c, "GET foo");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STRING && memcmp(reply->str, "fast", 4) == 0);
+ freeReplyObject(reply);
+ disconnect(c, 0);
+
+ c = connect(config);
+ test("Does not return a reply when the command times out: ");
+ s = write(c->fd, cmd, strlen(cmd));
+ tv.tv_sec = 0;
+ tv.tv_usec = 10000;
+ redisSetTimeout(c, tv);
+ reply = redisCommand(c, "GET foo");
+ test_cond(s > 0 && reply == NULL && c->err == REDIS_ERR_IO && strcmp(c->errstr, "Resource temporarily unavailable") == 0);
+ freeReplyObject(reply);
+
+ test("Reconnect properly reconnects after a timeout: ");
+ redisReconnect(c);
+ reply = redisCommand(c, "PING");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0);
+ freeReplyObject(reply);
+
+ test("Reconnect properly uses owned parameters: ");
+ config.tcp.host = "foo";
+ config.unix_sock.path = "foo";
+ redisReconnect(c);
+ reply = redisCommand(c, "PING");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0);
+ freeReplyObject(reply);
+
+ disconnect(c, 0);
+}
+
static void test_blocking_io_errors(struct config config) {
redisContext *c;
redisReply *reply;
@@ -444,7 +530,7 @@ static void test_blocking_io_errors(struct config config) {
test("Returns I/O error when the connection is lost: ");
reply = redisCommand(c,"QUIT");
- if (major >= 2 && minor > 0) {
+ if (major > 2 || (major == 2 && minor > 0)) {
/* > 2.0 returns OK on QUIT and read() should be issued once more
* to know the descriptor is at EOF. */
test_cond(strcasecmp(reply->str,"OK") == 0 &&
@@ -482,7 +568,8 @@ static void test_invalid_timeout_errors(struct config config) {
c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout);
- test_cond(c->err == REDIS_ERR_IO);
+ test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0);
+ redisFree(c);
test("Set error when an invalid timeout sec value is given to redisConnectWithTimeout: ");
@@ -491,8 +578,7 @@ static void test_invalid_timeout_errors(struct config config) {
c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout);
- test_cond(c->err == REDIS_ERR_IO);
-
+ test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0);
redisFree(c);
}
@@ -666,7 +752,7 @@ int main(int argc, char **argv) {
.host = "127.0.0.1",
.port = 6379
},
- .unix = {
+ .unix_sock = {
.path = "/tmp/redis.sock"
}
};
@@ -687,7 +773,7 @@ int main(int argc, char **argv) {
cfg.tcp.port = atoi(argv[0]);
} else if (argc >= 2 && !strcmp(argv[0],"-s")) {
argv++; argc--;
- cfg.unix.path = argv[0];
+ cfg.unix_sock.path = argv[0];
} else if (argc >= 1 && !strcmp(argv[0],"--skip-throughput")) {
throughput = 0;
} else if (argc >= 1 && !strcmp(argv[0],"--skip-inherit-fd")) {
@@ -702,27 +788,31 @@ int main(int argc, char **argv) {
test_format_commands();
test_reply_reader();
test_blocking_connection_errors();
+ test_free_null();
printf("\nTesting against TCP connection (%s:%d):\n", cfg.tcp.host, cfg.tcp.port);
cfg.type = CONN_TCP;
test_blocking_connection(cfg);
+ test_blocking_connection_timeouts(cfg);
test_blocking_io_errors(cfg);
test_invalid_timeout_errors(cfg);
test_append_formatted_commands(cfg);
if (throughput) test_throughput(cfg);
- printf("\nTesting against Unix socket connection (%s):\n", cfg.unix.path);
+ printf("\nTesting against Unix socket connection (%s):\n", cfg.unix_sock.path);
cfg.type = CONN_UNIX;
test_blocking_connection(cfg);
+ test_blocking_connection_timeouts(cfg);
test_blocking_io_errors(cfg);
if (throughput) test_throughput(cfg);
if (test_inherit_fd) {
- printf("\nTesting against inherited fd (%s):\n", cfg.unix.path);
+ printf("\nTesting against inherited fd (%s):\n", cfg.unix_sock.path);
cfg.type = CONN_FD;
test_blocking_connection(cfg);
}
+
if (fails) {
printf("*** %d TESTS FAILED ***\n", fails);
return 1;
diff --git a/deps/hiredis/win32.h b/deps/hiredis/win32.h
new file mode 100644
index 000000000..1a27c18f2
--- /dev/null
+++ b/deps/hiredis/win32.h
@@ -0,0 +1,42 @@
+#ifndef _WIN32_HELPER_INCLUDE
+#define _WIN32_HELPER_INCLUDE
+#ifdef _MSC_VER
+
+#ifndef inline
+#define inline __inline
+#endif
+
+#ifndef va_copy
+#define va_copy(d,s) ((d) = (s))
+#endif
+
+#ifndef snprintf
+#define snprintf c99_snprintf
+
+__inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap)
+{
+ int count = -1;
+
+ if (size != 0)
+ count = _vsnprintf_s(str, size, _TRUNCATE, format, ap);
+ if (count == -1)
+ count = _vscprintf(format, ap);
+
+ return count;
+}
+
+__inline int c99_snprintf(char* str, size_t size, const char* format, ...)
+{
+ int count;
+ va_list ap;
+
+ va_start(ap, format);
+ count = c99_vsnprintf(str, size, format, ap);
+ va_end(ap);
+
+ return count;
+}
+#endif
+
+#endif
+#endif \ No newline at end of file
diff --git a/deps/hiredis/zmalloc.h b/deps/hiredis/zmalloc.h
deleted file mode 100644
index 99b87ace9..000000000
--- a/deps/hiredis/zmalloc.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Drop in replacement for zmalloc.h in order to just use libc malloc without
- * any wrappering. */
-
-#ifndef ZMALLOC_H
-#define ZMALLOC_H
-
-#define zmalloc malloc
-#define zrealloc realloc
-#define zcalloc(x) calloc(x,1)
-#define zfree free
-#define zstrdup strdup
-
-#endif
diff --git a/deps/jemalloc/.autom4te.cfg b/deps/jemalloc/.autom4te.cfg
new file mode 100644
index 000000000..fe2424db5
--- /dev/null
+++ b/deps/jemalloc/.autom4te.cfg
@@ -0,0 +1,3 @@
+begin-language: "Autoconf-without-aclocal-m4"
+args: --no-cache
+end-language: "Autoconf-without-aclocal-m4"
diff --git a/deps/jemalloc/.gitattributes b/deps/jemalloc/.gitattributes
new file mode 100644
index 000000000..6313b56c5
--- /dev/null
+++ b/deps/jemalloc/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
diff --git a/deps/jemalloc/.gitignore b/deps/jemalloc/.gitignore
index 4c408ec2c..d0e393619 100644
--- a/deps/jemalloc/.gitignore
+++ b/deps/jemalloc/.gitignore
@@ -1,8 +1,8 @@
/*.gcov.*
-/autom4te.cache/
-
+/bin/jemalloc-config
/bin/jemalloc.sh
+/bin/jeprof
/config.stamp
/config.log
@@ -15,6 +15,8 @@
/doc/jemalloc.html
/doc/jemalloc.3
+/jemalloc.pc
+
/lib/
/Makefile
@@ -35,6 +37,7 @@
/include/jemalloc/jemalloc_protos.h
/include/jemalloc/jemalloc_protos_jet.h
/include/jemalloc/jemalloc_rename.h
+/include/jemalloc/jemalloc_typedefs.h
/src/*.[od]
/src/*.gcda
diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING
index bdda0feb9..611968cda 100644
--- a/deps/jemalloc/COPYING
+++ b/deps/jemalloc/COPYING
@@ -1,10 +1,10 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following license:
--------------------------------------------------------------------------------
-Copyright (C) 2002-2014 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
-Copyright (C) 2009-2014 Facebook, Inc. All rights reserved.
+Copyright (C) 2009-2015 Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog
index d56ee999e..e3b0a5190 100644
--- a/deps/jemalloc/ChangeLog
+++ b/deps/jemalloc/ChangeLog
@@ -1,10 +1,250 @@
Following are change highlights associated with official releases. Important
-bug fixes are all mentioned, but internal enhancements are omitted here for
-brevity (even though they are more fun to write about). Much more detail can be
-found in the git revision history:
+bug fixes are all mentioned, but some internal enhancements are omitted here for
+brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
+* 4.0.3 (September 24, 2015)
+
+ This bugfix release continues the trend of xallocx() and heap profiling fixes.
+
+ Bug fixes:
+ - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large
+ allocations when --enable-cache-oblivious configure option is enabled.
+ - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations
+ when resizing from/to a size class that is not a multiple of the chunk size.
+ - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap
+ profile dumping started.
+ - Work around a potentially bad thread-specific data initialization
+ interaction with NPTL (glibc's pthreads implementation).
+
+* 4.0.2 (September 21, 2015)
+
+ This bugfix release addresses a few bugs specific to heap profiling.
+
+ Bug fixes:
+ - Fix ixallocx_prof_sample() to never modify nor create sampled small
+ allocations. xallocx() is in general incapable of moving small allocations,
+ so this fix removes buggy code without loss of generality.
+ - Fix irallocx_prof_sample() to always allocate large regions, even when
+ alignment is non-zero.
+ - Fix prof_alloc_rollback() to read tdata from thread-specific data rather
+ than dereferencing a potentially invalid tctx.
+
+* 4.0.1 (September 15, 2015)
+
+ This is a bugfix release that is somewhat high risk due to the amount of
+ refactoring required to address deep xallocx() problems. As a side effect of
+ these fixes, xallocx() now tries harder to partially fulfill requests for
+ optional extra space. Note that a couple of minor heap profiling
+ optimizations are included, but these are better thought of as performance
+ fixes that were integral to disovering most of the other bugs.
+
+ Optimizations:
+ - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the
+ fast path when heap profiling is enabled. Additionally, split a special
+ case out into arena_prof_tctx_reset(), which also avoids chunk metadata
+ reads.
+ - Optimize irallocx_prof() to optimistically update the sampler state. The
+ prior implementation appears to have been a holdover from when
+ rallocx()/xallocx() functionality was combined as rallocm().
+
+ Bug fixes:
+ - Fix TLS configuration such that it is enabled by default for platforms on
+ which it works correctly.
+ - Fix arenas_cache_cleanup() and arena_get_hard() to handle
+ allocation/deallocation within the application's thread-specific data
+ cleanup functions even after arenas_cache is torn down.
+ - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS.
+ - Fix chunk purge hook calls for in-place huge shrinking reallocation to
+ specify the old chunk size rather than the new chunk size. This bug caused
+ no correctness issues for the default chunk purge function, but was
+ visible to custom functions set via the "arena.<i>.chunk_hooks" mallctl.
+ - Fix heap profiling bugs:
+ + Fix heap profiling to distinguish among otherwise identical sample sites
+ with interposed resets (triggered via the "prof.reset" mallctl). This bug
+ could cause data structure corruption that would most likely result in a
+ segfault.
+ + Fix irealloc_prof() to prof_alloc_rollback() on OOM.
+ + Make one call to prof_active_get_unlocked() per allocation event, and use
+ the result throughout the relevant functions that handle an allocation
+ event. Also add a missing check in prof_realloc(). These fixes protect
+ allocation events against concurrent prof_active changes.
+ + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample()
+ in the correct order.
+ + Fix prof_realloc() to call prof_free_sampled_object() after calling
+ prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were
+ the same, the tctx could have been prematurely destroyed.
+ - Fix portability bugs:
+ + Don't bitshift by negative amounts when encoding/decoding run sizes in
+ chunk header maps. This affected systems with page sizes greater than 8
+ KiB.
+ + Rename index_t to szind_t to avoid an existing type on Solaris.
+ + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to
+ match glibc and avoid compilation errors when including both
+ jemalloc/jemalloc.h and malloc.h in C++ code.
+ + Don't assume that /bin/sh is appropriate when running size_classes.sh
+ during configuration.
+ + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM.
+ + Link tests to librt if it contains clock_gettime(2).
+
+* 4.0.0 (August 17, 2015)
+
+ This version contains many speed and space optimizations, both minor and
+ major. The major themes are generalization, unification, and simplification.
+ Although many of these optimizations cause no visible behavior change, their
+ cumulative effect is substantial.
+
+ New features:
+ - Normalize size class spacing to be consistent across the complete size
+ range. By default there are four size classes per size doubling, but this
+ is now configurable via the --with-lg-size-class-group option. Also add the
+ --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and
+ --with-lg-tiny-min options, which can be used to tweak page and size class
+ settings. Impacts:
+ + Worst case performance for incrementally growing/shrinking reallocation
+ is improved because there are far fewer size classes, and therefore
+ copying happens less often.
+ + Internal fragmentation is limited to 20% for all but the smallest size
+ classes (those less than four times the quantum). (1B + 4 KiB)
+ and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation.
+ + Chunk fragmentation tends to be lower because there are fewer distinct run
+ sizes to pack.
+ - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and
+ "tcache.destroy" mallctls control tcache lifetime and flushing, and the
+ MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API
+ control which tcache is used for each operation.
+ - Implement per thread heap profiling, as well as the ability to
+ enable/disable heap profiling on a per thread basis. Add the "prof.reset",
+ "prof.lg_sample", "thread.prof.name", "thread.prof.active",
+ "opt.prof_thread_active_init", "prof.thread_active_init", and
+ "thread.prof.active" mallctls.
+ - Add support for per arena application-specified chunk allocators, configured
+ via the "arena.<i>.chunk_hooks" mallctl.
+ - Refactor huge allocation to be managed by arenas, so that arenas now
+ function as general purpose independent allocators. This is important in
+ the context of user-specified chunk allocators, aside from the scalability
+ benefits. Related new statistics:
+ + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc",
+ "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests"
+ mallctls provide high level per arena huge allocation statistics.
+ + The "arenas.nhchunks", "arenas.hchunk.<i>.size",
+ "stats.arenas.<i>.hchunks.<j>.nmalloc",
+ "stats.arenas.<i>.hchunks.<j>.ndalloc",
+ "stats.arenas.<i>.hchunks.<j>.nrequests", and
+ "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class
+ statistics.
+ - Add the 'util' column to malloc_stats_print() output, which reports the
+ proportion of available regions that are currently in use for each small
+ size class.
+ - Add "alloc" and "free" modes for for junk filling (see the "opt.junk"
+ mallctl), so that it is possible to separately enable junk filling for
+ allocation versus deallocation.
+ - Add the jemalloc-config script, which provides information about how
+ jemalloc was configured, and how to integrate it into application builds.
+ - Add metadata statistics, which are accessible via the "stats.metadata",
+ "stats.arenas.<i>.metadata.mapped", and
+ "stats.arenas.<i>.metadata.allocated" mallctls.
+ - Add the "stats.resident" mallctl, which reports the upper limit of
+ physically resident memory mapped by the allocator.
+ - Add per arena control over unused dirty page purging, via the
+ "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and
+ "stats.arenas.<i>.lg_dirty_mult" mallctls.
+ - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump
+ feature on/off during program execution.
+ - Add sdallocx(), which implements sized deallocation. The primary
+ optimization over dallocx() is the removal of a metadata read, which often
+ suffers an L1 cache miss.
+ - Add missing header includes in jemalloc/jemalloc.h, so that applications
+ only have to #include <jemalloc/jemalloc.h>.
+ - Add support for additional platforms:
+ + Bitrig
+ + Cygwin
+ + DragonFlyBSD
+ + iOS
+ + OpenBSD
+ + OpenRISC/or1k
+
+ Optimizations:
+ - Maintain dirty runs in per arena LRUs rather than in per arena trees of
+ dirty-run-containing chunks. In practice this change significantly reduces
+ dirty page purging volume.
+ - Integrate whole chunks into the unused dirty page purging machinery. This
+ reduces the cost of repeated huge allocation/deallocation, because it
+ effectively introduces a cache of chunks.
+ - Split the arena chunk map into two separate arrays, in order to increase
+ cache locality for the frequently accessed bits.
+ - Move small run metadata out of runs, into arena chunk headers. This reduces
+ run fragmentation, smaller runs reduce external fragmentation for small size
+ classes, and packed (less uniformly aligned) metadata layout improves CPU
+ cache set distribution.
+ - Randomly distribute large allocation base pointer alignment relative to page
+ boundaries in order to more uniformly utilize CPU cache sets. This can be
+ disabled via the --disable-cache-oblivious configure option, and queried via
+ the "config.cache_oblivious" mallctl.
+ - Micro-optimize the fast paths for the public API functions.
+ - Refactor thread-specific data to reside in a single structure. This assures
+ that only a single TLS read is necessary per call into the public API.
+ - Implement in-place huge allocation growing and shrinking.
+ - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make
+ additional optimizations that reduce maximum lookup depth to one or two
+ levels. This resolves what was a concurrency bottleneck for per arena huge
+ allocation, because a global data structure is critical for determining
+ which arenas own which huge allocations.
+
+ Incompatible changes:
+ - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious
+ warnings by default.
+ - Assure that the constness of malloc_usable_size()'s return type matches that
+ of the system implementation.
+ - Change the heap profile dump format to support per thread heap profiling,
+ rename pprof to jeprof, and enhance it with the --thread=<n> option. As a
+ result, the bundled jeprof must now be used rather than the upstream
+ (gperftools) pprof.
+ - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can
+ internally deadlock on some platforms.
+ - Change the "arenas.nlruns" mallctl type from size_t to unsigned.
+ - Replace the "stats.arenas.<i>.bins.<j>.allocated" mallctl with
+ "stats.arenas.<i>.bins.<j>.curregs".
+ - Ignore MALLOC_CONF in set{uid,gid,cap} binaries.
+ - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the
+ MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage.
+
+ Removed features:
+ - Remove the *allocm() API, which is superseded by the *allocx() API.
+ - Remove the --enable-dss options, and make dss non-optional on all platforms
+ which support sbrk(2).
+ - Remove the "arenas.purge" mallctl, which was obsoleted by the
+ "arena.<i>.purge" mallctl in 3.1.0.
+ - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically
+ detects whether it is running inside Valgrind.
+ - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and
+ "stats.huge.ndalloc" mallctls.
+ - Remove the --enable-mremap option.
+ - Remove the "stats.chunks.current", "stats.chunks.total", and
+ "stats.chunks.high" mallctls.
+
+ Bug fixes:
+ - Fix the cactive statistic to decrease (rather than increase) when active
+ memory decreases. This regression was first released in 3.5.0.
+ - Fix OOM handling in memalign() and valloc(). A variant of this bug existed
+ in all releases since 2.0.0, which introduced these functions.
+ - Fix an OOM-related regression in arena_tcache_fill_small(), which could
+ cause cache corruption on OOM. This regression was present in all releases
+ from 2.2.0 through 3.6.0.
+ - Fix size class overflow handling for malloc(), posix_memalign(), memalign(),
+ calloc(), and realloc() when profiling is enabled.
+ - Fix the "arena.<i>.dss" mallctl to return an error if "primary" or
+ "secondary" precedence is specified, but sbrk(2) is not supported.
+ - Fix fallback lg_floor() implementations to handle extremely large inputs.
+ - Ensure the default purgeable zone is after the default zone on OS X.
+ - Fix latent bugs in atomic_*().
+ - Fix the "arena.<i>.dss" mallctl to handle read-only calls.
+ - Fix tls_model configuration to enable the initial-exec model when possible.
+ - Mark malloc_conf as a weak symbol so that the application can override it.
+ - Correctly detect glibc's adaptive pthread mutexes.
+ - Fix the --without-export configure option.
+
* 3.6.0 (March 31, 2014)
This version contains a critical bug fix for a regression present in 3.5.0 and
@@ -21,7 +261,7 @@ found in the git revision history:
backtracing to be reliable.
- Use dss allocation precedence for huge allocations as well as small/large
allocations.
- - Fix test assertion failure message formatting. This bug did not manifect on
+ - Fix test assertion failure message formatting. This bug did not manifest on
x86_64 systems because of implementation subtleties in va_list.
- Fix inconsequential test failures for hash and SFMT code.
@@ -516,7 +756,7 @@ found in the git revision history:
- Make it possible for the application to manually flush a thread's cache, via
the "tcache.flush" mallctl.
- Base maximum dirty page count on proportion of active memory.
- - Compute various addtional run-time statistics, including per size class
+ - Compute various additional run-time statistics, including per size class
statistics for large objects.
- Expose malloc_stats_print(), which can be called repeatedly by the
application.
diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL
index 841704d2a..8d3968745 100644
--- a/deps/jemalloc/INSTALL
+++ b/deps/jemalloc/INSTALL
@@ -1,10 +1,23 @@
-Building and installing jemalloc can be as simple as typing the following while
-in the root directory of the source tree:
+Building and installing a packaged release of jemalloc can be as simple as
+typing the following while in the root directory of the source tree:
./configure
make
make install
+If building from unpackaged developer sources, the simplest command sequence
+that might work is:
+
+ ./autogen.sh
+ make dist
+ make
+ make install
+
+Note that documentation is not built by the default target because doing so
+would create a dependency on xsltproc in packaged releases, hence the
+requirement to either run 'make dist' or avoid installing docs via the various
+install_* targets documented below.
+
=== Advanced configuration =====================================================
The 'configure' script supports numerous options that allow control of which
@@ -56,7 +69,7 @@ any of the following arguments (not a definitive list) to 'configure':
replace the "malloc", "calloc", etc. symbols.
--without-export
- Don't export public APIs. This can be useful when building jemalloc as a
+ Don't export public APIs. This can be useful when building jemalloc as a
static library, or to avoid exporting public APIs when using the zone
allocator on OSX.
@@ -71,10 +84,10 @@ any of the following arguments (not a definitive list) to 'configure':
versions of jemalloc can coexist in the same installation directory. For
example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
---enable-cc-silence
- Enable code that silences non-useful compiler warnings. This is helpful
- when trying to tell serious warnings from those due to compiler
- limitations, but it potentially incurs a performance penalty.
+--disable-cc-silence
+ Disable code that silences non-useful compiler warnings. This is mainly
+ useful during development when auditing the set of warnings that are being
+ silenced.
--enable-debug
Enable assertions and validation code. This incurs a substantial
@@ -94,15 +107,15 @@ any of the following arguments (not a definitive list) to 'configure':
there are interactions between the various coverage targets, so it is
usually advisable to run 'make clean' between repeated code coverage runs.
---enable-ivsalloc
- Enable validation code, which verifies that pointers reside within
- jemalloc-owned chunks before dereferencing them. This incurs a substantial
- performance hit.
-
--disable-stats
Disable statistics gathering functionality. See the "opt.stats_print"
option documentation for usage details.
+--enable-ivsalloc
+ Enable validation code, which verifies that pointers reside within
+ jemalloc-owned chunks before dereferencing them. This incurs a minor
+ performance hit.
+
--enable-prof
Enable heap profiling and leak detection functionality. See the "opt.prof"
option documentation for usage details. When enabled, there are several
@@ -132,12 +145,6 @@ any of the following arguments (not a definitive list) to 'configure':
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details.
---enable-mremap
- Enable huge realloc() via mremap(2). mremap() is disabled by default
- because the flavor used is specific to Linux, which has a quirk in its
- virtual memory allocation algorithm that causes semi-permanent VM map holes
- under normal jemalloc operation.
-
--disable-munmap
Disable virtual memory deallocation via munmap(2); instead keep track of
the virtual memory for later use. munmap() is disabled by default (i.e.
@@ -145,10 +152,6 @@ any of the following arguments (not a definitive list) to 'configure':
memory allocation algorithm that causes semi-permanent VM map holes under
normal jemalloc operation.
---enable-dss
- Enable support for page allocation/deallocation via sbrk(2), in addition to
- mmap(2).
-
--disable-fill
Disable support for junk/zero filling of memory, quarantine, and redzones.
See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option
@@ -157,11 +160,8 @@ any of the following arguments (not a definitive list) to 'configure':
--disable-valgrind
Disable support for Valgrind.
---disable-experimental
- Disable support for the experimental API (*allocm()).
-
--disable-zone-allocator
- Disable zone allocator for Darwin. This means jemalloc won't be hooked as
+ Disable zone allocator for Darwin. This means jemalloc won't be hooked as
the default allocator on OSX/iOS.
--enable-utrace
@@ -185,10 +185,106 @@ any of the following arguments (not a definitive list) to 'configure':
thread-local variables via the __thread keyword. If TLS is available,
jemalloc uses it for several purposes.
+--disable-cache-oblivious
+ Disable cache-oblivious large allocation alignment for large allocation
+ requests with no alignment constraints. If this feature is disabled, all
+ large allocations are page-aligned as an implementation artifact, which can
+ severely harm CPU cache utilization. However, the cache-oblivious layout
+ comes at the cost of one extra page per large allocation, which in the
+ most extreme case increases physical memory usage for the 16 KiB size class
+ to 20 KiB.
+
--with-xslroot=<path>
Specify where to find DocBook XSL stylesheets when building the
documentation.
+--with-lg-page=<lg-page>
+ Specify the base 2 log of the system page size. This option is only useful
+ when cross compiling, since the configure script automatically determines
+ the host's page size by default.
+
+--with-lg-page-sizes=<lg-page-sizes>
+ Specify the comma-separated base 2 logs of the page sizes to support. This
+ option may be useful when cross-compiling in combination with
+ --with-lg-page, but its primary use case is for integration with FreeBSD's
+ libc, wherein jemalloc is embedded.
+
+--with-lg-size-class-group=<lg-size-class-group>
+ Specify the base 2 log of how many size classes to use for each doubling in
+ size. By default jemalloc uses <lg-size-class-group>=2, which results in
+ e.g. the following size classes:
+
+ [...], 64,
+ 80, 96, 112, 128,
+ 160, [...]
+
+ <lg-size-class-group>=3 results in e.g. the following size classes:
+
+ [...], 64,
+ 72, 80, 88, 96, 104, 112, 120, 128,
+ 144, [...]
+
+ The minimal <lg-size-class-group>=0 causes jemalloc to only provide size
+ classes that are powers of 2:
+
+ [...],
+ 64,
+ 128,
+ 256,
+ [...]
+
+ An implementation detail currently limits the total number of small size
+ classes to 255, and a compilation error will result if the
+ <lg-size-class-group> you specify cannot be supported. The limit is
+ roughly <lg-size-class-group>=4, depending on page size.
+
+--with-lg-quantum=<lg-quantum>
+ Specify the base 2 log of the minimum allocation alignment. jemalloc needs
+ to know the minimum alignment that meets the following C standard
+ requirement (quoted from the April 12, 2011 draft of the C11 standard):
+
+ The pointer returned if the allocation succeeds is suitably aligned so
+ that it may be assigned to a pointer to any type of object with a
+ fundamental alignment requirement and then used to access such an object
+ or an array of such objects in the space allocated [...]
+
+ This setting is architecture-specific, and although jemalloc includes known
+ safe values for the most commonly used modern architectures, there is a
+ wrinkle related to GNU libc (glibc) that may impact your choice of
+ <lg-quantum>. On most modern architectures, this mandates 16-byte alignment
+ (<lg-quantum>=4), but the glibc developers chose not to meet this
+ requirement for performance reasons. An old discussion can be found at
+ https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc,
+ jemalloc does follow the C standard by default (caveat: jemalloc
+ technically cheats if --with-lg-tiny-min is smaller than
+ --with-lg-quantum), but the fact that Linux systems already work around
+ this allocator noncompliance means that it is generally safe in practice to
+ let jemalloc's minimum alignment follow glibc's lead. If you specify
+ --with-lg-quantum=3 during configuration, jemalloc will provide additional
+ size classes that are not 16-byte-aligned (24, 40, and 56, assuming
+ --with-lg-size-class-group=2).
+
+--with-lg-tiny-min=<lg-tiny-min>
+ Specify the base 2 log of the minimum tiny size class to support. Tiny
+ size classes are powers of 2 less than the quantum, and are only
+ incorporated if <lg-tiny-min> is less than <lg-quantum> (see
+ --with-lg-quantum). Tiny size classes technically violate the C standard
+ requirement for minimum alignment, and crashes could conceivably result if
+ the compiler were to generate instructions that made alignment assumptions,
+ both because illegal instruction traps could result, and because accesses
+ could straddle page boundaries and cause segmentation faults due to
+ accessing unmapped addresses.
+
+ The default of <lg-tiny-min>=3 works well in practice even on architectures
+ that technically require 16-byte alignment, probably for the same reason
+ --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause
+ crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an
+ example).
+
+ This option is rarely useful, and is mainly provided as documentation of a
+ subtle implementation detail. If you do use this option, specify a
+ value in [3, ..., <lg-quantum>].
+
The following environment variables (not a definitive list) impact configure's
behavior:
diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in
index d6b7d6ea3..1ac6f2926 100644
--- a/deps/jemalloc/Makefile.in
+++ b/deps/jemalloc/Makefile.in
@@ -28,6 +28,7 @@ CFLAGS := @CFLAGS@
LDFLAGS := @LDFLAGS@
EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
LIBS := @LIBS@
+TESTLIBS := @TESTLIBS@
RPATH_EXTRA := @RPATH_EXTRA@
SO := @so@
IMPORTLIB := @importlib@
@@ -42,14 +43,16 @@ XSLTPROC := @XSLTPROC@
AUTOCONF := @AUTOCONF@
_RPATH = @RPATH@
RPATH = $(if $(1),$(call _RPATH,$(1)))
-cfghdrs_in := @cfghdrs_in@
+cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@)
cfghdrs_out := @cfghdrs_out@
-cfgoutputs_in := @cfgoutputs_in@
+cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@)
cfgoutputs_out := @cfgoutputs_out@
enable_autogen := @enable_autogen@
enable_code_coverage := @enable_code_coverage@
-enable_experimental := @enable_experimental@
+enable_prof := @enable_prof@
+enable_valgrind := @enable_valgrind@
enable_zone_allocator := @enable_zone_allocator@
+MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF
DSO_LDFLAGS = @DSO_LDFLAGS@
SOREV = @SOREV@
PIC_CFLAGS = @PIC_CFLAGS@
@@ -73,16 +76,20 @@ endif
LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
# Lists of files.
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh
+BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof
C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \
$(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \
$(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \
$(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \
$(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \
- $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \
- $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \
- $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c
+ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \
+ $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \
+ $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \
+ $(srcroot)src/tsd.c
+ifeq ($(enable_valgrind), 1)
+C_SRCS += $(srcroot)src/valgrind.c
+endif
ifeq ($(enable_zone_allocator), 1)
C_SRCS += $(srcroot)src/zone.c
endif
@@ -98,53 +105,60 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
ifneq ($(SOREV),$(SO))
DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
endif
+PC := $(objroot)jemalloc.pc
MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
-DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
-DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3)
+DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
+DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
-C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \
+C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
+ $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
+ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
- $(srcroot)test/src/thd.c
+ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c
-TESTS_UNIT := $(srcroot)test/unit/bitmap.c \
+TESTS_UNIT := $(srcroot)test/unit/atomic.c \
+ $(srcroot)test/unit/bitmap.c \
$(srcroot)test/unit/ckh.c \
$(srcroot)test/unit/hash.c \
$(srcroot)test/unit/junk.c \
+ $(srcroot)test/unit/junk_alloc.c \
+ $(srcroot)test/unit/junk_free.c \
+ $(srcroot)test/unit/lg_chunk.c \
$(srcroot)test/unit/mallctl.c \
$(srcroot)test/unit/math.c \
$(srcroot)test/unit/mq.c \
$(srcroot)test/unit/mtx.c \
$(srcroot)test/unit/prof_accum.c \
+ $(srcroot)test/unit/prof_active.c \
$(srcroot)test/unit/prof_gdump.c \
$(srcroot)test/unit/prof_idump.c \
+ $(srcroot)test/unit/prof_reset.c \
+ $(srcroot)test/unit/prof_thread_name.c \
$(srcroot)test/unit/ql.c \
$(srcroot)test/unit/qr.c \
$(srcroot)test/unit/quarantine.c \
$(srcroot)test/unit/rb.c \
$(srcroot)test/unit/rtree.c \
$(srcroot)test/unit/SFMT.c \
+ $(srcroot)test/unit/size_classes.c \
$(srcroot)test/unit/stats.c \
$(srcroot)test/unit/tsd.c \
$(srcroot)test/unit/util.c \
$(srcroot)test/unit/zero.c
-TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \
- $(srcroot)test/unit/prof_accum_b.c
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
+ $(srcroot)test/integration/sdallocx.c \
$(srcroot)test/integration/mallocx.c \
- $(srcroot)test/integration/mremap.c \
+ $(srcroot)test/integration/MALLOCX_ARENA.c \
+ $(srcroot)test/integration/overflow.c \
$(srcroot)test/integration/posix_memalign.c \
$(srcroot)test/integration/rallocx.c \
$(srcroot)test/integration/thread_arena.c \
$(srcroot)test/integration/thread_tcache_enabled.c \
- $(srcroot)test/integration/xallocx.c
-ifeq ($(enable_experimental), 1)
-TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \
- $(srcroot)test/integration/MALLOCX_ARENA.c \
- $(srcroot)test/integration/rallocm.c
-endif
-TESTS_STRESS :=
+ $(srcroot)test/integration/xallocx.c \
+ $(srcroot)test/integration/chunk.c
+TESTS_STRESS := $(srcroot)test/stress/microbench.c
TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS)
C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O))
@@ -157,10 +171,9 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O))
C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS)
TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O))
-TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O))
-TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
+TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
.PHONY: all dist build_doc_html build_doc_man build_doc
.PHONY: install_bin install_include install_lib
@@ -174,10 +187,10 @@ all: build_lib
dist: build_doc
-$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
+$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
-$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
+$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
build_doc_html: $(DOCS_HTML)
@@ -209,18 +222,12 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%
$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB
$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
-$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
-define make-unit-link-dep
-$(1): TESTS_UNIT_LINK_OBJS += $(2)
-$(1): $(2)
-endef
-$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS)))))
$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST
$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
$(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
ifneq ($(IMPORTLIB),$(SO))
-$(C_OBJS): CPPFLAGS += -DDLLEXPORT
+$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT
endif
ifndef CC_MM
@@ -229,7 +236,7 @@ HEADER_DIRS = $(srcroot)include/jemalloc/internal \
$(objroot)include/jemalloc $(objroot)include/jemalloc/internal
HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))
$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS)
-$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h
+$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h
endif
$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
@@ -259,15 +266,15 @@ $(STATIC_LIBS):
$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
$(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
$(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS)
build_lib_shared: $(DSOS)
build_lib_static: $(STATIC_LIBS)
@@ -301,7 +308,14 @@ install_lib_static: $(STATIC_LIBS)
install -m 755 $$l $(LIBDIR); \
done
-install_lib: install_lib_shared install_lib_static
+install_lib_pc: $(PC)
+ install -d $(LIBDIR)/pkgconfig
+ @for l in $(PC); do \
+ echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \
+ install -m 644 $$l $(LIBDIR)/pkgconfig; \
+done
+
+install_lib: install_lib_shared install_lib_static install_lib_pc
install_doc_html:
install -d $(DATADIR)/doc/jemalloc$(install_suffix)
@@ -330,18 +344,23 @@ check_unit_dir:
@mkdir -p $(objroot)test/unit
check_integration_dir:
@mkdir -p $(objroot)test/integration
-check_stress_dir:
+stress_dir:
@mkdir -p $(objroot)test/stress
-check_dir: check_unit_dir check_integration_dir check_stress_dir
+check_dir: check_unit_dir check_integration_dir
check_unit: tests_unit check_unit_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+check_integration_prof: tests_integration check_integration_dir
+ifeq ($(enable_prof), 1)
+ $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+ $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+endif
check_integration: tests_integration check_integration_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
-check_stress: tests_stress check_stress_dir
+stress: tests_stress stress_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
-check: tests check_dir
- $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%)
+check: tests check_dir check_integration_prof
+ $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
ifeq ($(enable_code_coverage), 1)
coverage_unit: check_unit
@@ -355,7 +374,7 @@ coverage_integration: check_integration
$(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS)
$(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS)
-coverage_stress: check_stress
+coverage_stress: stress
$(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS)
$(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS)
$(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS)
@@ -400,8 +419,9 @@ clean:
rm -f $(objroot)*.gcov.*
distclean: clean
- rm -rf $(objroot)autom4te.cache
+ rm -f $(objroot)bin/jemalloc-config
rm -f $(objroot)bin/jemalloc.sh
+ rm -f $(objroot)bin/jeprof
rm -f $(objroot)config.log
rm -f $(objroot)config.status
rm -f $(objroot)config.stamp
@@ -410,7 +430,7 @@ distclean: clean
relclean: distclean
rm -f $(objroot)configure
- rm -f $(srcroot)VERSION
+ rm -f $(objroot)VERSION
rm -f $(DOCS_HTML)
rm -f $(DOCS_MAN3)
diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION
index dace31ba7..f1f9f1c61 100644
--- a/deps/jemalloc/VERSION
+++ b/deps/jemalloc/VERSION
@@ -1 +1 @@
-3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340
+4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c
diff --git a/deps/jemalloc/bin/jemalloc-config.in b/deps/jemalloc/bin/jemalloc-config.in
new file mode 100644
index 000000000..b016c8d33
--- /dev/null
+++ b/deps/jemalloc/bin/jemalloc-config.in
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+usage() {
+ cat <<EOF
+Usage:
+ @BINDIR@/jemalloc-config <option>
+Options:
+ --help | -h : Print usage.
+ --version : Print jemalloc version.
+ --revision : Print shared library revision number.
+ --config : Print configure options used to build jemalloc.
+ --prefix : Print installation directory prefix.
+ --bindir : Print binary installation directory.
+ --datadir : Print data installation directory.
+ --includedir : Print include installation directory.
+ --libdir : Print library installation directory.
+ --mandir : Print manual page installation directory.
+ --cc : Print compiler used to build jemalloc.
+ --cflags : Print compiler flags used to build jemalloc.
+ --cppflags : Print preprocessor flags used to build jemalloc.
+ --ldflags : Print library flags used to build jemalloc.
+ --libs : Print libraries jemalloc was linked against.
+EOF
+}
+
+prefix="@prefix@"
+exec_prefix="@exec_prefix@"
+
+case "$1" in
+--help | -h)
+ usage
+ exit 0
+ ;;
+--version)
+ echo "@jemalloc_version@"
+ ;;
+--revision)
+ echo "@rev@"
+ ;;
+--config)
+ echo "@CONFIG@"
+ ;;
+--prefix)
+ echo "@PREFIX@"
+ ;;
+--bindir)
+ echo "@BINDIR@"
+ ;;
+--datadir)
+ echo "@DATADIR@"
+ ;;
+--includedir)
+ echo "@INCLUDEDIR@"
+ ;;
+--libdir)
+ echo "@LIBDIR@"
+ ;;
+--mandir)
+ echo "@MANDIR@"
+ ;;
+--cc)
+ echo "@CC@"
+ ;;
+--cflags)
+ echo "@CFLAGS@"
+ ;;
+--cppflags)
+ echo "@CPPFLAGS@"
+ ;;
+--ldflags)
+ echo "@LDFLAGS@ @EXTRA_LDFLAGS@"
+ ;;
+--libs)
+ echo "@LIBS@"
+ ;;
+*)
+ usage
+ exit 1
+esac
diff --git a/deps/jemalloc/bin/pprof b/deps/jemalloc/bin/jeprof.in
index a309943c1..e7178078a 100755..100644
--- a/deps/jemalloc/bin/pprof
+++ b/deps/jemalloc/bin/jeprof.in
@@ -2,11 +2,11 @@
# Copyright (c) 1998-2007, Google Inc.
# All rights reserved.
-#
+#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
-#
+#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
@@ -16,7 +16,7 @@
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
-#
+#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -40,28 +40,28 @@
#
# Examples:
#
-# % tools/pprof "program" "profile"
+# % tools/jeprof "program" "profile"
# Enters "interactive" mode
#
-# % tools/pprof --text "program" "profile"
+# % tools/jeprof --text "program" "profile"
# Generates one line per procedure
#
-# % tools/pprof --gv "program" "profile"
+# % tools/jeprof --gv "program" "profile"
# Generates annotated call-graph and displays via "gv"
#
-# % tools/pprof --gv --focus=Mutex "program" "profile"
+# % tools/jeprof --gv --focus=Mutex "program" "profile"
# Restrict to code paths that involve an entry that matches "Mutex"
#
-# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile"
+# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile"
# Restrict to code paths that involve an entry that matches "Mutex"
# and does not match "string"
#
-# % tools/pprof --list=IBF_CheckDocid "program" "profile"
+# % tools/jeprof --list=IBF_CheckDocid "program" "profile"
# Generates disassembly listing of all routines with at least one
# sample that match the --list=<regexp> pattern. The listing is
# annotated with the flat and cumulative sample counts at each line.
#
-# % tools/pprof --disasm=IBF_CheckDocid "program" "profile"
+# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile"
# Generates disassembly listing of all routines with at least one
# sample that match the --disasm=<regexp> pattern. The listing is
# annotated with the flat and cumulative sample counts at each PC value.
@@ -72,10 +72,11 @@ use strict;
use warnings;
use Getopt::Long;
+my $JEPROF_VERSION = "@jemalloc_version@";
my $PPROF_VERSION = "2.0";
# These are the object tools we use which can come from a
-# user-specified location using --tools, from the PPROF_TOOLS
+# user-specified location using --tools, from the JEPROF_TOOLS
# environment variable, or from the environment.
my %obj_tool_map = (
"objdump" => "objdump",
@@ -144,13 +145,13 @@ my $sep_address = undef;
sub usage_string {
return <<EOF;
Usage:
-pprof [options] <program> <profiles>
+jeprof [options] <program> <profiles>
<profiles> is a space separated list of profile names.
-pprof [options] <symbolized-profiles>
+jeprof [options] <symbolized-profiles>
<symbolized-profiles> is a list of profile files where each file contains
the necessary symbol mappings as well as profile data (likely generated
with --raw).
-pprof [options] <profile>
+jeprof [options] <profile>
<profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE
Each name can be:
@@ -161,9 +162,9 @@ pprof [options] <profile>
$GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
$CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
For instance:
- pprof http://myserver.com:80$HEAP_PAGE
+ jeprof http://myserver.com:80$HEAP_PAGE
If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
-pprof --symbols <program>
+jeprof --symbols <program>
Maps addresses to symbol names. In this mode, stdin should be a
list of library mappings, in the same format as is found in the heap-
and cpu-profile files (this loosely matches that of /proc/self/maps
@@ -202,7 +203,7 @@ Output type:
--pdf Generate PDF to stdout
--svg Generate SVG to stdout
--gif Generate GIF to stdout
- --raw Generate symbolized pprof data (useful with remote fetch)
+ --raw Generate symbolized jeprof data (useful with remote fetch)
Heap-Profile Options:
--inuse_space Display in-use (mega)bytes [default]
@@ -223,6 +224,7 @@ Call-graph Options:
--edgefraction=<f> Hide edges below <f>*total [default=.001]
--maxdegree=<n> Max incoming/outgoing edges per node [default=8]
--focus=<regexp> Focus on nodes matching <regexp>
+ --thread=<n> Show profile for thread <n>
--ignore=<regexp> Ignore nodes matching <regexp>
--scale=<n> Set GV scaling [default=0]
--heapcheck Make nodes with non-0 object counts
@@ -235,34 +237,34 @@ Miscellaneous:
--version Version information
Environment Variables:
- PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof
- PPROF_TOOLS Prefix for object tools pathnames
+ JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof
+ JEPROF_TOOLS Prefix for object tools pathnames
Examples:
-pprof /bin/ls ls.prof
+jeprof /bin/ls ls.prof
Enters "interactive" mode
-pprof --text /bin/ls ls.prof
+jeprof --text /bin/ls ls.prof
Outputs one line per procedure
-pprof --web /bin/ls ls.prof
+jeprof --web /bin/ls ls.prof
Displays annotated call-graph in web browser
-pprof --gv /bin/ls ls.prof
+jeprof --gv /bin/ls ls.prof
Displays annotated call-graph via 'gv'
-pprof --gv --focus=Mutex /bin/ls ls.prof
+jeprof --gv --focus=Mutex /bin/ls ls.prof
Restricts to code paths including a .*Mutex.* entry
-pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
+jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
Code paths including Mutex but not string
-pprof --list=getdir /bin/ls ls.prof
+jeprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
-pprof --disasm=getdir /bin/ls ls.prof
+jeprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
-pprof http://localhost:1234/
+jeprof http://localhost:1234/
Enters "interactive" mode
-pprof --text localhost:1234
+jeprof --text localhost:1234
Outputs one line per procedure for localhost:1234
-pprof --raw localhost:1234 > ./local.raw
-pprof --text ./local.raw
+jeprof --raw localhost:1234 > ./local.raw
+jeprof --text ./local.raw
Fetches a remote profile for later analysis and then
analyzes it in text mode.
EOF
@@ -270,7 +272,8 @@ EOF
sub version_string {
return <<EOF
-pprof (part of gperftools $PPROF_VERSION)
+jeprof (part of jemalloc $JEPROF_VERSION)
+based on pprof (part of gperftools $PPROF_VERSION)
Copyright 1998-2007 Google Inc.
@@ -293,8 +296,8 @@ sub Init() {
# Setup tmp-file name and handler to clean it up.
# We do this in the very beginning so that we can use
# error() and cleanup() function anytime here after.
- $main::tmpfile_sym = "/tmp/pprof$$.sym";
- $main::tmpfile_ps = "/tmp/pprof$$";
+ $main::tmpfile_sym = "/tmp/jeprof$$.sym";
+ $main::tmpfile_ps = "/tmp/jeprof$$";
$main::next_tmpfile = 0;
$SIG{'INT'} = \&sighandler;
@@ -332,6 +335,7 @@ sub Init() {
$main::opt_edgefraction = 0.001;
$main::opt_maxdegree = 8;
$main::opt_focus = '';
+ $main::opt_thread = undef;
$main::opt_ignore = '';
$main::opt_scale = 0;
$main::opt_heapcheck = 0;
@@ -402,6 +406,7 @@ sub Init() {
"edgefraction=f" => \$main::opt_edgefraction,
"maxdegree=i" => \$main::opt_maxdegree,
"focus=s" => \$main::opt_focus,
+ "thread=s" => \$main::opt_thread,
"ignore=s" => \$main::opt_ignore,
"scale=i" => \$main::opt_scale,
"heapcheck" => \$main::opt_heapcheck,
@@ -562,66 +567,12 @@ sub Init() {
}
}
-sub Main() {
- Init();
- $main::collected_profile = undef;
- @main::profile_files = ();
- $main::op_time = time();
-
- # Printing symbols is special and requires a lot less info that most.
- if ($main::opt_symbols) {
- PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin
- return;
- }
-
- # Fetch all profile data
- FetchDynamicProfiles();
-
- # this will hold symbols that we read from the profile files
- my $symbol_map = {};
-
- # Read one profile, pick the last item on the list
- my $data = ReadProfile($main::prog, pop(@main::profile_files));
- my $profile = $data->{profile};
- my $pcs = $data->{pcs};
- my $libs = $data->{libs}; # Info about main program and shared libraries
- $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
-
- # Add additional profiles, if available.
- if (scalar(@main::profile_files) > 0) {
- foreach my $pname (@main::profile_files) {
- my $data2 = ReadProfile($main::prog, $pname);
- $profile = AddProfile($profile, $data2->{profile});
- $pcs = AddPcs($pcs, $data2->{pcs});
- $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
- }
- }
-
- # Subtract base from profile, if specified
- if ($main::opt_base ne '') {
- my $base = ReadProfile($main::prog, $main::opt_base);
- $profile = SubtractProfile($profile, $base->{profile});
- $pcs = AddPcs($pcs, $base->{pcs});
- $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
- }
+sub FilterAndPrint {
+ my ($profile, $symbols, $libs, $thread) = @_;
# Get total data in profile
my $total = TotalProfile($profile);
- # Collect symbols
- my $symbols;
- if ($main::use_symbolized_profile) {
- $symbols = FetchSymbols($pcs, $symbol_map);
- } elsif ($main::use_symbol_page) {
- $symbols = FetchSymbols($pcs);
- } else {
- # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
- # which may differ from the data from subsequent profiles, especially
- # if they were run on different machines. Use appropriate libs for
- # each pc somehow.
- $symbols = ExtractSymbols($libs, $pcs);
- }
-
# Remove uniniteresting stack items
$profile = RemoveUninterestingFrames($symbols, $profile);
@@ -656,7 +607,9 @@ sub Main() {
# (only matters when --heapcheck is given but we must be
# compatible with old branches that did not pass --heapcheck always):
if ($total != 0) {
- printf("Total: %s %s\n", Unparse($total), Units());
+ printf("Total%s: %s %s\n",
+ (defined($thread) ? " (t$thread)" : ""),
+ Unparse($total), Units());
}
PrintText($symbols, $flat, $cumulative, -1);
} elsif ($main::opt_raw) {
@@ -692,6 +645,77 @@ sub Main() {
} else {
InteractiveMode($profile, $symbols, $libs, $total);
}
+}
+
+sub Main() {
+ Init();
+ $main::collected_profile = undef;
+ @main::profile_files = ();
+ $main::op_time = time();
+
+ # Printing symbols is special and requires a lot less info that most.
+ if ($main::opt_symbols) {
+ PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin
+ return;
+ }
+
+ # Fetch all profile data
+ FetchDynamicProfiles();
+
+ # this will hold symbols that we read from the profile files
+ my $symbol_map = {};
+
+ # Read one profile, pick the last item on the list
+ my $data = ReadProfile($main::prog, pop(@main::profile_files));
+ my $profile = $data->{profile};
+ my $pcs = $data->{pcs};
+ my $libs = $data->{libs}; # Info about main program and shared libraries
+ $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
+
+ # Add additional profiles, if available.
+ if (scalar(@main::profile_files) > 0) {
+ foreach my $pname (@main::profile_files) {
+ my $data2 = ReadProfile($main::prog, $pname);
+ $profile = AddProfile($profile, $data2->{profile});
+ $pcs = AddPcs($pcs, $data2->{pcs});
+ $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
+ }
+ }
+
+ # Subtract base from profile, if specified
+ if ($main::opt_base ne '') {
+ my $base = ReadProfile($main::prog, $main::opt_base);
+ $profile = SubtractProfile($profile, $base->{profile});
+ $pcs = AddPcs($pcs, $base->{pcs});
+ $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
+ }
+
+ # Collect symbols
+ my $symbols;
+ if ($main::use_symbolized_profile) {
+ $symbols = FetchSymbols($pcs, $symbol_map);
+ } elsif ($main::use_symbol_page) {
+ $symbols = FetchSymbols($pcs);
+ } else {
+ # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
+ # which may differ from the data from subsequent profiles, especially
+ # if they were run on different machines. Use appropriate libs for
+ # each pc somehow.
+ $symbols = ExtractSymbols($libs, $pcs);
+ }
+
+ if (!defined($main::opt_thread)) {
+ FilterAndPrint($profile, $symbols, $libs);
+ }
+ if (defined($data->{threads})) {
+ foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) {
+ if (defined($main::opt_thread) &&
+ ($main::opt_thread eq '*' || $main::opt_thread == $thread)) {
+ my $thread_profile = $data->{threads}{$thread};
+ FilterAndPrint($thread_profile, $symbols, $libs, $thread);
+ }
+ }
+ }
cleanup();
exit(0);
@@ -780,14 +804,14 @@ sub InteractiveMode {
$| = 1; # Make output unbuffered for interactive mode
my ($orig_profile, $symbols, $libs, $total) = @_;
- print STDERR "Welcome to pprof! For help, type 'help'.\n";
+ print STDERR "Welcome to jeprof! For help, type 'help'.\n";
# Use ReadLine if it's installed and input comes from a console.
if ( -t STDIN &&
!ReadlineMightFail() &&
defined(eval {require Term::ReadLine}) ) {
- my $term = new Term::ReadLine 'pprof';
- while ( defined ($_ = $term->readline('(pprof) '))) {
+ my $term = new Term::ReadLine 'jeprof';
+ while ( defined ($_ = $term->readline('(jeprof) '))) {
$term->addhistory($_) if /\S/;
if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
last; # exit when we get an interactive command to quit
@@ -795,7 +819,7 @@ sub InteractiveMode {
}
} else { # don't have readline
while (1) {
- print STDERR "(pprof) ";
+ print STDERR "(jeprof) ";
$_ = <STDIN>;
last if ! defined $_ ;
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -988,7 +1012,7 @@ sub ProcessProfile {
sub InteractiveHelpMessage {
print STDERR <<ENDOFHELP;
-Interactive pprof mode
+Interactive jeprof mode
Commands:
gv
@@ -1031,7 +1055,7 @@ Commands:
Generates callgrind file. If no filename is given, kcachegrind is called.
help - This listing
- quit or ^D - End pprof
+ quit or ^D - End jeprof
For commands that accept optional -ignore tags, samples where any routine in
the stack trace matches the regular expression in any of the -ignore
@@ -1476,7 +1500,7 @@ h1 {
}
</style>
<script type="text/javascript">
-function pprof_toggle_asm(e) {
+function jeprof_toggle_asm(e) {
var target;
if (!e) e = window.event;
if (e.target) target = e.target;
@@ -1683,23 +1707,23 @@ sub PrintSource {
HtmlPrintNumber($c2),
UnparseAddress($offset, $e->[0]),
CleanDisassembly($e->[3]));
-
+
# Append the most specific source line associated with this instruction
if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
$dis = HtmlEscape($dis);
my $f = $e->[5];
my $l = $e->[6];
if ($f ne $last_dis_filename) {
- $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
+ $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
} elsif ($l ne $last_dis_linenum) {
# De-emphasize the unchanged file name portion
$dis .= sprintf("<span class=unimportant>%s</span>" .
- "<span class=disasmloc>:%d</span>",
+ "<span class=disasmloc>:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
} else {
# De-emphasize the entire location
- $dis .= sprintf("<span class=unimportant>%s:%d</span>",
+ $dis .= sprintf("<span class=unimportant>%s:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
}
$last_dis_filename = $f;
@@ -1745,7 +1769,7 @@ sub PrintSource {
if ($html) {
printf $output (
- "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
+ "<h1>%s</h1>%s\n<pre onClick=\"jeprof_toggle_asm()\">\n" .
"Total:%6s %6s (flat / cumulative %s)\n",
HtmlEscape(ShortFunctionName($routine)),
HtmlEscape(CleanFileName($filename)),
@@ -1788,8 +1812,8 @@ sub PrintSource {
if (defined($dis) && $dis ne '') {
$asm = "<span class=\"asm\">" . $dis . "</span>";
}
- my $source_class = (($n1 + $n2 > 0)
- ? "livesrc"
+ my $source_class = (($n1 + $n2 > 0)
+ ? "livesrc"
: (($asm ne "") ? "deadsrc" : "nop"));
printf $output (
"<span class=\"line\">%5d</span> " .
@@ -2811,9 +2835,15 @@ sub RemoveUninterestingFrames {
'free',
'memalign',
'posix_memalign',
+ 'aligned_alloc',
'pvalloc',
'valloc',
'realloc',
+ 'mallocx', # jemalloc
+ 'rallocx', # jemalloc
+ 'xallocx', # jemalloc
+ 'dallocx', # jemalloc
+ 'sdallocx', # jemalloc
'tc_calloc',
'tc_cfree',
'tc_malloc',
@@ -2923,6 +2953,10 @@ sub RemoveUninterestingFrames {
if (exists($symbols->{$a})) {
my $func = $symbols->{$a}->[0];
if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
+ # Throw away the portion of the backtrace seen so far, under the
+ # assumption that previous frames were for functions internal to the
+ # allocator.
+ @path = ();
next;
}
}
@@ -3401,7 +3435,7 @@ sub FetchDynamicProfile {
$profile_file .= $suffix;
}
- my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
+ my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof");
if (! -d $profile_dir) {
mkdir($profile_dir)
|| die("Unable to create profile directory $profile_dir: $!\n");
@@ -3617,7 +3651,7 @@ BEGIN {
# Reads the top, 'header' section of a profile, and returns the last
# line of the header, commonly called a 'header line'. The header
# section of a profile consists of zero or more 'command' lines that
-# are instructions to pprof, which pprof executes when reading the
+# are instructions to jeprof, which jeprof executes when reading the
# header. All 'command' lines start with a %. After the command
# lines is the 'header line', which is a profile-specific line that
# indicates what type of profile it is, and perhaps other global
@@ -3680,6 +3714,7 @@ sub IsSymbolizedProfileFile {
# $result->{version} Version number of profile file
# $result->{period} Sampling period (in microseconds)
# $result->{profile} Profile object
+# $result->{threads} Map of thread IDs to profile objects
# $result->{map} Memory map info from profile
# $result->{pcs} Hash of all PC values seen, key is hex address
sub ReadProfile {
@@ -3728,6 +3763,9 @@ sub ReadProfile {
} elsif ($header =~ m/^heap profile:/) {
$main::profile_type = 'heap';
$result = ReadHeapProfile($prog, *PROFILE, $header);
+ } elsif ($header =~ m/^heap/) {
+ $main::profile_type = 'heap';
+ $result = ReadThreadedHeapProfile($prog, $fname, $header);
} elsif ($header =~ m/^--- *$contention_marker/o) {
$main::profile_type = 'contention';
$result = ReadSynchProfile($prog, *PROFILE);
@@ -3870,11 +3908,7 @@ sub ReadCPUProfile {
return $r;
}
-sub ReadHeapProfile {
- my $prog = shift;
- local *PROFILE = shift;
- my $header = shift;
-
+sub HeapProfileIndex {
my $index = 1;
if ($main::opt_inuse_space) {
$index = 1;
@@ -3885,6 +3919,84 @@ sub ReadHeapProfile {
} elsif ($main::opt_alloc_objects) {
$index = 2;
}
+ return $index;
+}
+
+sub ReadMappedLibraries {
+ my $fh = shift;
+ my $map = "";
+ # Read the /proc/self/maps data
+ while (<$fh>) {
+ s/\r//g; # turn windows-looking lines into unix-looking lines
+ $map .= $_;
+ }
+ return $map;
+}
+
+sub ReadMemoryMap {
+ my $fh = shift;
+ my $map = "";
+ # Read /proc/self/maps data as formatted by DumpAddressMap()
+ my $buildvar = "";
+ while (<PROFILE>) {
+ s/\r//g; # turn windows-looking lines into unix-looking lines
+ # Parse "build=<dir>" specification if supplied
+ if (m/^\s*build=(.*)\n/) {
+ $buildvar = $1;
+ }
+
+ # Expand "$build" variable if available
+ $_ =~ s/\$build\b/$buildvar/g;
+
+ $map .= $_;
+ }
+ return $map;
+}
+
+sub AdjustSamples {
+ my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_;
+ if ($sample_adjustment) {
+ if ($sampling_algorithm == 2) {
+ # Remote-heap version 2
+ # The sampling frequency is the rate of a Poisson process.
+ # This means that the probability of sampling an allocation of
+ # size X with sampling rate Y is 1 - exp(-X/Y)
+ if ($n1 != 0) {
+ my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n1 *= $scale_factor;
+ $s1 *= $scale_factor;
+ }
+ if ($n2 != 0) {
+ my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n2 *= $scale_factor;
+ $s2 *= $scale_factor;
+ }
+ } else {
+ # Remote-heap version 1
+ my $ratio;
+ $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n1 /= $ratio;
+ $s1 /= $ratio;
+ }
+ $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n2 /= $ratio;
+ $s2 /= $ratio;
+ }
+ }
+ }
+ return ($n1, $s1, $n2, $s2);
+}
+
+sub ReadHeapProfile {
+ my $prog = shift;
+ local *PROFILE = shift;
+ my $header = shift;
+
+ my $index = HeapProfileIndex();
# Find the type of this profile. The header line looks like:
# heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053
@@ -3974,29 +4086,12 @@ sub ReadHeapProfile {
while (<PROFILE>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
if (/^MAPPED_LIBRARIES:/) {
- # Read the /proc/self/maps data
- while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
- $map .= $_;
- }
+ $map .= ReadMappedLibraries(*PROFILE);
last;
}
if (/^--- Memory map:/) {
- # Read /proc/self/maps data as formatted by DumpAddressMap()
- my $buildvar = "";
- while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
- # Parse "build=<dir>" specification if supplied
- if (m/^\s*build=(.*)\n/) {
- $buildvar = $1;
- }
-
- # Expand "$build" variable if available
- $_ =~ s/\$build\b/$buildvar/g;
-
- $map .= $_;
- }
+ $map .= ReadMemoryMap(*PROFILE);
last;
}
@@ -4007,43 +4102,85 @@ sub ReadHeapProfile {
if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
my $stack = $5;
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
+ $n1, $s1, $n2, $s2);
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
+ }
+ }
- if ($sample_adjustment) {
- if ($sampling_algorithm == 2) {
- # Remote-heap version 2
- # The sampling frequency is the rate of a Poisson process.
- # This means that the probability of sampling an allocation of
- # size X with sampling rate Y is 1 - exp(-X/Y)
- if ($n1 != 0) {
- my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- my $scale_factor = 1/(1 - exp(-$ratio));
- $n1 *= $scale_factor;
- $s1 *= $scale_factor;
- }
- if ($n2 != 0) {
- my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- my $scale_factor = 1/(1 - exp(-$ratio));
- $n2 *= $scale_factor;
- $s2 *= $scale_factor;
- }
- } else {
- # Remote-heap version 1
- my $ratio;
- $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- if ($ratio < 1) {
- $n1 /= $ratio;
- $s1 /= $ratio;
- }
- $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- if ($ratio < 1) {
- $n2 /= $ratio;
- $s2 /= $ratio;
- }
+ my $r = {};
+ $r->{version} = "heap";
+ $r->{period} = 1;
+ $r->{profile} = $profile;
+ $r->{libs} = ParseLibraries($prog, $map, $pcs);
+ $r->{pcs} = $pcs;
+ return $r;
+}
+
+sub ReadThreadedHeapProfile {
+ my ($prog, $fname, $header) = @_;
+
+ my $index = HeapProfileIndex();
+ my $sampling_algorithm = 0;
+ my $sample_adjustment = 0;
+ chomp($header);
+ my $type = "unknown";
+ # Assuming a very specific type of header for now.
+ if ($header =~ m"^heap_v2/(\d+)") {
+ $type = "_v2";
+ $sampling_algorithm = 2;
+ $sample_adjustment = int($1);
+ }
+ if ($type ne "_v2" || !defined($sample_adjustment)) {
+ die "Threaded heap profiles require v2 sampling with a sample rate\n";
+ }
+
+ my $profile = {};
+ my $thread_profiles = {};
+ my $pcs = {};
+ my $map = "";
+ my $stack = "";
+
+ while (<PROFILE>) {
+ s/\r//g;
+ if (/^MAPPED_LIBRARIES:/) {
+ $map .= ReadMappedLibraries(*PROFILE);
+ last;
+ }
+
+ if (/^--- Memory map:/) {
+ $map .= ReadMemoryMap(*PROFILE);
+ last;
+ }
+
+ # Read entry of the form:
+ # @ a1 a2 ... an
+ # t*: <count1>: <bytes1> [<count2>: <bytes2>]
+ # t1: <count1>: <bytes1> [<count2>: <bytes2>]
+ # ...
+ # tn: <count1>: <bytes1> [<count2>: <bytes2>]
+ s/^\s*//;
+ s/\s*$//;
+ if (m/^@\s+(.*)$/) {
+ $stack = $1;
+ } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) {
+ if ($stack eq "") {
+ # Still in the header, so this is just a per-thread summary.
+ next;
+ }
+ my $thread = $2;
+ my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6);
+ my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
+ $n1, $s1, $n2, $s2);
+ if ($thread eq "*") {
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
+ } else {
+ if (!exists($thread_profiles->{$thread})) {
+ $thread_profiles->{$thread} = {};
}
+ AddEntries($thread_profiles->{$thread}, $pcs,
+ FixCallerAddresses($stack), $counts[$index]);
}
-
- my @counts = ($n1, $s1, $n2, $s2);
- AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
}
}
@@ -4051,6 +4188,7 @@ sub ReadHeapProfile {
$r->{version} = "heap";
$r->{period} = 1;
$r->{profile} = $profile;
+ $r->{threads} = $thread_profiles;
$r->{libs} = ParseLibraries($prog, $map, $pcs);
$r->{pcs} = $pcs;
return $r;
@@ -4120,10 +4258,10 @@ sub ReadSynchProfile {
} elsif ($variable eq "sampling period") {
$sampling_period = $value;
} elsif ($variable eq "ms since reset") {
- # Currently nothing is done with this value in pprof
+ # Currently nothing is done with this value in jeprof
# So we just silently ignore it for now
} elsif ($variable eq "discarded samples") {
- # Currently nothing is done with this value in pprof
+ # Currently nothing is done with this value in jeprof
# So we just silently ignore it for now
} else {
printf STDERR ("Ignoring unnknown variable in /contention output: " .
@@ -4429,7 +4567,7 @@ sub ParseLibraries {
}
# Add two hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressAdd {
my $addr1 = shift;
my $addr2 = shift;
@@ -4483,7 +4621,7 @@ sub AddressAdd {
# Subtract two hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressSub {
my $addr1 = shift;
my $addr2 = shift;
@@ -4535,7 +4673,7 @@ sub AddressSub {
}
# Increment a hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressInc {
my $addr = shift;
my $sum;
@@ -4747,7 +4885,7 @@ sub MapToSymbols {
}
}
}
-
+
# Prepend to accumulated symbols for pcstr
# (so that caller comes before callee)
my $sym = $symbols->{$pcstr};
@@ -4853,7 +4991,7 @@ sub UnparseAddress {
# 32-bit or ELF 64-bit executable file. The location of the tools
# is determined by considering the following options in this order:
# 1) --tools option, if set
-# 2) PPROF_TOOLS environment variable, if set
+# 2) JEPROF_TOOLS environment variable, if set
# 3) the environment
sub ConfigureObjTools {
my $prog_file = shift;
@@ -4886,7 +5024,7 @@ sub ConfigureObjTools {
# For windows, we provide a version of nm and addr2line as part of
# the opensource release, which is capable of parsing
# Windows-style PDB executables. It should live in the path, or
- # in the same directory as pprof.
+ # in the same directory as jeprof.
$obj_tool_map{"nm_pdb"} = "nm-pdb";
$obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
}
@@ -4905,20 +5043,20 @@ sub ConfigureObjTools {
}
# Returns the path of a caller-specified object tool. If --tools or
-# PPROF_TOOLS are specified, then returns the full path to the tool
+# JEPROF_TOOLS are specified, then returns the full path to the tool
# with that prefix. Otherwise, returns the path unmodified (which
# means we will look for it on PATH).
sub ConfigureTool {
my $tool = shift;
my $path;
- # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+ # --tools (or $JEPROF_TOOLS) is a comma separated list, where each
# item is either a) a pathname prefix, or b) a map of the form
# <tool>:<path>. First we look for an entry of type (b) for our
# tool. If one is found, we use it. Otherwise, we consider all the
# pathname prefixes in turn, until one yields an existing file. If
# none does, we use a default path.
- my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+ my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || "";
if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
$path = $2;
# TODO(csilvers): sanity-check that $path exists? Hard if it's relative.
@@ -4932,16 +5070,16 @@ sub ConfigureTool {
}
if (!$path) {
error("No '$tool' found with prefix specified by " .
- "--tools (or \$PPROF_TOOLS) '$tools'\n");
+ "--tools (or \$JEPROF_TOOLS) '$tools'\n");
}
} else {
# ... otherwise use the version that exists in the same directory as
- # pprof. If there's nothing there, use $PATH.
+ # jeprof. If there's nothing there, use $PATH.
$0 =~ m,[^/]*$,; # this is everything after the last slash
my $dirname = $`; # this is everything up to and including the last slash
if (-x "$dirname$tool") {
$path = "$dirname$tool";
- } else {
+ } else {
$path = $tool;
}
}
@@ -4966,7 +5104,7 @@ sub cleanup {
unlink($main::tmpfile_sym);
unlink(keys %main::tempnames);
- # We leave any collected profiles in $HOME/pprof in case the user wants
+ # We leave any collected profiles in $HOME/jeprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
defined($main::collected_profile)) {
@@ -4975,7 +5113,7 @@ sub cleanup {
}
print STDERR "If you want to investigate this profile further, you can do:\n";
print STDERR "\n";
- print STDERR " pprof \\\n";
+ print STDERR " jeprof \\\n";
print STDERR " $main::prog \\\n";
print STDERR " $main::collected_profile\n";
print STDERR "\n";
@@ -5160,7 +5298,7 @@ sub GetProcedureBoundaries {
# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
# To make them more readable, we add underscores at interesting places.
# This routine removes the underscores, producing the canonical representation
-# used by pprof to represent addresses, particularly in the tested routines.
+# used by jeprof to represent addresses, particularly in the tested routines.
sub CanonicalHex {
my $arg = shift;
return join '', (split '_',$arg);
diff --git a/deps/jemalloc/config.guess b/deps/jemalloc/config.guess
index b79252d6b..1f5c50c0d 100755
--- a/deps/jemalloc/config.guess
+++ b/deps/jemalloc/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2013 Free Software Foundation, Inc.
+# Copyright 1992-2014 Free Software Foundation, Inc.
-timestamp='2013-06-10'
+timestamp='2014-03-23'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2013 Free Software Foundation, Inc.
+Copyright 1992-2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -149,7 +149,7 @@ Linux|GNU|GNU/*)
LIBC=gnu
#endif
EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
;;
esac
@@ -826,7 +826,7 @@ EOF
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
- i*:MSYS*:*)
+ *:MSYS*:*)
echo ${UNAME_MACHINE}-pc-msys
exit ;;
i*:windows32*:*)
@@ -969,10 +969,10 @@ EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
- or1k:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ openrisc*:Linux:*:*)
+ echo or1k-unknown-linux-${LIBC}
exit ;;
- or32:Linux:*:*)
+ or32:Linux:*:* | or1k*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
padre:Linux:*:*)
@@ -1260,16 +1260,26 @@ EOF
if test "$UNAME_PROCESSOR" = unknown ; then
UNAME_PROCESSOR=powerpc
fi
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- case $UNAME_PROCESSOR in
- i386) UNAME_PROCESSOR=x86_64 ;;
- powerpc) UNAME_PROCESSOR=powerpc64 ;;
- esac
+ if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ case $UNAME_PROCESSOR in
+ i386) UNAME_PROCESSOR=x86_64 ;;
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
+ esac
+ fi
fi
+ elif test "$UNAME_PROCESSOR" = i386 ; then
+ # Avoid executing cc on OS X 10.9, as it ships with a stub
+ # that puts up a graphical alert prompting to install
+ # developer tools. Any system running Mac OS X 10.7 or
+ # later (Darwin 11 and later) is required to have a 64-bit
+ # processor. This is not true of the ARM version of Darwin
+ # that Apple uses in portable devices.
+ UNAME_PROCESSOR=x86_64
fi
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
exit ;;
@@ -1361,154 +1371,6 @@ EOF
exit ;;
esac
-eval $set_cc_for_build
-cat >$dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
- /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
- I don't know.... */
- printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
- printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
- "4"
-#else
- ""
-#endif
- ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
- printf ("arm-acorn-riscix\n"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
- printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
- int version;
- version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
- if (version < 4)
- printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
- else
- printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
- exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
- printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
- printf ("ns32k-encore-mach\n"); exit (0);
-#else
- printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
- printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
- printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
- printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
- struct utsname un;
-
- uname(&un);
-
- if (strncmp(un.version, "V2", 2) == 0) {
- printf ("i386-sequent-ptx2\n"); exit (0);
- }
- if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
- printf ("i386-sequent-ptx1\n"); exit (0);
- }
- printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-# if !defined (ultrix)
-# include <sys/param.h>
-# if defined (BSD)
-# if BSD == 43
- printf ("vax-dec-bsd4.3\n"); exit (0);
-# else
-# if BSD == 199006
- printf ("vax-dec-bsd4.3reno\n"); exit (0);
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# endif
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# else
- printf ("vax-dec-ultrix\n"); exit (0);
-# endif
-#endif
-
-#if defined (alliant) && defined (i860)
- printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
- exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
- case `getsysinfo -f cpu_type` in
- c1*)
- echo c1-convex-bsd
- exit ;;
- c2*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- c34*)
- echo c34-convex-bsd
- exit ;;
- c38*)
- echo c38-convex-bsd
- exit ;;
- c4*)
- echo c4-convex-bsd
- exit ;;
- esac
-fi
-
cat >&2 <<EOF
$0: unable to guess system type
diff --git a/deps/jemalloc/config.sub b/deps/jemalloc/config.sub
index 61cb4bc22..0ccff7706 100755
--- a/deps/jemalloc/config.sub
+++ b/deps/jemalloc/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2013 Free Software Foundation, Inc.
+# Copyright 1992-2014 Free Software Foundation, Inc.
-timestamp='2013-10-01'
+timestamp='2014-05-01'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -68,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2013 Free Software Foundation, Inc.
+Copyright 1992-2014 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -283,8 +283,10 @@ case $basic_machine in
| mips64vr5900 | mips64vr5900el \
| mipsisa32 | mipsisa32el \
| mipsisa32r2 | mipsisa32r2el \
+ | mipsisa32r6 | mipsisa32r6el \
| mipsisa64 | mipsisa64el \
| mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64r6 | mipsisa64r6el \
| mipsisa64sb1 | mipsisa64sb1el \
| mipsisa64sr71k | mipsisa64sr71kel \
| mipsr5900 | mipsr5900el \
@@ -296,8 +298,7 @@ case $basic_machine in
| nds32 | nds32le | nds32be \
| nios | nios2 | nios2eb | nios2el \
| ns16k | ns32k \
- | open8 \
- | or1k | or32 \
+ | open8 | or1k | or1knd | or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
@@ -402,8 +403,10 @@ case $basic_machine in
| mips64vr5900-* | mips64vr5900el-* \
| mipsisa32-* | mipsisa32el-* \
| mipsisa32r2-* | mipsisa32r2el-* \
+ | mipsisa32r6-* | mipsisa32r6el-* \
| mipsisa64-* | mipsisa64el-* \
| mipsisa64r2-* | mipsisa64r2el-* \
+ | mipsisa64r6-* | mipsisa64r6el-* \
| mipsisa64sb1-* | mipsisa64sb1el-* \
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
| mipsr5900-* | mipsr5900el-* \
@@ -415,6 +418,7 @@ case $basic_machine in
| nios-* | nios2-* | nios2eb-* | nios2el-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
+ | or1k*-* \
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
@@ -1376,7 +1380,7 @@ case $os in
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1400,6 +1404,9 @@ case $os in
-mac*)
os=`echo $os | sed -e 's|mac|macos|'`
;;
+ # Apple iOS
+ -ios*)
+ ;;
-linux-dietlibc)
os=-linux-dietlibc
;;
@@ -1594,9 +1601,6 @@ case $basic_machine in
mips*-*)
os=-elf
;;
- or1k-*)
- os=-elf
- ;;
or32-*)
os=-coff
;;
diff --git a/deps/jemalloc/configure b/deps/jemalloc/configure
index 2e5496bfb..8c56c92a1 100755
--- a/deps/jemalloc/configure
+++ b/deps/jemalloc/configure
@@ -628,19 +628,19 @@ cfghdrs_in
enable_zone_allocator
enable_tls
enable_lazy_lock
+TESTLIBS
jemalloc_version_gid
jemalloc_version_nrev
jemalloc_version_bugfix
jemalloc_version_minor
jemalloc_version_major
jemalloc_version
+enable_cache_oblivious
enable_xmalloc
enable_valgrind
enable_utrace
enable_fill
-enable_dss
enable_munmap
-enable_mremap
enable_tcache
enable_prof
enable_stats
@@ -648,8 +648,8 @@ enable_debug
je_
install_suffix
private_namespace
+JEMALLOC_CPREFIX
enable_code_coverage
-enable_experimental
AUTOCONF
LD
RANLIB
@@ -709,6 +709,7 @@ objroot
abs_srcroot
srcroot
rev
+CONFIG
target_alias
host_alias
build_alias
@@ -753,7 +754,6 @@ enable_option_checking
with_xslroot
with_rpath
enable_autogen
-enable_experimental
enable_code_coverage
with_mangling
with_jemalloc_prefix
@@ -770,13 +770,17 @@ with_static_libunwind
enable_prof_libgcc
enable_prof_gcc
enable_tcache
-enable_mremap
enable_munmap
-enable_dss
enable_fill
enable_utrace
enable_valgrind
enable_xmalloc
+enable_cache_oblivious
+with_lg_tiny_min
+with_lg_quantum
+with_lg_page
+with_lg_page_sizes
+with_lg_size_class_group
enable_lazy_lock
enable_tls
enable_zone_allocator
@@ -1402,9 +1406,8 @@ Optional Features:
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-autogen Automatically regenerate configure output
- --disable-experimental Disable support for the experimental API
--enable-code-coverage Enable code coverage
- --enable-cc-silence Silence irrelevant compiler warnings
+ --disable-cc-silence Do not silence irrelevant compiler warnings
--enable-debug Build debugging code (implies --enable-ivsalloc)
--enable-ivsalloc Validate pointers passed through the public API
--disable-stats Disable statistics calculation/reporting
@@ -1413,14 +1416,15 @@ Optional Features:
--disable-prof-libgcc Do not use libgcc for backtracing
--disable-prof-gcc Do not use gcc intrinsics for backtracing
--disable-tcache Disable per thread caches
- --enable-mremap Enable mremap(2) for huge realloc()
--disable-munmap Disable VM deallocation via munmap(2)
- --enable-dss Enable allocation from DSS
--disable-fill Disable support for junk/zero filling, quarantine,
and redzones
--enable-utrace Enable utrace(2)-based tracing
--disable-valgrind Disable support for Valgrind
--enable-xmalloc Support xmalloc option
+ --disable-cache-oblivious
+ Disable support for cache-oblivious allocation
+ alignment
--enable-lazy-lock Enable lazy locking (only lock when multi-threaded)
--disable-tls Disable thread-local storage (__thread keyword)
--disable-zone-allocator
@@ -1442,6 +1446,16 @@ Optional Packages:
--with-static-libunwind=<libunwind.a>
Path to static libunwind library; use rather than
dynamically linking
+ --with-lg-tiny-min=<lg-tiny-min>
+ Base 2 log of minimum tiny size class to support
+ --with-lg-quantum=<lg-quantum>
+ Base 2 log of minimum allocation alignment
+ --with-lg-page=<lg-page>
+ Base 2 log of system page size
+ --with-lg-page-sizes=<lg-page-sizes>
+ Base 2 logs of system page sizes to support
+ --with-lg-size-class-group=<lg-size-class-group>
+ Base 2 log of size classes per doubling
Some influential environment variables:
CC C compiler command
@@ -1910,73 +1924,6 @@ fi
} # ac_fn_c_try_link
-# ac_fn_c_check_func LINENO FUNC VAR
-# ----------------------------------
-# Tests whether FUNC exists, setting the cache variable VAR accordingly
-ac_fn_c_check_func ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
- For example, HP-UX 11i <limits.h> declares gettimeofday. */
-#define $2 innocuous_$2
-
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $2 (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $2
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char $2 ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined __stub_$2 || defined __stub___$2
-choke me
-#endif
-
-int
-main ()
-{
-return $2 ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_func
-
# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
# Tests whether HEADER exists, giving a warning if it cannot be compiled using
@@ -2064,6 +2011,73 @@ fi
} # ac_fn_c_check_header_mongrel
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $2 (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
# -------------------------------------------
# Tests whether TYPE exists after having included INCLUDES, setting cache
@@ -2476,7 +2490,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-rev=1
+CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'`
+
+
+rev=2
srcroot=$srcdir
@@ -3488,6 +3505,42 @@ fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror=declaration-after-statement" >&5
+$as_echo_n "checking whether compiler supports -Werror=declaration-after-statement... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+ CFLAGS="-Werror=declaration-after-statement"
+else
+ CFLAGS="${CFLAGS} -Werror=declaration-after-statement"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_appended=-Werror=declaration-after-statement
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_appended=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
$as_echo_n "checking whether compiler supports -pipe... " >&6; }
TCFLAGS="${CFLAGS}"
@@ -3669,7 +3722,43 @@ $as_echo "no" >&6; }
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -FS" >&5
+$as_echo_n "checking whether compiler supports -FS... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+ CFLAGS="-FS"
+else
+ CFLAGS="${CFLAGS} -FS"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_appended=-FS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_appended=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+ CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
fi
fi
if test "x$EXTRA_CFLAGS" != "x" ; then
@@ -4338,6 +4427,10 @@ _ACEOF
fi
+if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
+ CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99"
+fi
+
# The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
@@ -4622,9 +4715,10 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
CPU_SPINWAIT=""
case "${host_cpu}" in
- i[345]86)
- ;;
i686|x86_64)
+ if ${je_cv_pause+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction is compilable" >&5
$as_echo_n "checking whether pause instruction is compilable... " >&6; }
@@ -4653,44 +4747,10 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause" >&5
$as_echo "$je_cv_pause" >&6; }
- if test "x${je_cv_pause}" = "xyes" ; then
- CPU_SPINWAIT='__asm__ volatile("pause")'
- fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether SSE2 intrinsics is compilable" >&5
-$as_echo_n "checking whether SSE2 intrinsics is compilable... " >&6; }
-if ${je_cv_sse2+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-#include <emmintrin.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- je_cv_sse2=yes
-else
- je_cv_sse2=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sse2" >&5
-$as_echo "$je_cv_sse2" >&6; }
-
- if test "x${je_cv_sse2}" = "xyes" ; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_SSE2
-_ACEOF
+ if test "x${je_cv_pause}" = "xyes" ; then
+ CPU_SPINWAIT='__asm__ volatile("pause")'
fi
;;
powerpc)
@@ -4822,9 +4882,9 @@ fi
default_munmap="1"
-JEMALLOC_USABLE_SIZE_CONST="const"
+maps_coalesce="1"
case "${host}" in
- *-*-darwin*)
+ *-*-darwin* | *-*-ios*)
CFLAGS="$CFLAGS"
abi="macho"
$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
@@ -4834,7 +4894,7 @@ case "${host}" in
so="dylib"
importlib="${so}"
force_tls="0"
- DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+ DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
SOREV="${rev}.${so}"
sbrk_deprecated="1"
;;
@@ -4845,6 +4905,25 @@ case "${host}" in
force_lazy_lock="1"
;;
+ *-*-dragonfly*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+
+ ;;
+ *-*-openbsd*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+
+ force_tls="0"
+ ;;
+ *-*-bitrig*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+
+ ;;
*-*-linux*)
CFLAGS="$CFLAGS"
CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
@@ -4855,7 +4934,8 @@ case "${host}" in
$as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h
- JEMALLOC_USABLE_SIZE_CONST=""
+ $as_echo "#define JEMALLOC_USE_CXX_THROW " >>confdefs.h
+
default_munmap="0"
;;
*-*-netbsd*)
@@ -4905,9 +4985,11 @@ $as_echo "$abi" >&6; }
fi
abi="xcoff"
;;
- *-*-mingw*)
+ *-*-mingw* | *-*-cygwin*)
abi="pecoff"
force_tls="0"
+ force_lazy_lock="1"
+ maps_coalesce="0"
RPATH=""
so="dll"
if test "x$je_cv_msvc" = "xyes" ; then
@@ -4935,6 +5017,50 @@ $as_echo "Unsupported operating system: ${host}" >&6; }
abi="elf"
;;
esac
+
+JEMALLOC_USABLE_SIZE_CONST=const
+for ac_header in malloc.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "malloc.h" "ac_cv_header_malloc_h" "$ac_includes_default"
+if test "x$ac_cv_header_malloc_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_MALLOC_H 1
+_ACEOF
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether malloc_usable_size definition can use const argument" >&5
+$as_echo_n "checking whether malloc_usable_size definition can use const argument... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <malloc.h>
+ #include <stddef.h>
+ size_t malloc_usable_size(const void *ptr);
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ JEMALLOC_USABLE_SIZE_CONST=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+
+done
+
cat >>confdefs.h <<_ACEOF
#define JEMALLOC_USABLE_SIZE_CONST $JEMALLOC_USABLE_SIZE_CONST
_ACEOF
@@ -5079,7 +5205,7 @@ int
main ()
{
static __thread int
- __attribute__((tls_model("initial-exec"))) foo;
+ __attribute__((tls_model("initial-exec"), unused)) foo;
foo = 0;
;
return 0;
@@ -5104,6 +5230,216 @@ else
$as_echo "#define JEMALLOC_TLS_MODEL " >>confdefs.h
fi
+SAVED_CFLAGS="${CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+ CFLAGS="-Werror"
+else
+ CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_appended=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_appended=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5
+$as_echo_n "checking whether alloc_size attribute is compilable... " >&6; }
+if ${je_cv_alloc_size+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(size_t size) __attribute__((alloc_size(1)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_alloc_size=yes
+else
+ je_cv_alloc_size=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_alloc_size" >&5
+$as_echo "$je_cv_alloc_size" >&6; }
+
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_alloc_size}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE " >>confdefs.h
+
+fi
+SAVED_CFLAGS="${CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+ CFLAGS="-Werror"
+else
+ CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_appended=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_appended=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5
+$as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; }
+if ${je_cv_format_gnu_printf+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_format_gnu_printf=yes
+else
+ je_cv_format_gnu_printf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_format_gnu_printf" >&5
+$as_echo "$je_cv_format_gnu_printf" >&6; }
+
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF " >>confdefs.h
+
+fi
+SAVED_CFLAGS="${CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+ CFLAGS="-Werror"
+else
+ CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_appended=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_appended=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5
+$as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; }
+if ${je_cv_format_printf+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_format_printf=yes
+else
+ je_cv_format_printf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_format_printf" >&5
+$as_echo "$je_cv_format_printf" >&6; }
+
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_format_printf}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF " >>confdefs.h
+
+fi
# Check whether --with-rpath was given.
@@ -5403,7 +5739,7 @@ fi
-public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
+public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign"
if test "x$ac_cv_func_memalign" = xyes; then :
@@ -5420,26 +5756,6 @@ if test "x$ac_cv_func_valloc" = xyes; then :
fi
-# Check whether --enable-experimental was given.
-if test "${enable_experimental+set}" = set; then :
- enableval=$enable_experimental; if test "x$enable_experimental" = "xno" ; then
- enable_experimental="0"
-else
- enable_experimental="1"
-fi
-
-else
- enable_experimental="1"
-
-fi
-
-if test "x$enable_experimental" = "x1" ; then
- $as_echo "#define JEMALLOC_EXPERIMENTAL " >>confdefs.h
-
- public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
-fi
-
-
GCOV_FLAGS=
# Check whether --enable-code-coverage was given.
if test "${enable_code_coverage+set}" = set; then :
@@ -5572,6 +5888,7 @@ _ACEOF
fi
+
# Check whether --with-export was given.
if test "${with_export+set}" = set; then :
withval=$with_export; if test "x$with_export" = "xno"; then
@@ -5613,48 +5930,54 @@ install_suffix="$INSTALL_SUFFIX"
je_="je_"
-cfgoutputs_in="${srcroot}Makefile.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in"
+cfgoutputs_in="Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
+cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_internal.h.in"
+cfgoutputs_in="${cfgoutputs_in} test/test.sh.in"
+cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in"
cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} jemalloc.pc"
cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_out="${cfgoutputs_out} test/test.sh"
cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h"
cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} jemalloc.pc:jemalloc.pc.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in"
cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in"
-cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_in="include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.txt"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh"
+cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in"
cfghdrs_out="include/jemalloc/jemalloc_defs.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h"
@@ -5672,8 +5995,8 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h"
cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h"
cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in"
# Check whether --enable-cc-silence was given.
if test "${enable_cc_silence+set}" = set; then :
@@ -5684,7 +6007,7 @@ else
fi
else
- enable_cc_silence="0"
+ enable_cc_silence="1"
fi
@@ -5709,6 +6032,10 @@ fi
if test "x$enable_debug" = "x1" ; then
$as_echo "#define JEMALLOC_DEBUG " >>confdefs.h
+fi
+if test "x$enable_debug" = "x1" ; then
+ $as_echo "#define JEMALLOC_DEBUG " >>confdefs.h
+
enable_ivsalloc="1"
fi
@@ -5969,9 +6296,9 @@ fi
done
if test "x$LUNWIND" = "x-lunwind" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lunwind" >&5
-$as_echo_n "checking for backtrace in -lunwind... " >&6; }
-if ${ac_cv_lib_unwind_backtrace+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for unw_backtrace in -lunwind" >&5
+$as_echo_n "checking for unw_backtrace in -lunwind... " >&6; }
+if ${ac_cv_lib_unwind_unw_backtrace+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
@@ -5985,27 +6312,27 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
#ifdef __cplusplus
extern "C"
#endif
-char backtrace ();
+char unw_backtrace ();
int
main ()
{
-return backtrace ();
+return unw_backtrace ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_unwind_backtrace=yes
+ ac_cv_lib_unwind_unw_backtrace=yes
else
- ac_cv_lib_unwind_backtrace=no
+ ac_cv_lib_unwind_unw_backtrace=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_backtrace" >&5
-$as_echo "$ac_cv_lib_unwind_backtrace" >&6; }
-if test "x$ac_cv_lib_unwind_backtrace" = xyes; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_unw_backtrace" >&5
+$as_echo "$ac_cv_lib_unwind_unw_backtrace" >&6; }
+if test "x$ac_cv_lib_unwind_unw_backtrace" = xyes; then :
LIBS="$LIBS $LUNWIND"
else
enable_prof_libunwind="0"
@@ -6168,11 +6495,6 @@ $as_echo_n "checking configured backtracing method... " >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5
$as_echo "$backtrace_method" >&6; }
if test "x$enable_prof" = "x1" ; then
- if test "x${force_tls}" = "x0" ; then
- as_fn_error $? "Heap profiling requires TLS" "$LINENO" 5;
- fi
- force_tls="1"
-
if test "x$abi" != "xpecoff"; then
LIBS="$LIBS -lm"
fi
@@ -6201,63 +6523,11 @@ if test "x$enable_tcache" = "x1" ; then
fi
-# Check whether --enable-mremap was given.
-if test "${enable_mremap+set}" = set; then :
- enableval=$enable_mremap; if test "x$enable_mremap" = "xno" ; then
- enable_mremap="0"
-else
- enable_mremap="1"
-fi
-
-else
- enable_mremap="0"
-
-fi
-
-if test "x$enable_mremap" = "x1" ; then
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mremap(...MREMAP_FIXED...) is compilable" >&5
-$as_echo_n "checking whether mremap(...MREMAP_FIXED...) is compilable... " >&6; }
-if ${je_cv_mremap_fixed+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-#define _GNU_SOURCE
-#include <sys/mman.h>
-
-int
-main ()
-{
-
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- je_cv_mremap_fixed=yes
-else
- je_cv_mremap_fixed=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mremap_fixed" >&5
-$as_echo "$je_cv_mremap_fixed" >&6; }
-
- if test "x${je_cv_mremap_fixed}" = "xno" ; then
- enable_mremap="0"
- fi
-fi
-if test "x$enable_mremap" = "x1" ; then
- $as_echo "#define JEMALLOC_MREMAP " >>confdefs.h
+if test "x${maps_coalesce}" = "x1" ; then
+ $as_echo "#define JEMALLOC_MAPS_COALESCE " >>confdefs.h
fi
-
# Check whether --enable-munmap was given.
if test "${enable_munmap+set}" = set; then :
enableval=$enable_munmap; if test "x$enable_munmap" = "xno" ; then
@@ -6277,19 +6547,7 @@ if test "x$enable_munmap" = "x1" ; then
fi
-# Check whether --enable-dss was given.
-if test "${enable_dss+set}" = set; then :
- enableval=$enable_dss; if test "x$enable_dss" = "xno" ; then
- enable_dss="0"
-else
- enable_dss="1"
-fi
-
-else
- enable_dss="0"
-
-fi
-
+have_dss="1"
ac_fn_c_check_func "$LINENO" "sbrk" "ac_cv_func_sbrk"
if test "x$ac_cv_func_sbrk" = xyes; then :
have_sbrk="1"
@@ -6298,24 +6556,20 @@ else
fi
if test "x$have_sbrk" = "x1" ; then
- if test "x$sbrk_deprecated" == "x1" ; then
+ if test "x$sbrk_deprecated" = "x1" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Disabling dss allocation because sbrk is deprecated" >&5
$as_echo "Disabling dss allocation because sbrk is deprecated" >&6; }
- enable_dss="0"
- else
- $as_echo "#define JEMALLOC_HAVE_SBRK " >>confdefs.h
-
+ have_dss="0"
fi
else
- enable_dss="0"
+ have_dss="0"
fi
-if test "x$enable_dss" = "x1" ; then
+if test "x$have_dss" = "x1" ; then
$as_echo "#define JEMALLOC_DSS " >>confdefs.h
fi
-
# Check whether --enable-fill was given.
if test "${enable_fill+set}" = set; then :
enableval=$enable_fill; if test "x$enable_fill" = "xno" ; then
@@ -6471,16 +6725,159 @@ if test "x$enable_xmalloc" = "x1" ; then
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking STATIC_PAGE_SHIFT" >&5
-$as_echo_n "checking STATIC_PAGE_SHIFT... " >&6; }
-if ${je_cv_static_page_shift+:} false; then :
+# Check whether --enable-cache-oblivious was given.
+if test "${enable_cache_oblivious+set}" = set; then :
+ enableval=$enable_cache_oblivious; if test "x$enable_cache_oblivious" = "xno" ; then
+ enable_cache_oblivious="0"
+else
+ enable_cache_oblivious="1"
+fi
+
+else
+ enable_cache_oblivious="1"
+
+fi
+
+if test "x$enable_cache_oblivious" = "x1" ; then
+ $as_echo "#define JEMALLOC_CACHE_OBLIVIOUS " >>confdefs.h
+
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5
+$as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; }
+if ${je_cv_gcc_builtin_ffsl+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
+
+int
+main ()
+{
+
+ {
+ int rv = __builtin_ffsl(0x08);
+ printf("%d\n", rv);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_gcc_builtin_ffsl=yes
+else
+ je_cv_gcc_builtin_ffsl=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_ffsl" >&5
+$as_echo "$je_cv_gcc_builtin_ffsl" >&6; }
+
+if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl" >>confdefs.h
+
+ $as_echo "#define JEMALLOC_INTERNAL_FFS __builtin_ffs" >>confdefs.h
+
+else
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
+$as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
+if ${je_cv_function_ffsl+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <stdio.h>
+ #include <strings.h>
+ #include <string.h>
+
+int
+main ()
+{
+
+ {
+ int rv = ffsl(0x08);
+ printf("%d\n", rv);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_function_ffsl=yes
+else
+ je_cv_function_ffsl=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
+$as_echo "$je_cv_function_ffsl" >&6; }
+
+ if test "x${je_cv_function_ffsl}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_INTERNAL_FFSL ffsl" >>confdefs.h
+
+ $as_echo "#define JEMALLOC_INTERNAL_FFS ffs" >>confdefs.h
+
+ else
+ as_fn_error $? "Cannot build without ffsl(3) or __builtin_ffsl()" "$LINENO" 5
+ fi
+fi
+
+
+# Check whether --with-lg_tiny_min was given.
+if test "${with_lg_tiny_min+set}" = set; then :
+ withval=$with_lg_tiny_min; LG_TINY_MIN="$with_lg_tiny_min"
+else
+ LG_TINY_MIN="3"
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define LG_TINY_MIN $LG_TINY_MIN
+_ACEOF
+
+
+
+# Check whether --with-lg_quantum was given.
+if test "${with_lg_quantum+set}" = set; then :
+ withval=$with_lg_quantum; LG_QUANTA="$with_lg_quantum"
+else
+ LG_QUANTA="3 4"
+fi
+
+if test "x$with_lg_quantum" != "x" ; then
+ cat >>confdefs.h <<_ACEOF
+#define LG_QUANTUM $with_lg_quantum
+_ACEOF
+
+fi
+
+
+# Check whether --with-lg_page was given.
+if test "${with_lg_page+set}" = set; then :
+ withval=$with_lg_page; LG_PAGE="$with_lg_page"
+else
+ LG_PAGE="detect"
+fi
+
+if test "x$LG_PAGE" = "xdetect"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking LG_PAGE" >&5
+$as_echo_n "checking LG_PAGE... " >&6; }
+if ${je_cv_lg_page+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
+ je_cv_lg_page=12
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -6510,7 +6907,7 @@ main ()
if (result == -1) {
return 1;
}
- result = ffsl(result) - 1;
+ result = JEMALLOC_INTERNAL_FFSL(result) - 1;
f = fopen("conftest.out", "w");
if (f == NULL) {
@@ -6526,32 +6923,58 @@ main ()
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
- je_cv_static_page_shift=`cat conftest.out`
+ je_cv_lg_page=`cat conftest.out`
else
- je_cv_static_page_shift=undefined
+ je_cv_lg_page=undefined
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_static_page_shift" >&5
-$as_echo "$je_cv_static_page_shift" >&6; }
-
-if test "x$je_cv_static_page_shift" != "xundefined"; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_lg_page" >&5
+$as_echo "$je_cv_lg_page" >&6; }
+fi
+if test "x${je_cv_lg_page}" != "x" ; then
+ LG_PAGE="${je_cv_lg_page}"
+fi
+if test "x${LG_PAGE}" != "xundefined" ; then
cat >>confdefs.h <<_ACEOF
-#define STATIC_PAGE_SHIFT $je_cv_static_page_shift
+#define LG_PAGE $LG_PAGE
_ACEOF
else
- as_fn_error $? "cannot determine value for STATIC_PAGE_SHIFT" "$LINENO" 5
+ as_fn_error $? "cannot determine value for LG_PAGE" "$LINENO" 5
+fi
+
+
+# Check whether --with-lg_page_sizes was given.
+if test "${with_lg_page_sizes+set}" = set; then :
+ withval=$with_lg_page_sizes; LG_PAGE_SIZES="$with_lg_page_sizes"
+else
+ LG_PAGE_SIZES="$LG_PAGE"
+fi
+
+
+
+# Check whether --with-lg_size_class_group was given.
+if test "${with_lg_size_class_group+set}" = set; then :
+ withval=$with_lg_size_class_group; LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"
+else
+ LG_SIZE_CLASS_GROUP="2"
fi
-if test -d "${srcroot}.git" ; then
- git describe --long --abbrev=40 > ${srcroot}VERSION
+if test ! -e "${objroot}VERSION" ; then
+ if test ! -e "${srcroot}VERSION" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5
+$as_echo "Missing VERSION file, and unable to generate it; creating bogus VERSION" >&6; }
+ echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
+ else
+ cp ${srcroot}VERSION ${objroot}VERSION
+ fi
fi
-jemalloc_version=`cat ${srcroot}VERSION`
+jemalloc_version=`cat "${objroot}VERSION"`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $1}'`
jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $2}'`
jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $3}'`
@@ -6683,6 +7106,93 @@ fi
CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+SAVED_LIBS="${LIBS}"
+LIBS=
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_clock_gettime+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+ ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+ TESTLIBS="${LIBS}"
+fi
+
+
+LIBS="${SAVED_LIBS}"
+
+ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv"
+if test "x$ac_cv_func_secure_getenv" = xyes; then :
+ have_secure_getenv="1"
+else
+ have_secure_getenv="0"
+
+fi
+
+if test "x$have_secure_getenv" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_SECURE_GETENV " >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "issetugid" "ac_cv_func_issetugid"
+if test "x$ac_cv_func_issetugid" = xyes; then :
+ have_issetugid="1"
+else
+ have_issetugid="0"
+
+fi
+
+if test "x$have_issetugid" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_ISSETUGID " >>confdefs.h
+
+fi
+
ac_fn_c_check_func "$LINENO" "_malloc_thread_cleanup" "ac_cv_func__malloc_thread_cleanup"
if test "x$ac_cv_func__malloc_thread_cleanup" = xyes; then :
have__malloc_thread_cleanup="1"
@@ -6719,11 +7229,11 @@ else
fi
else
- enable_lazy_lock="0"
+ enable_lazy_lock=""
fi
-if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
+if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
$as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
enable_lazy_lock="1"
@@ -6796,6 +7306,8 @@ fi
fi
$as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h
+else
+ enable_lazy_lock="0"
fi
@@ -6808,19 +7320,22 @@ else
fi
else
- enable_tls="1"
+ enable_tls=""
fi
-if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing TLS to avoid allocator/threading bootstrap issues" >&5
+if test "x${enable_tls}" = "x" ; then
+ if test "x${force_tls}" = "x1" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing TLS to avoid allocator/threading bootstrap issues" >&5
$as_echo "Forcing TLS to avoid allocator/threading bootstrap issues" >&6; }
- enable_tls="1"
-fi
-if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no TLS to avoid allocator/threading bootstrap issues" >&5
+ enable_tls="1"
+ elif test "x${force_tls}" = "x0" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no TLS to avoid allocator/threading bootstrap issues" >&5
$as_echo "Forcing no TLS to avoid allocator/threading bootstrap issues" >&6; }
- enable_tls="0"
+ enable_tls="0"
+ else
+ enable_tls="1"
+ fi
fi
if test "x${enable_tls}" = "x1" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TLS" >&5
@@ -6851,56 +7366,69 @@ $as_echo "no" >&6; }
enable_tls="0"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+ enable_tls="0"
fi
if test "x${enable_tls}" = "x1" ; then
+ if test "x${force_tls}" = "x0" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: TLS enabled despite being marked unusable on this platform" >&5
+$as_echo "$as_me: WARNING: TLS enabled despite being marked unusable on this platform" >&2;}
+ fi
cat >>confdefs.h <<_ACEOF
#define JEMALLOC_TLS
_ACEOF
elif test "x${force_tls}" = "x1" ; then
- as_fn_error $? "Failed to configure TLS, which is mandatory for correct function" "$LINENO" 5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: TLS disabled despite being marked critical on this platform" >&5
+$as_echo "$as_me: WARNING: TLS disabled despite being marked critical on this platform" >&2;}
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
-$as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
-if ${je_cv_function_ffsl+:} false; then :
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C11 atomics is compilable" >&5
+$as_echo_n "checking whether C11 atomics is compilable... " >&6; }
+if ${je_cv_c11atomics+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <stdio.h>
-#include <strings.h>
-#include <string.h>
+#include <stdint.h>
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#else
+#error Atomics not available
+#endif
int
main ()
{
- {
- int rv = ffsl(0x08);
- printf("%d\n", rv);
- }
+ uint64_t *p = (uint64_t *)0;
+ uint64_t x = 1;
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ uint64_t r = atomic_fetch_add(a, x) + x;
+ return (r == 0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_function_ffsl=yes
+ je_cv_c11atomics=yes
else
- je_cv_function_ffsl=no
+ je_cv_c11atomics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
-$as_echo "$je_cv_function_ffsl" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_c11atomics" >&5
+$as_echo "$je_cv_c11atomics" >&6; }
+
+if test "x${je_cv_c11atomics}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_C11ATOMICS 1" >>confdefs.h
-if test "x${je_cv_function_ffsl}" != "xyes" ; then
- as_fn_error $? "Cannot build without ffsl(3)" "$LINENO" 5
fi
@@ -7002,6 +7530,46 @@ fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(2) is compilable" >&5
+$as_echo_n "checking whether madvise(2) is compilable... " >&6; }
+if ${je_cv_madvise+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ {
+ madvise((void *)0, 0, 0);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_madvise=yes
+else
+ je_cv_madvise=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madvise" >&5
+$as_echo "$je_cv_madvise" >&6; }
+
+if test "x${je_cv_madvise}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_MADVISE " >>confdefs.h
+
+fi
+
+
+
if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
@@ -7097,6 +7665,48 @@ $as_echo "$je_cv_sync_compare_and_swap_8" >&6; }
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
+$as_echo_n "checking for __builtin_clz... " >&6; }
+if ${je_cv_builtin_clz+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ {
+ unsigned x = 0;
+ int y = __builtin_clz(x);
+ }
+ {
+ unsigned long x = 0;
+ int y = __builtin_clzl(x);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_builtin_clz=yes
+else
+ je_cv_builtin_clz=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_builtin_clz" >&5
+$as_echo "$je_cv_builtin_clz" >&6; }
+
+if test "x${je_cv_builtin_clz}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_BUILTIN_CLZ " >>confdefs.h
+
+fi
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5
$as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; }
@@ -7160,8 +7770,6 @@ if test "x${enable_zone_allocator}" = "x1" ; then
if test "x${abi}" != "xmacho"; then
as_fn_error $? "--enable-zone-allocator is only supported on Darwin" "$LINENO" 5
fi
- $as_echo "#define JEMALLOC_IVSALLOC " >>confdefs.h
-
$as_echo "#define JEMALLOC_ZONE " >>confdefs.h
@@ -7175,7 +7783,7 @@ $as_echo_n "checking malloc zone version... " >&6; }
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 14 ? 1 : -1]
+static int foo[sizeof(malloc_zone_t) == sizeof(void *) * 14 ? 1 : -1]
;
return 0;
@@ -7191,7 +7799,7 @@ else
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 15 ? 1 : -1]
+static int foo[sizeof(malloc_zone_t) == sizeof(void *) * 15 ? 1 : -1]
;
return 0;
@@ -7207,7 +7815,7 @@ else
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 16 ? 1 : -1]
+static int foo[sizeof(malloc_zone_t) == sizeof(void *) * 16 ? 1 : -1]
;
return 0;
@@ -7221,7 +7829,7 @@ if ac_fn_c_try_compile "$LINENO"; then :
int
main ()
{
-static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 9 ? 1 : -1]
+static int foo[sizeof(malloc_introspection_t) == sizeof(void *) * 9 ? 1 : -1]
;
return 0;
@@ -7237,7 +7845,7 @@ else
int
main ()
{
-static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 13 ? 1 : -1]
+static int foo[sizeof(malloc_introspection_t) == sizeof(void *) * 13 ? 1 : -1]
;
return 0;
@@ -7260,7 +7868,7 @@ else
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 17 ? 1 : -1]
+static int foo[sizeof(malloc_zone_t) == sizeof(void *) * 17 ? 1 : -1]
;
return 0;
@@ -7276,7 +7884,7 @@ else
int
main ()
{
-static foo[sizeof(malloc_zone_t) > sizeof(void *) * 17 ? 1 : -1]
+static int foo[sizeof(malloc_zone_t) > sizeof(void *) * 17 ? 1 : -1]
;
return 0;
@@ -7316,6 +7924,131 @@ _ACEOF
fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether glibc malloc hook is compilable" >&5
+$as_echo_n "checking whether glibc malloc hook is compilable... " >&6; }
+if ${je_cv_glibc_malloc_hook+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <stddef.h>
+
+extern void (* __free_hook)(void *ptr);
+extern void *(* __malloc_hook)(size_t size);
+extern void *(* __realloc_hook)(void *ptr, size_t size);
+
+int
+main ()
+{
+
+ void *ptr = 0L;
+ if (__malloc_hook) ptr = __malloc_hook(1);
+ if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
+ if (__free_hook && ptr) __free_hook(ptr);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_glibc_malloc_hook=yes
+else
+ je_cv_glibc_malloc_hook=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_glibc_malloc_hook" >&5
+$as_echo "$je_cv_glibc_malloc_hook" >&6; }
+
+if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_GLIBC_MALLOC_HOOK " >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether glibc memalign hook is compilable" >&5
+$as_echo_n "checking whether glibc memalign hook is compilable... " >&6; }
+if ${je_cv_glibc_memalign_hook+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <stddef.h>
+
+extern void *(* __memalign_hook)(size_t alignment, size_t size);
+
+int
+main ()
+{
+
+ void *ptr = 0L;
+ if (__memalign_hook) ptr = __memalign_hook(16, 7);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_glibc_memalign_hook=yes
+else
+ je_cv_glibc_memalign_hook=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_glibc_memalign_hook" >&5
+$as_echo "$je_cv_glibc_memalign_hook" >&6; }
+
+if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_GLIBC_MEMALIGN_HOOK " >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads adaptive mutexes is compilable" >&5
+$as_echo_n "checking whether pthreads adaptive mutexes is compilable... " >&6; }
+if ${je_cv_pthread_mutex_adaptive_np+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <pthread.h>
+
+int
+main ()
+{
+
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+ pthread_mutexattr_destroy(&attr);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_pthread_mutex_adaptive_np=yes
+else
+ je_cv_pthread_mutex_adaptive_np=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_mutex_adaptive_np" >&5
+$as_echo "$je_cv_pthread_mutex_adaptive_np" >&6; }
+
+if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP " >>confdefs.h
+
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5
$as_echo_n "checking for stdbool.h that conforms to C99... " >&6; }
if ${ac_cv_header_stdbool_h+:} false; then :
@@ -7440,7 +8173,7 @@ ac_config_headers="$ac_config_headers $cfghdrs_tup"
-ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc.sh"
+ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc-config bin/jemalloc.sh bin/jeprof"
@@ -8158,8 +8891,13 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
objroot="${objroot}"
+ SHELL="${SHELL}"
srcdir="${srcdir}"
objroot="${objroot}"
+ LG_QUANTA="${LG_QUANTA}"
+ LG_TINY_MIN=${LG_TINY_MIN}
+ LG_PAGE_SIZES="${LG_PAGE_SIZES}"
+ LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP}
srcdir="${srcdir}"
@@ -8205,7 +8943,9 @@ do
"$cfghdrs_tup") CONFIG_HEADERS="$CONFIG_HEADERS $cfghdrs_tup" ;;
"$cfgoutputs_tup") CONFIG_FILES="$CONFIG_FILES $cfgoutputs_tup" ;;
"config.stamp") CONFIG_FILES="$CONFIG_FILES config.stamp" ;;
+ "bin/jemalloc-config") CONFIG_FILES="$CONFIG_FILES bin/jemalloc-config" ;;
"bin/jemalloc.sh") CONFIG_FILES="$CONFIG_FILES bin/jemalloc.sh" ;;
+ "bin/jeprof") CONFIG_FILES="$CONFIG_FILES bin/jeprof" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
@@ -8795,7 +9535,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
;;
"include/jemalloc/internal/size_classes.h":C)
mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+ "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h"
;;
"include/jemalloc/jemalloc_protos_jet.h":C)
mkdir -p "${objroot}include/jemalloc"
@@ -8864,18 +9604,22 @@ $as_echo "jemalloc version : ${jemalloc_version}" >&6; }
$as_echo "library revision : ${rev}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
$as_echo "" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIG : ${CONFIG}" >&5
+$as_echo "CONFIG : ${CONFIG}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CC : ${CC}" >&5
$as_echo "CC : ${CC}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5
-$as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS : ${CFLAGS}" >&5
$as_echo "CFLAGS : ${CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5
+$as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS : ${LDFLAGS}" >&5
$as_echo "LDFLAGS : ${LDFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&5
$as_echo "EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS : ${LIBS}" >&5
$as_echo "LIBS : ${LIBS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: TESTLIBS : ${TESTLIBS}" >&5
+$as_echo "TESTLIBS : ${TESTLIBS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA : ${RPATH_EXTRA}" >&5
$as_echo "RPATH_EXTRA : ${RPATH_EXTRA}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
@@ -8890,12 +9634,12 @@ $as_echo "" >&6; }
$as_echo "PREFIX : ${PREFIX}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: BINDIR : ${BINDIR}" >&5
$as_echo "BINDIR : ${BINDIR}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DATADIR : ${DATADIR}" >&5
+$as_echo "DATADIR : ${DATADIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: INCLUDEDIR : ${INCLUDEDIR}" >&5
$as_echo "INCLUDEDIR : ${INCLUDEDIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBDIR : ${LIBDIR}" >&5
$as_echo "LIBDIR : ${LIBDIR}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DATADIR : ${DATADIR}" >&5
-$as_echo "DATADIR : ${DATADIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: MANDIR : ${MANDIR}" >&5
$as_echo "MANDIR : ${MANDIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
@@ -8920,8 +9664,6 @@ $as_echo " : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; }
$as_echo "install_suffix : ${install_suffix}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen : ${enable_autogen}" >&5
$as_echo "autogen : ${enable_autogen}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: experimental : ${enable_experimental}" >&5
-$as_echo "experimental : ${enable_experimental}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence : ${enable_cc_silence}" >&5
$as_echo "cc-silence : ${enable_cc_silence}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: debug : ${enable_debug}" >&5
@@ -8948,15 +9690,13 @@ $as_echo "utrace : ${enable_utrace}" >&6; }
$as_echo "valgrind : ${enable_valgrind}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: xmalloc : ${enable_xmalloc}" >&5
$as_echo "xmalloc : ${enable_xmalloc}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: mremap : ${enable_mremap}" >&5
-$as_echo "mremap : ${enable_mremap}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: munmap : ${enable_munmap}" >&5
$as_echo "munmap : ${enable_munmap}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: dss : ${enable_dss}" >&5
-$as_echo "dss : ${enable_dss}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: lazy_lock : ${enable_lazy_lock}" >&5
$as_echo "lazy_lock : ${enable_lazy_lock}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tls : ${enable_tls}" >&5
$as_echo "tls : ${enable_tls}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cache-oblivious : ${enable_cache_oblivious}" >&5
+$as_echo "cache-oblivious : ${enable_cache_oblivious}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ===============================================================================" >&5
$as_echo "===============================================================================" >&6; }
diff --git a/deps/jemalloc/configure.ac b/deps/jemalloc/configure.ac
index 4de81dc1d..7a1290e0d 100644
--- a/deps/jemalloc/configure.ac
+++ b/deps/jemalloc/configure.ac
@@ -43,8 +43,11 @@ AC_CACHE_CHECK([whether $1 is compilable],
dnl ============================================================================
+CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'`
+AC_SUBST([CONFIG])
+
dnl Library revision.
-rev=1
+rev=2
AC_SUBST([rev])
srcroot=$srcdir
@@ -134,6 +137,7 @@ if test "x$CFLAGS" = "x" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
fi
JE_CFLAGS_APPEND([-Wall])
+ JE_CFLAGS_APPEND([-Werror=declaration-after-statement])
JE_CFLAGS_APPEND([-pipe])
JE_CFLAGS_APPEND([-g3])
elif test "x$je_cv_msvc" = "xyes" ; then
@@ -141,7 +145,8 @@ if test "x$CFLAGS" = "x" ; then
JE_CFLAGS_APPEND([-Zi])
JE_CFLAGS_APPEND([-MT])
JE_CFLAGS_APPEND([-W3])
- CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
+ JE_CFLAGS_APPEND([-FS])
+ CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat"
fi
fi
dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
@@ -155,6 +160,10 @@ if test "x${ac_cv_big_endian}" = "x1" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
fi
+if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
+ CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99"
+fi
+
AC_CHECK_SIZEOF([void *])
if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
LG_SIZEOF_PTR=3
@@ -201,23 +210,14 @@ AC_CANONICAL_HOST
dnl CPU-specific settings.
CPU_SPINWAIT=""
case "${host_cpu}" in
- i[[345]]86)
- ;;
i686|x86_64)
- JE_COMPILABLE([pause instruction], [],
- [[__asm__ volatile("pause"); return 0;]],
- [je_cv_pause])
+ AC_CACHE_VAL([je_cv_pause],
+ [JE_COMPILABLE([pause instruction], [],
+ [[__asm__ volatile("pause"); return 0;]],
+ [je_cv_pause])])
if test "x${je_cv_pause}" = "xyes" ; then
CPU_SPINWAIT='__asm__ volatile("pause")'
fi
- dnl emmintrin.h fails to compile unless MMX, SSE, and SSE2 are
- dnl supported.
- JE_COMPILABLE([SSE2 intrinsics], [
-#include <emmintrin.h>
-], [], [je_cv_sse2])
- if test "x${je_cv_sse2}" = "xyes" ; then
- AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ])
- fi
;;
powerpc)
AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ])
@@ -258,9 +258,9 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
dnl definitions need to be seen before any headers are included, which is a pain
dnl to make happen otherwise.
default_munmap="1"
-JEMALLOC_USABLE_SIZE_CONST="const"
+maps_coalesce="1"
case "${host}" in
- *-*-darwin*)
+ *-*-darwin* | *-*-ios*)
CFLAGS="$CFLAGS"
abi="macho"
AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
@@ -269,7 +269,7 @@ case "${host}" in
so="dylib"
importlib="${so}"
force_tls="0"
- DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+ DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
SOREV="${rev}.${so}"
sbrk_deprecated="1"
;;
@@ -279,6 +279,22 @@ case "${host}" in
AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
force_lazy_lock="1"
;;
+ *-*-dragonfly*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ ;;
+ *-*-openbsd*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ force_tls="0"
+ ;;
+ *-*-bitrig*)
+ CFLAGS="$CFLAGS"
+ abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ ;;
*-*-linux*)
CFLAGS="$CFLAGS"
CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
@@ -286,7 +302,7 @@ case "${host}" in
AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
- JEMALLOC_USABLE_SIZE_CONST=""
+ AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
default_munmap="0"
;;
*-*-netbsd*)
@@ -322,9 +338,11 @@ case "${host}" in
fi
abi="xcoff"
;;
- *-*-mingw*)
+ *-*-mingw* | *-*-cygwin*)
abi="pecoff"
force_tls="0"
+ force_lazy_lock="1"
+ maps_coalesce="0"
RPATH=""
so="dll"
if test "x$je_cv_msvc" = "xyes" ; then
@@ -351,6 +369,22 @@ case "${host}" in
abi="elf"
;;
esac
+
+JEMALLOC_USABLE_SIZE_CONST=const
+AC_CHECK_HEADERS([malloc.h], [
+ AC_MSG_CHECKING([whether malloc_usable_size definition can use const argument])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [#include <malloc.h>
+ #include <stddef.h>
+ size_t malloc_usable_size(const void *ptr);
+ ],
+ [])],[
+ AC_MSG_RESULT([yes])
+ ],[
+ JEMALLOC_USABLE_SIZE_CONST=
+ AC_MSG_RESULT([no])
+ ])
+])
AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST])
AC_SUBST([abi])
AC_SUBST([RPATH])
@@ -387,7 +421,7 @@ SAVED_CFLAGS="${CFLAGS}"
JE_CFLAGS_APPEND([-Werror])
JE_COMPILABLE([tls_model attribute], [],
[static __thread int
- __attribute__((tls_model("initial-exec"))) foo;
+ __attribute__((tls_model("initial-exec"), unused)) foo;
foo = 0;],
[je_cv_tls_model])
CFLAGS="${SAVED_CFLAGS}"
@@ -397,6 +431,36 @@ if test "x${je_cv_tls_model}" = "xyes" ; then
else
AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
fi
+dnl Check for alloc_size attribute support.
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([alloc_size attribute], [#include <stdlib.h>],
+ [void *foo(size_t size) __attribute__((alloc_size(1)));],
+ [je_cv_alloc_size])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_alloc_size}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ])
+fi
+dnl Check for format(gnu_printf, ...) attribute support.
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include <stdlib.h>],
+ [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));],
+ [je_cv_format_gnu_printf])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ])
+fi
+dnl Check for format(printf, ...) attribute support.
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
+ [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));],
+ [je_cv_format_printf])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_format_printf}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
+fi
dnl Support optional additions to rpath.
AC_ARG_WITH([rpath],
@@ -428,7 +492,7 @@ AC_PROG_RANLIB
AC_PATH_PROG([LD], [ld], [false], [$PATH])
AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
-public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
+public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
dnl Check for allocator-related functions that should be wrapped.
AC_CHECK_FUNC([memalign],
@@ -438,24 +502,6 @@ AC_CHECK_FUNC([valloc],
[AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ])
public_syms="${public_syms} valloc"])
-dnl Support the experimental API by default.
-AC_ARG_ENABLE([experimental],
- [AS_HELP_STRING([--disable-experimental],
- [Disable support for the experimental API])],
-[if test "x$enable_experimental" = "xno" ; then
- enable_experimental="0"
-else
- enable_experimental="1"
-fi
-],
-[enable_experimental="1"]
-)
-if test "x$enable_experimental" = "x1" ; then
- AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ])
- public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
-fi
-AC_SUBST([enable_experimental])
-
dnl Do not compute test code coverage by default.
GCOV_FLAGS=
AC_ARG_ENABLE([code-coverage],
@@ -501,6 +547,7 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
fi
+AC_SUBST([JEMALLOC_CPREFIX])
AC_ARG_WITH([export],
[AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
@@ -533,48 +580,54 @@ dnl jemalloc_protos_jet.h easy.
je_="je_"
AC_SUBST([je_])
-cfgoutputs_in="${srcroot}Makefile.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in"
+cfgoutputs_in="Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
+cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_internal.h.in"
+cfgoutputs_in="${cfgoutputs_in} test/test.sh.in"
+cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in"
cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} jemalloc.pc"
cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_out="${cfgoutputs_out} test/test.sh"
cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h"
cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} jemalloc.pc:jemalloc.pc.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in"
cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in"
-cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_in="include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.txt"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh"
+cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in"
cfghdrs_out="include/jemalloc/jemalloc_defs.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h"
@@ -592,21 +645,20 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h"
cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h"
cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in"
-dnl Do not silence irrelevant compiler warnings by default, since enabling this
-dnl option incurs a performance penalty.
+dnl Silence irrelevant compiler warnings by default.
AC_ARG_ENABLE([cc-silence],
- [AS_HELP_STRING([--enable-cc-silence],
- [Silence irrelevant compiler warnings])],
+ [AS_HELP_STRING([--disable-cc-silence],
+ [Do not silence irrelevant compiler warnings])],
[if test "x$enable_cc_silence" = "xno" ; then
enable_cc_silence="0"
else
enable_cc_silence="1"
fi
],
-[enable_cc_silence="0"]
+[enable_cc_silence="1"]
)
if test "x$enable_cc_silence" = "x1" ; then
AC_DEFINE([JEMALLOC_CC_SILENCE], [ ])
@@ -614,7 +666,8 @@ fi
dnl Do not compile with debugging by default.
AC_ARG_ENABLE([debug],
- [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])],
+ [AS_HELP_STRING([--enable-debug],
+ [Build debugging code (implies --enable-ivsalloc)])],
[if test "x$enable_debug" = "xno" ; then
enable_debug="0"
else
@@ -625,13 +678,17 @@ fi
)
if test "x$enable_debug" = "x1" ; then
AC_DEFINE([JEMALLOC_DEBUG], [ ])
+fi
+if test "x$enable_debug" = "x1" ; then
+ AC_DEFINE([JEMALLOC_DEBUG], [ ])
enable_ivsalloc="1"
fi
AC_SUBST([enable_debug])
dnl Do not validate pointers by default.
AC_ARG_ENABLE([ivsalloc],
- [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])],
+ [AS_HELP_STRING([--enable-ivsalloc],
+ [Validate pointers passed through the public API])],
[if test "x$enable_ivsalloc" = "xno" ; then
enable_ivsalloc="0"
else
@@ -721,7 +778,7 @@ fi,
if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
if test "x$LUNWIND" = "x-lunwind" ; then
- AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+ AC_CHECK_LIB([unwind], [unw_backtrace], [LIBS="$LIBS $LUNWIND"],
[enable_prof_libunwind="0"])
else
LIBS="$LIBS $LUNWIND"
@@ -782,11 +839,6 @@ fi
AC_MSG_CHECKING([configured backtracing method])
AC_MSG_RESULT([$backtrace_method])
if test "x$enable_prof" = "x1" ; then
- if test "x${force_tls}" = "x0" ; then
- AC_MSG_ERROR([Heap profiling requires TLS]);
- fi
- force_tls="1"
-
if test "x$abi" != "xpecoff"; then
dnl Heap profiling uses the log(3) function.
LIBS="$LIBS -lm"
@@ -812,32 +864,11 @@ if test "x$enable_tcache" = "x1" ; then
fi
AC_SUBST([enable_tcache])
-dnl Disable mremap() for huge realloc() by default.
-AC_ARG_ENABLE([mremap],
- [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])],
-[if test "x$enable_mremap" = "xno" ; then
- enable_mremap="0"
-else
- enable_mremap="1"
+dnl Indicate whether adjacent virtual memory mappings automatically coalesce
+dnl (and fragment on demand).
+if test "x${maps_coalesce}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ])
fi
-],
-[enable_mremap="0"]
-)
-if test "x$enable_mremap" = "x1" ; then
- JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
-#define _GNU_SOURCE
-#include <sys/mman.h>
-], [
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
-], [je_cv_mremap_fixed])
- if test "x${je_cv_mremap_fixed}" = "xno" ; then
- enable_mremap="0"
- fi
-fi
-if test "x$enable_mremap" = "x1" ; then
- AC_DEFINE([JEMALLOC_MREMAP], [ ])
-fi
-AC_SUBST([enable_mremap])
dnl Enable VM deallocation via munmap() by default.
AC_ARG_ENABLE([munmap],
@@ -855,34 +886,22 @@ if test "x$enable_munmap" = "x1" ; then
fi
AC_SUBST([enable_munmap])
-dnl Do not enable allocation from DSS by default.
-AC_ARG_ENABLE([dss],
- [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])],
-[if test "x$enable_dss" = "xno" ; then
- enable_dss="0"
-else
- enable_dss="1"
-fi
-],
-[enable_dss="0"]
-)
+dnl Enable allocation from DSS if supported by the OS.
+have_dss="1"
dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support.
AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"])
if test "x$have_sbrk" = "x1" ; then
- if test "x$sbrk_deprecated" == "x1" ; then
+ if test "x$sbrk_deprecated" = "x1" ; then
AC_MSG_RESULT([Disabling dss allocation because sbrk is deprecated])
- enable_dss="0"
- else
- AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ])
+ have_dss="0"
fi
else
- enable_dss="0"
+ have_dss="0"
fi
-if test "x$enable_dss" = "x1" ; then
+if test "x$have_dss" = "x1" ; then
AC_DEFINE([JEMALLOC_DSS], [ ])
fi
-AC_SUBST([enable_dss])
dnl Support the junk/zero filling option by default.
AC_ARG_ENABLE([fill],
@@ -974,8 +993,83 @@ if test "x$enable_xmalloc" = "x1" ; then
fi
AC_SUBST([enable_xmalloc])
-AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
- [je_cv_static_page_shift],
+dnl Support cache-oblivious allocation alignment by default.
+AC_ARG_ENABLE([cache-oblivious],
+ [AS_HELP_STRING([--disable-cache-oblivious],
+ [Disable support for cache-oblivious allocation alignment])],
+[if test "x$enable_cache_oblivious" = "xno" ; then
+ enable_cache_oblivious="0"
+else
+ enable_cache_oblivious="1"
+fi
+],
+[enable_cache_oblivious="1"]
+)
+if test "x$enable_cache_oblivious" = "x1" ; then
+ AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
+fi
+AC_SUBST([enable_cache_oblivious])
+
+dnl ============================================================================
+dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found.
+dnl One of those two functions should (theoretically) exist on all platforms
+dnl that jemalloc currently has a chance of functioning on without modification.
+dnl We additionally assume ffs() or __builtin_ffs() are defined if
+dnl ffsl() or __builtin_ffsl() are defined, respectively.
+JE_COMPILABLE([a program using __builtin_ffsl], [
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
+], [
+ {
+ int rv = __builtin_ffsl(0x08);
+ printf("%d\n", rv);
+ }
+], [je_cv_gcc_builtin_ffsl])
+if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs])
+else
+ JE_COMPILABLE([a program using ffsl], [
+ #include <stdio.h>
+ #include <strings.h>
+ #include <string.h>
+ ], [
+ {
+ int rv = ffsl(0x08);
+ printf("%d\n", rv);
+ }
+ ], [je_cv_function_ffsl])
+ if test "x${je_cv_function_ffsl}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs])
+ else
+ AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()])
+ fi
+fi
+
+AC_ARG_WITH([lg_tiny_min],
+ [AS_HELP_STRING([--with-lg-tiny-min=<lg-tiny-min>],
+ [Base 2 log of minimum tiny size class to support])],
+ [LG_TINY_MIN="$with_lg_tiny_min"],
+ [LG_TINY_MIN="3"])
+AC_DEFINE_UNQUOTED([LG_TINY_MIN], [$LG_TINY_MIN])
+
+AC_ARG_WITH([lg_quantum],
+ [AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
+ [Base 2 log of minimum allocation alignment])],
+ [LG_QUANTA="$with_lg_quantum"],
+ [LG_QUANTA="3 4"])
+if test "x$with_lg_quantum" != "x" ; then
+ AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])
+fi
+
+AC_ARG_WITH([lg_page],
+ [AS_HELP_STRING([--with-lg-page=<lg-page>], [Base 2 log of system page size])],
+ [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"])
+if test "x$LG_PAGE" = "xdetect"; then
+ AC_CACHE_CHECK([LG_PAGE],
+ [je_cv_lg_page],
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[
#include <strings.h>
@@ -1000,7 +1094,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
if (result == -1) {
return 1;
}
- result = ffsl(result) - 1;
+ result = JEMALLOC_INTERNAL_FFSL(result) - 1;
f = fopen("conftest.out", "w");
if (f == NULL) {
@@ -1011,24 +1105,65 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
return 0;
]])],
- [je_cv_static_page_shift=`cat conftest.out`],
- [je_cv_static_page_shift=undefined]))
-
-if test "x$je_cv_static_page_shift" != "xundefined"; then
- AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift])
+ [je_cv_lg_page=`cat conftest.out`],
+ [je_cv_lg_page=undefined],
+ [je_cv_lg_page=12]))
+fi
+if test "x${je_cv_lg_page}" != "x" ; then
+ LG_PAGE="${je_cv_lg_page}"
+fi
+if test "x${LG_PAGE}" != "xundefined" ; then
+ AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE])
else
- AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
+ AC_MSG_ERROR([cannot determine value for LG_PAGE])
fi
+AC_ARG_WITH([lg_page_sizes],
+ [AS_HELP_STRING([--with-lg-page-sizes=<lg-page-sizes>],
+ [Base 2 logs of system page sizes to support])],
+ [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"])
+
+AC_ARG_WITH([lg_size_class_group],
+ [AS_HELP_STRING([--with-lg-size-class-group=<lg-size-class-group>],
+ [Base 2 log of size classes per doubling])],
+ [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"],
+ [LG_SIZE_CLASS_GROUP="2"])
+
dnl ============================================================================
dnl jemalloc configuration.
dnl
-dnl Set VERSION if source directory has an embedded git repository.
-if test -d "${srcroot}.git" ; then
- git describe --long --abbrev=40 > ${srcroot}VERSION
+dnl Set VERSION if source directory is inside a git repository.
+if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+ dnl Pattern globs aren't powerful enough to match both single- and
+ dnl double-digit version numbers, so iterate over patterns to support up to
+ dnl version 99.99.99 without any accidental matches.
+ rm -f "${objroot}VERSION"
+ for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+ '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
+ if test ! -e "${objroot}VERSION" ; then
+ (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+ if test $? -eq 0 ; then
+ mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+ break
+ fi
+ fi
+ done
fi
-jemalloc_version=`cat ${srcroot}VERSION`
+rm -f "${objroot}VERSION.tmp"
+if test ! -e "${objroot}VERSION" ; then
+ if test ! -e "${srcroot}VERSION" ; then
+ AC_MSG_RESULT(
+ [Missing VERSION file, and unable to generate it; creating bogus VERSION])
+ echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
+ else
+ cp ${srcroot}VERSION ${objroot}VERSION
+ fi
+fi
+jemalloc_version=`cat "${objroot}VERSION"`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'`
jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'`
@@ -1055,6 +1190,32 @@ fi
CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+dnl Check whether clock_gettime(2) is in libc or librt. This function is only
+dnl used in test code, so save the result to TESTLIBS to avoid polluting LIBS.
+SAVED_LIBS="${LIBS}"
+LIBS=
+AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"])
+AC_SUBST([TESTLIBS])
+LIBS="${SAVED_LIBS}"
+
+dnl Check if the GNU-specific secure_getenv function exists.
+AC_CHECK_FUNC([secure_getenv],
+ [have_secure_getenv="1"],
+ [have_secure_getenv="0"]
+ )
+if test "x$have_secure_getenv" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
+fi
+
+dnl Check if the Solaris/BSD issetugid function exists.
+AC_CHECK_FUNC([issetugid],
+ [have_issetugid="1"],
+ [have_issetugid="0"]
+ )
+if test "x$have_issetugid" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ])
+fi
+
dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use
dnl it rather than pthreads TSD cleanup functions to support cleanup during
dnl thread exit, in order to avoid pthreads library recursion during
@@ -1089,9 +1250,9 @@ else
enable_lazy_lock="1"
fi
],
-[enable_lazy_lock="0"]
+[enable_lazy_lock=""]
)
-if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
+if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then
AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
enable_lazy_lock="1"
fi
@@ -1104,6 +1265,8 @@ if test "x$enable_lazy_lock" = "x1" ; then
])
fi
AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
+else
+ enable_lazy_lock="0"
fi
AC_SUBST([enable_lazy_lock])
@@ -1115,15 +1278,18 @@ else
enable_tls="1"
fi
,
-enable_tls="1"
+enable_tls=""
)
-if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
- AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues])
- enable_tls="1"
-fi
-if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
- AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues])
- enable_tls="0"
+if test "x${enable_tls}" = "x" ; then
+ if test "x${force_tls}" = "x1" ; then
+ AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues])
+ enable_tls="1"
+ elif test "x${force_tls}" = "x0" ; then
+ AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues])
+ enable_tls="0"
+ else
+ enable_tls="1"
+ fi
fi
if test "x${enable_tls}" = "x1" ; then
AC_MSG_CHECKING([for TLS])
@@ -1138,30 +1304,38 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
AC_MSG_RESULT([yes]),
AC_MSG_RESULT([no])
enable_tls="0")
+else
+ enable_tls="0"
fi
AC_SUBST([enable_tls])
if test "x${enable_tls}" = "x1" ; then
+ if test "x${force_tls}" = "x0" ; then
+ AC_MSG_WARN([TLS enabled despite being marked unusable on this platform])
+ fi
AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ])
elif test "x${force_tls}" = "x1" ; then
- AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function])
+ AC_MSG_WARN([TLS disabled despite being marked critical on this platform])
fi
dnl ============================================================================
-dnl Check for ffsl(3), and fail if not found. This function exists on all
-dnl platforms that jemalloc currently has a chance of functioning on without
-dnl modification.
-JE_COMPILABLE([a program using ffsl], [
-#include <stdio.h>
-#include <strings.h>
-#include <string.h>
+dnl Check for C11 atomics.
+
+JE_COMPILABLE([C11 atomics], [
+#include <stdint.h>
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#else
+#error Atomics not available
+#endif
], [
- {
- int rv = ffsl(0x08);
- printf("%d\n", rv);
- }
-], [je_cv_function_ffsl])
-if test "x${je_cv_function_ffsl}" != "xyes" ; then
- AC_MSG_ERROR([Cannot build without ffsl(3)])
+ uint64_t *p = (uint64_t *)0;
+ uint64_t x = 1;
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ uint64_t r = atomic_fetch_add(a, x) + x;
+ return (r == 0);
+], [je_cv_c11atomics])
+if test "x${je_cv_c11atomics}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_C11ATOMICS])
fi
dnl ============================================================================
@@ -1210,6 +1384,20 @@ if test "x${je_cv_osatomic}" = "xyes" ; then
fi
dnl ============================================================================
+dnl Check for madvise(2).
+
+JE_COMPILABLE([madvise(2)], [
+#include <sys/mman.h>
+], [
+ {
+ madvise((void *)0, 0, 0);
+ }
+], [je_cv_madvise])
+if test "x${je_cv_madvise}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ])
+fi
+
+dnl ============================================================================
dnl Check whether __sync_{add,sub}_and_fetch() are available despite
dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined.
@@ -1244,6 +1432,29 @@ if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
fi
dnl ============================================================================
+dnl Check for __builtin_clz() and __builtin_clzl().
+
+AC_CACHE_CHECK([for __builtin_clz],
+ [je_cv_builtin_clz],
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+ [
+ {
+ unsigned x = 0;
+ int y = __builtin_clz(x);
+ }
+ {
+ unsigned long x = 0;
+ int y = __builtin_clzl(x);
+ }
+ ])],
+ [je_cv_builtin_clz=yes],
+ [je_cv_builtin_clz=no])])
+
+if test "x${je_cv_builtin_clz}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
+fi
+
+dnl ============================================================================
dnl Check for spinlock(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSSpin*()], [
@@ -1281,7 +1492,6 @@ if test "x${enable_zone_allocator}" = "x1" ; then
if test "x${abi}" != "xmacho"; then
AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin])
fi
- AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
AC_DEFINE([JEMALLOC_ZONE], [ ])
dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
@@ -1291,7 +1501,7 @@ if test "x${enable_zone_allocator}" = "x1" ; then
AC_DEFUN([JE_ZONE_PROGRAM],
[AC_LANG_PROGRAM(
[#include <malloc/malloc.h>],
- [static foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]]
+ [static int foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]]
)])
AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[
@@ -1317,6 +1527,49 @@ if test "x${enable_zone_allocator}" = "x1" ; then
fi
dnl ============================================================================
+dnl Check for glibc malloc hooks
+
+JE_COMPILABLE([glibc malloc hook], [
+#include <stddef.h>
+
+extern void (* __free_hook)(void *ptr);
+extern void *(* __malloc_hook)(size_t size);
+extern void *(* __realloc_hook)(void *ptr, size_t size);
+], [
+ void *ptr = 0L;
+ if (__malloc_hook) ptr = __malloc_hook(1);
+ if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
+ if (__free_hook && ptr) __free_hook(ptr);
+], [je_cv_glibc_malloc_hook])
+if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ])
+fi
+
+JE_COMPILABLE([glibc memalign hook], [
+#include <stddef.h>
+
+extern void *(* __memalign_hook)(size_t alignment, size_t size);
+], [
+ void *ptr = 0L;
+ if (__memalign_hook) ptr = __memalign_hook(16, 7);
+], [je_cv_glibc_memalign_hook])
+if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ])
+fi
+
+JE_COMPILABLE([pthreads adaptive mutexes], [
+#include <pthread.h>
+], [
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+ pthread_mutexattr_destroy(&attr);
+], [je_cv_pthread_mutex_adaptive_np])
+if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ])
+fi
+
+dnl ============================================================================
dnl Check for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
@@ -1376,10 +1629,15 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [
])
AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [
mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+ "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h"
], [
+ SHELL="${SHELL}"
srcdir="${srcdir}"
objroot="${objroot}"
+ LG_QUANTA="${LG_QUANTA}"
+ LG_TINY_MIN=${LG_TINY_MIN}
+ LG_PAGE_SIZES="${LG_PAGE_SIZES}"
+ LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP}
])
AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [
mkdir -p "${objroot}include/jemalloc"
@@ -1426,7 +1684,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup])
dnl ============================================================================
dnl Generate outputs.
-AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh])
+AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc-config bin/jemalloc.sh bin/jeprof])
AC_SUBST([cfgoutputs_in])
AC_SUBST([cfgoutputs_out])
AC_OUTPUT
@@ -1437,12 +1695,14 @@ AC_MSG_RESULT([=================================================================
AC_MSG_RESULT([jemalloc version : ${jemalloc_version}])
AC_MSG_RESULT([library revision : ${rev}])
AC_MSG_RESULT([])
+AC_MSG_RESULT([CONFIG : ${CONFIG}])
AC_MSG_RESULT([CC : ${CC}])
-AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}])
AC_MSG_RESULT([CFLAGS : ${CFLAGS}])
+AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}])
AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}])
AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}])
AC_MSG_RESULT([LIBS : ${LIBS}])
+AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}])
AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}])
AC_MSG_RESULT([])
AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}])
@@ -1450,9 +1710,9 @@ AC_MSG_RESULT([XSLROOT : ${XSLROOT}])
AC_MSG_RESULT([])
AC_MSG_RESULT([PREFIX : ${PREFIX}])
AC_MSG_RESULT([BINDIR : ${BINDIR}])
+AC_MSG_RESULT([DATADIR : ${DATADIR}])
AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}])
AC_MSG_RESULT([LIBDIR : ${LIBDIR}])
-AC_MSG_RESULT([DATADIR : ${DATADIR}])
AC_MSG_RESULT([MANDIR : ${MANDIR}])
AC_MSG_RESULT([])
AC_MSG_RESULT([srcroot : ${srcroot}])
@@ -1465,7 +1725,6 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}])
AC_MSG_RESULT([install_suffix : ${install_suffix}])
AC_MSG_RESULT([autogen : ${enable_autogen}])
-AC_MSG_RESULT([experimental : ${enable_experimental}])
AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([code-coverage : ${enable_code_coverage}])
@@ -1479,9 +1738,8 @@ AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([utrace : ${enable_utrace}])
AC_MSG_RESULT([valgrind : ${enable_valgrind}])
AC_MSG_RESULT([xmalloc : ${enable_xmalloc}])
-AC_MSG_RESULT([mremap : ${enable_mremap}])
AC_MSG_RESULT([munmap : ${enable_munmap}])
-AC_MSG_RESULT([dss : ${enable_dss}])
AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}])
AC_MSG_RESULT([tls : ${enable_tls}])
+AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}])
AC_MSG_RESULT([===============================================================================])
diff --git a/deps/jemalloc/doc/jemalloc.3 b/deps/jemalloc/doc/jemalloc.3
index d04fbb498..2e6b2c0e8 100644
--- a/deps/jemalloc/doc/jemalloc.3
+++ b/deps/jemalloc/doc/jemalloc.3
@@ -2,12 +2,12 @@
.\" Title: JEMALLOC
.\" Author: Jason Evans
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\" Date: 03/31/2014
+.\" Date: 09/24/2015
.\" Manual: User Manual
-.\" Source: jemalloc 3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340
+.\" Source: jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c
.\" Language: English
.\"
-.TH "JEMALLOC" "3" "03/31/2014" "jemalloc 3.6.0-0-g46c0af68bd24" "User Manual"
+.TH "JEMALLOC" "3" "09/24/2015" "jemalloc 4.0.3-0-ge9192eacf893" "User Manual"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@@ -31,13 +31,12 @@
jemalloc \- general purpose memory allocation functions
.SH "LIBRARY"
.PP
-This manual describes jemalloc 3\&.6\&.0\-0\-g46c0af68bd248b04df75e4f92d5fb804c3d75340\&. More information can be found at the
+This manual describes jemalloc 4\&.0\&.3\-0\-ge9192eacf8935e29fc62fddc2701f7942b1cc02c\&. More information can be found at the
\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
.SH "SYNOPSIS"
.sp
.ft B
.nf
-#include <stdlib\&.h>
#include <jemalloc/jemalloc\&.h>
.fi
.ft
@@ -65,6 +64,8 @@ This manual describes jemalloc 3\&.6\&.0\-0\-g46c0af68bd248b04df75e4f92d5fb804c3
.BI "size_t sallocx(void\ *" "ptr" ", int\ " "flags" ");"
.HP \w'void\ dallocx('u
.BI "void dallocx(void\ *" "ptr" ", int\ " "flags" ");"
+.HP \w'void\ sdallocx('u
+.BI "void sdallocx(void\ *" "ptr" ", size_t\ " "size" ", int\ " "flags" ");"
.HP \w'size_t\ nallocx('u
.BI "size_t nallocx(size_t\ " "size" ", int\ " "flags" ");"
.HP \w'int\ mallctl('u
@@ -81,17 +82,6 @@ This manual describes jemalloc 3\&.6\&.0\-0\-g46c0af68bd248b04df75e4f92d5fb804c3
.BI "void (*malloc_message)(void\ *" "cbopaque" ", const\ char\ *" "s" ");"
.PP
const char *\fImalloc_conf\fR;
-.SS "Experimental API"
-.HP \w'int\ allocm('u
-.BI "int allocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
-.HP \w'int\ rallocm('u
-.BI "int rallocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", size_t\ " "extra" ", int\ " "flags" ");"
-.HP \w'int\ sallocm('u
-.BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");"
-.HP \w'int\ dallocm('u
-.BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");"
-.HP \w'int\ nallocm('u
-.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
.SH "DESCRIPTION"
.SS "Standard API"
.PP
@@ -118,7 +108,7 @@ The
\fBposix_memalign\fR\fB\fR
function allocates
\fIsize\fR
-bytes of memory such that the allocation\*(Aqs base address is an even multiple of
+bytes of memory such that the allocation\*(Aqs base address is a multiple of
\fIalignment\fR, and returns the allocation in the value pointed to by
\fIptr\fR\&. The requested
\fIalignment\fR
@@ -129,7 +119,7 @@ The
\fBaligned_alloc\fR\fB\fR
function allocates
\fIsize\fR
-bytes of memory such that the allocation\*(Aqs base address is an even multiple of
+bytes of memory such that the allocation\*(Aqs base address is a multiple of
\fIalignment\fR\&. The requested
\fIalignment\fR
must be a power of 2\&. Behavior is undefined if
@@ -172,7 +162,8 @@ The
\fBrallocx\fR\fB\fR,
\fBxallocx\fR\fB\fR,
\fBsallocx\fR\fB\fR,
-\fBdallocx\fR\fB\fR, and
+\fBdallocx\fR\fB\fR,
+\fBsdallocx\fR\fB\fR, and
\fBnallocx\fR\fB\fR
functions all have a
\fIflags\fR
@@ -201,11 +192,32 @@ is a power of 2\&.
Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&.
.RE
.PP
+\fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB) \fR
+.RS 4
+Use the thread\-specific cache (tcache) specified by the identifier
+\fItc\fR, which must have been acquired via the
+"tcache\&.create"
+mallctl\&. This macro does not validate that
+\fItc\fR
+specifies a valid identifier\&.
+.RE
+.PP
+\fBMALLOCX_TCACHE_NONE\fR
+.RS 4
+Do not use a thread\-specific cache (tcache)\&. Unless
+\fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR
+or
+\fBMALLOCX_TCACHE_NONE\fR
+is specified, an automatically managed tcache will be used under many circumstances\&. This macro cannot be used in the same
+\fIflags\fR
+argument as
+\fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR\&.
+.RE
+.PP
\fBMALLOCX_ARENA(\fR\fB\fIa\fR\fR\fB) \fR
.RS 4
Use the arena specified by the index
-\fIa\fR
-(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that
+\fIa\fR\&. This macro has no effect for regions that were allocated via an arena other than the one specified\&. This macro does not validate that
\fIa\fR
specifies an arena index in the valid range\&.
.RE
@@ -258,6 +270,17 @@ function causes the memory referenced by
to be made available for future allocations\&.
.PP
The
+\fBsdallocx\fR\fB\fR
+function is an extension of
+\fBdallocx\fR\fB\fR
+with a
+\fIsize\fR
+parameter to allow the caller to pass in the allocation size as an optimization\&. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by
+\fBnallocx\fR\fB\fR
+or
+\fBsallocx\fR\fB\fR\&.
+.PP
+The
\fBnallocx\fR\fB\fR
function allocates no memory, but it performs the same size computation as the
\fBmallocx\fR\fB\fR
@@ -351,7 +374,7 @@ uses the
\fBmallctl*\fR\fB\fR
functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If
\fB\-\-enable\-stats\fR
-is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&.
+is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq, \(lql\(rq, and \(lqh\(rq can be specified to omit per size class statistics for bins, large objects, and huge objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&.
.PP
The
\fBmalloc_usable_size\fR\fB\fR
@@ -362,126 +385,6 @@ function is not a mechanism for in\-place
\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by
\fBmalloc_usable_size\fR\fB\fR
should not be depended on, since such behavior is entirely implementation\-dependent\&.
-.SS "Experimental API"
-.PP
-The experimental API is subject to change or removal without regard for backward compatibility\&. If
-\fB\-\-disable\-experimental\fR
-is specified during configuration, the experimental API is omitted\&.
-.PP
-The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR,
-\fBdallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions all have a
-\fIflags\fR
-argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following:
-.PP
-\fBALLOCM_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-(1 << \fIla\fR)\&. This macro does not validate that
-\fIla\fR
-is within the valid range\&.
-.RE
-.PP
-\fBALLOCM_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-\fIa\fR, where
-\fIa\fR
-is a power of two\&. This macro does not validate that
-\fIa\fR
-is a power of 2\&.
-.RE
-.PP
-\fBALLOCM_ZERO\fR
-.RS 4
-Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&.
-.RE
-.PP
-\fBALLOCM_NO_MOVE\fR
-.RS 4
-For reallocation, fail rather than moving the object\&. This constraint can apply to both growth and shrinkage\&.
-.RE
-.PP
-\fBALLOCM_ARENA(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Use the arena specified by the index
-\fIa\fR
-(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that
-\fIa\fR
-specifies an arena index in the valid range\&.
-.RE
-.PP
-The
-\fBallocm\fR\fB\fR
-function allocates at least
-\fIsize\fR
-bytes of memory, sets
-\fI*ptr\fR
-to the base address of the allocation, and sets
-\fI*rsize\fR
-to the real size of the allocation if
-\fIrsize\fR
-is not
-\fBNULL\fR\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
-.PP
-The
-\fBrallocm\fR\fB\fR
-function resizes the allocation at
-\fI*ptr\fR
-to be at least
-\fIsize\fR
-bytes, sets
-\fI*ptr\fR
-to the base address of the allocation if it moved, and sets
-\fI*rsize\fR
-to the real size of the allocation if
-\fIrsize\fR
-is not
-\fBNULL\fR\&. If
-\fIextra\fR
-is non\-zero, an attempt is made to resize the allocation to be at least
-(\fIsize\fR + \fIextra\fR)
-bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, if request size overflows due to size class and/or alignment constraints, or if
-(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&.
-.PP
-The
-\fBsallocm\fR\fB\fR
-function sets
-\fI*rsize\fR
-to the real size of the allocation\&.
-.PP
-The
-\fBdallocm\fR\fB\fR
-function causes the memory referenced by
-\fIptr\fR
-to be made available for future allocations\&.
-.PP
-The
-\fBnallocm\fR\fB\fR
-function allocates no memory, but it performs the same size computation as the
-\fBallocm\fR\fB\fR
-function, and if
-\fIrsize\fR
-is not
-\fBNULL\fR
-it sets
-\fI*rsize\fR
-to the real size of the allocation that would result from the equivalent
-\fBallocm\fR\fB\fR
-function call\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
.SH "TUNING"
.PP
Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&.
@@ -519,8 +422,8 @@ options\&. Some options have boolean values (true/false), others have integer va
Traditionally, allocators have used
\fBsbrk\fR(2)
to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If
-\fB\-\-enable\-dss\fR
-is specified during configuration, this allocator uses both
+\fBsbrk\fR(2)
+is supported by the operating system, this allocator uses both
\fBmmap\fR(2)
and
\fBsbrk\fR(2), in that order of preference; otherwise only
@@ -535,18 +438,29 @@ is specified during configuration, this allocator supports thread\-specific cach
.PP
Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&.
.PP
-User objects are broken into three categories according to size: small, large, and huge\&. Small objects are smaller than one page\&. Large objects are smaller than the chunk size\&. Huge objects are a multiple of the chunk size\&. Small and large objects are managed by arenas; huge objects are managed separately in a single data structure that is shared by all threads\&. Huge objects are used by applications infrequently enough that this single data structure is not a scalability issue\&.
+User objects are broken into three categories according to size: small, large, and huge\&. Small and large objects are managed entirely by arenas; huge objects are additionally aggregated in a single data structure that is shared by all threads\&. Huge objects are typically used by applications infrequently enough that this single data structure is not a scalability issue\&.
.PP
Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
.PP
-Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
-sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the
+Small objects are managed in groups by page runs\&. Each run maintains a bitmap to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
+sizeof(\fBdouble\fR)\&. All other object size classes are multiples of the quantum, spaced such that there are four size classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes\&. Small size classes are smaller than four times the page size, large size classes are smaller than the chunk size (see the
"opt\&.lg_chunk"
-option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&.
+option), and huge size classes extend from the chunk size up to one size class less than the full address space size\&.
.PP
Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&.
.PP
-Assuming 4 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in
+The
+\fBrealloc\fR\fB\fR,
+\fBrallocx\fR\fB\fR, and
+\fBxallocx\fR\fB\fR
+functions may resize allocations without moving them under limited circumstances\&. Unlike the
+\fB*allocx\fR\fB\fR
+API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call
+\fBrealloc\fR\fB\fR
+to grow e\&.g\&. a 9\-byte allocation to 16 bytes, or shrink a 16\-byte allocation to 9 bytes\&. Growth and shrinkage trivially succeeds in place as long as the pre\-size and post\-size both round up to the same size class\&. No other API guarantees are made regarding in\-place resizing, but the current implementation also tries to resize large and huge allocations in place, as long as the pre\-size and post\-size are both large or both huge\&. In such cases shrinkage always succeeds for large size classes, but for huge size classes the chunk allocator must support splitting (see
+"arena\&.<i>\&.chunk_hooks")\&. Growth only succeeds if the trailing memory is currently available, and additionally for huge size classes the chunk allocator must support merging\&.
+.PP
+Assuming 2 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in
Table 1\&.
.sp
.it 1 an-trap
@@ -572,8 +486,23 @@ l r l
^ r l
^ r l
^ r l
+^ r l
+^ r l
l r l
-l r l.
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+l r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l.
T{
Small
T}:T{
@@ -584,7 +513,7 @@ T}
:T{
16
T}:T{
-[16, 32, 48, \&.\&.\&., 128]
+[16, 32, 48, 64, 80, 96, 112, 128]
T}
:T{
32
@@ -609,21 +538,96 @@ T}
:T{
512
T}:T{
-[2560, 3072, 3584]
+[2560, 3072, 3584, 4096]
+T}
+:T{
+1 KiB
+T}:T{
+[5 KiB, 6 KiB, 7 KiB, 8 KiB]
+T}
+:T{
+2 KiB
+T}:T{
+[10 KiB, 12 KiB, 14 KiB]
T}
T{
Large
T}:T{
+2 KiB
+T}:T{
+[16 KiB]
+T}
+:T{
4 KiB
T}:T{
-[4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB]
+[20 KiB, 24 KiB, 28 KiB, 32 KiB]
+T}
+:T{
+8 KiB
+T}:T{
+[40 KiB, 48 KiB, 56 KiB, 64 KiB]
+T}
+:T{
+16 KiB
+T}:T{
+[80 KiB, 96 KiB, 112 KiB, 128 KiB]
+T}
+:T{
+32 KiB
+T}:T{
+[160 KiB, 192 KiB, 224 KiB, 256 KiB]
+T}
+:T{
+64 KiB
+T}:T{
+[320 KiB, 384 KiB, 448 KiB, 512 KiB]
+T}
+:T{
+128 KiB
+T}:T{
+[640 KiB, 768 KiB, 896 KiB, 1 MiB]
+T}
+:T{
+256 KiB
+T}:T{
+[1280 KiB, 1536 KiB, 1792 KiB]
T}
T{
Huge
T}:T{
+256 KiB
+T}:T{
+[2 MiB]
+T}
+:T{
+512 KiB
+T}:T{
+[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]
+T}
+:T{
+1 MiB
+T}:T{
+[5 MiB, 6 MiB, 7 MiB, 8 MiB]
+T}
+:T{
+2 MiB
+T}:T{
+[10 MiB, 12 MiB, 14 MiB, 16 MiB]
+T}
+:T{
4 MiB
T}:T{
-[4 MiB, 8 MiB, 12 MiB, \&.\&.\&.]
+[20 MiB, 24 MiB, 28 MiB, 32 MiB]
+T}
+:T{
+8 MiB
+T}:T{
+[40 MiB, 48 MiB, 56 MiB, 64 MiB]
+T}
+:T{
+\&.\&.\&.
+T}:T{
+\&.\&.\&.
T}
.TE
.sp 1
@@ -660,15 +664,15 @@ If a value is passed in, refresh the data from which the
functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&.
.RE
.PP
-"config\&.debug" (\fBbool\fR) r\-
+"config\&.cache_oblivious" (\fBbool\fR) r\-
.RS 4
-\fB\-\-enable\-debug\fR
+\fB\-\-enable\-cache\-oblivious\fR
was specified during build configuration\&.
.RE
.PP
-"config\&.dss" (\fBbool\fR) r\-
+"config\&.debug" (\fBbool\fR) r\-
.RS 4
-\fB\-\-enable\-dss\fR
+\fB\-\-enable\-debug\fR
was specified during build configuration\&.
.RE
.PP
@@ -684,12 +688,6 @@ was specified during build configuration\&.
was specified during build configuration\&.
.RE
.PP
-"config\&.mremap" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-mremap\fR
-was specified during build configuration\&.
-.RE
-.PP
"config\&.munmap" (\fBbool\fR) r\-
.RS 4
\fB\-\-enable\-munmap\fR
@@ -763,14 +761,16 @@ is specified during configuration, in which case it is enabled by default\&.
.RS 4
dss (\fBsbrk\fR(2)) allocation precedence as related to
\fBmmap\fR(2)
-allocation\&. The following settings are supported: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq\&. The default is \(lqsecondary\(rq if
-"config\&.dss"
-is true, \(lqdisabled\(rq otherwise\&.
+allocation\&. The following settings are supported if
+\fBsbrk\fR(2)
+is supported by the operating system: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq; otherwise only \(lqdisabled\(rq is supported\&. The default is \(lqsecondary\(rq if
+\fBsbrk\fR(2)
+is supported by the operating system; \(lqdisabled\(rq otherwise\&.
.RE
.PP
"opt\&.lg_chunk" (\fBsize_t\fR) r\-
.RS 4
-Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 4 MiB (2^22)\&.
+Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 2 MiB (2^21)\&.
.RE
.PP
"opt\&.narenas" (\fBsize_t\fR) r\-
@@ -782,7 +782,11 @@ Maximum number of arenas to use for automatic multiplexing of threads and arenas
.RS 4
Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via
\fBmadvise\fR(2)
-or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&.
+or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&. See
+"arenas\&.lg_dirty_mult"
+and
+"arena\&.<i>\&.lg_dirty_mult"
+for related dynamic control options\&.
.RE
.PP
"opt\&.stats_print" (\fBbool\fR) r\-
@@ -793,16 +797,21 @@ function is called at program exit via an
\fBatexit\fR(3)
function\&. If
\fB\-\-enable\-stats\fR
-is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&.
+is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Furthermore,
+\fBatexit\fR\fB\fR
+may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls
+\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own
+\fBatexit\fR\fB\fR
+function with equivalent functionality)\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&.
.RE
.PP
-"opt\&.junk" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
+"opt\&.junk" (\fBconst char *\fR) r\- [\fB\-\-enable\-fill\fR]
.RS 4
-Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to
-0xa5\&. All deallocated memory will be initialized to
-0x5a\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default unless
+Junk filling\&. If set to "alloc", each byte of uninitialized allocated memory will be initialized to
+0xa5\&. If set to "free", all deallocated memory will be initialized to
+0x5a\&. If set to "true", both allocated and deallocated memory will be initialized, and if set to "false", junk filling will be disabled entirely\&. This is intended for debugging and will impact performance negatively\&. This option is "false" by default unless
\fB\-\-enable\-debug\fR
-is specified during configuration, in which case it is enabled by default unless running inside
+is specified during configuration, in which case it is "true" by default unless running inside
\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&.
.RE
.PP
@@ -825,10 +834,9 @@ option is enabled, the redzones are checked for corruption during deallocation\&
"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
.RS 4
Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so
-\fBrealloc\fR\fB\fR,
-\fBrallocx\fR\fB\fR
+\fBrealloc\fR\fB\fR
and
-\fBrallocm\fR\fB\fR
+\fBrallocx\fR\fB\fR
calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
.RE
.PP
@@ -839,12 +847,6 @@ Allocation tracing based on
enabled/disabled\&. This option is disabled by default\&.
.RE
.PP
-"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR]
-.RS 4
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
-support enabled/disabled\&. This option is vestigal because jemalloc auto\-detects whether it is running inside Valgrind\&. This option is disabled by default, unless running inside Valgrind\&.
-.RE
-.PP
"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR]
.RS 4
Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on
@@ -867,15 +869,15 @@ This option is disabled by default\&.
.PP
"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
-Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the
+Thread\-specific caching (tcache) enabled/disabled\&. When there are multiple threads, each thread uses a tcache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the
"opt\&.lg_tcache_max"
option for related tuning information\&. This option is enabled by default unless running inside
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&.
+\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, in which case it is forcefully disabled\&.
.RE
.PP
"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
.RS 4
-Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&.
+Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&.
.RE
.PP
"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -892,9 +894,11 @@ option for information on interval\-triggered profile dumping, the
"opt\&.prof_gdump"
option for information on high\-water\-triggered profile dumping, and the
"opt\&.prof_final"
-option for final profile dumping\&. Profile output is compatible with the included
+option for final profile dumping\&. Profile output is compatible with the
+\fBjeprof\fR
+command, which is based on the
\fBpprof\fR
-Perl script, which originates from the
+that is developed as part of the
\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&.
.RE
.PP
@@ -904,7 +908,7 @@ Filename prefix for profile dumps\&. If the prefix is set to the empty string, n
jeprof\&.
.RE
.PP
-"opt\&.prof_active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
+"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the
"opt\&.prof"
@@ -913,7 +917,16 @@ option) but inactive, then toggle profiling at any time during program execution
mallctl\&. This option is enabled by default\&.
.RE
.PP
-"opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
+"opt\&.prof_thread_active_init" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
+.RS 4
+Initial setting for
+"thread\&.prof\&.active"
+in newly created threads\&. The initial setting for newly created threads can also be changed during execution via the
+"prof\&.thread_active_init"
+mallctl\&. This option is enabled by default\&.
+.RE
+.PP
+"opt\&.lg_prof_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&.
.RE
@@ -935,12 +948,8 @@ option\&. By default, interval\-triggered profile dumping is disabled (encoded a
.PP
"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
-Trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.u<useq>\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&. This option is disabled by default\&.
+Set the initial state of
+"prof\&.gdump", which when enabled triggers a memory profile dump every time the total virtual memory exceeds the previous maximum\&. This option is disabled by default\&.
.RE
.PP
"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -952,7 +961,12 @@ function to dump final memory usage to a file named according to the pattern
<prefix>
is controlled by the
"opt\&.prof_prefix"
-option\&. This option is enabled by default\&.
+option\&. Note that
+\fBatexit\fR\fB\fR
+may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls
+\fBatexit\fR\fB\fR, so this option is not universally usable (though the application can register its own
+\fBatexit\fR\fB\fR
+function with equivalent functionality)\&. This option is disabled by default\&.
.RE
.PP
"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -1007,10 +1021,42 @@ Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed a
.PP
"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
.RS 4
-Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
+Flush calling thread\*(Aqs thread\-specific cache (tcache)\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs tcache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
+.RE
+.PP
+"thread\&.prof\&.name" (\fBconst char *\fR) r\- or \-w [\fB\-\-enable\-prof\fR]
+.RS 4
+Get/set the descriptive name associated with the calling thread in memory profile dumps\&. An internal copy of the name string is created, so the input string need not be maintained after this interface completes execution\&. The output string of this interface should be copied for non\-ephemeral uses, because multiple implementation details can cause asynchronous string deallocation\&. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations\&. The name string must be nil\-terminated and comprised only of characters in the sets recognized by
+\fBisgraph\fR(3)
+and
+\fBisblank\fR(3)\&.
+.RE
+.PP
+"thread\&.prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
+.RS 4
+Control whether sampling is currently active for the calling thread\&. This is an activation mechanism in addition to
+"prof\&.active"; both must be active for the calling thread to sample\&. This flag is enabled by default\&.
+.RE
+.PP
+"tcache\&.create" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR]
+.RS 4
+Create an explicit thread\-specific cache (tcache) and return an identifier that can be passed to the
+\fBMALLOCX_TCACHE(\fR\fB\fItc\fR\fR\fB)\fR
+macro to explicitly use the specified cache rather than the automatically managed one that is used by default\&. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds\&.
+.RE
+.PP
+"tcache\&.flush" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR]
+.RS 4
+Flush the specified thread\-specific cache (tcache)\&. The same considerations apply to this interface as to
+"thread\&.tcache\&.flush", except that the tcache will never be automatically be discarded\&.
.RE
.PP
-"arena\&.<i>\&.purge" (\fBunsigned\fR) \-\-
+"tcache\&.destroy" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR]
+.RS 4
+Flush the specified thread\-specific cache (tcache) and make the identifier available for use during a future tcache creation\&.
+.RE
+.PP
+"arena\&.<i>\&.purge" (\fBvoid\fR) \-\-
.RS 4
Purge unused dirty pages for arena <i>, or for all arenas if <i> equals
"arenas\&.narenas"\&.
@@ -1019,11 +1065,237 @@ Purge unused dirty pages for arena <i>, or for all arenas if <i> equals
"arena\&.<i>\&.dss" (\fBconst char *\fR) rw
.RS 4
Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals
-"arenas\&.narenas"\&. Note that even during huge allocation this setting is read from the arena that would be chosen for small or large allocation so that applications can depend on consistent dss versus mmap allocation regardless of allocation size\&. See
+"arenas\&.narenas"\&. See
"opt\&.dss"
for supported settings\&.
.RE
.PP
+"arena\&.<i>\&.lg_dirty_mult" (\fBssize_t\fR) rw
+.RS 4
+Current per\-arena minimum ratio (log base 2) of active to dirty pages for arena <i>\&. Each time this interface is set and the ratio is increased, pages are synchronously purged as necessary to impose the new ratio\&. See
+"opt\&.lg_dirty_mult"
+for additional information\&.
+.RE
+.PP
+"arena\&.<i>\&.chunk_hooks" (\fBchunk_hooks_t\fR) rw
+.RS 4
+Get or set the chunk management hook functions for arena <i>\&. The functions must be capable of operating on all extant chunks associated with arena <i>, usually by passing unknown chunks to the replaced functions\&. In practice, it is feasible to control allocation for arenas created via
+"arenas\&.extend"
+such that all chunks originate from an application\-supplied chunk allocator (by setting custom chunk hook functions just after arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+typedef struct {
+ chunk_alloc_t *alloc;
+ chunk_dalloc_t *dalloc;
+ chunk_commit_t *commit;
+ chunk_decommit_t *decommit;
+ chunk_purge_t *purge;
+ chunk_split_t *split;
+ chunk_merge_t *merge;
+} chunk_hooks_t;
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The
+\fBchunk_hooks_t\fR
+structure comprises function pointers which are described individually below\&. jemalloc uses these functions to manage chunk lifetime, which starts off with allocation of mapped committed memory, in the simplest case followed by deallocation\&. However, there are performance and platform reasons to retain chunks for later reuse\&. Cleanup attempts cascade from deallocation to decommit to purging, which gives the chunk management functions opportunities to reject the most permanent cleanup operations in favor of less permanent (and often less costly) operations\&. The chunk splitting and merging operations can also be opted out of, but this is mainly intended to support platforms on which virtual memory mappings provided by the operating system kernel do not automatically coalesce and split, e\&.g\&. Windows\&.
+.HP \w'typedef\ void\ *(chunk_alloc_t)('u
+.BI "typedef void *(chunk_alloc_t)(void\ *" "chunk" ", size_t\ " "size" ", size_t\ " "alignment" ", bool\ *" "zero" ", bool\ *" "commit" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk allocation function conforms to the
+\fBchunk_alloc_t\fR
+type and upon success returns a pointer to
+\fIsize\fR
+bytes of mapped memory on behalf of arena
+\fIarena_ind\fR
+such that the chunk\*(Aqs base address is a multiple of
+\fIalignment\fR, as well as setting
+\fI*zero\fR
+to indicate whether the chunk is zeroed and
+\fI*commit\fR
+to indicate whether the chunk is committed\&. Upon error the function returns
+\fBNULL\fR
+and leaves
+\fI*zero\fR
+and
+\fI*commit\fR
+unmodified\&. The
+\fIsize\fR
+parameter is always a multiple of the chunk size\&. The
+\fIalignment\fR
+parameter is always a power of two at least as large as the chunk size\&. Zeroing is mandatory if
+\fI*zero\fR
+is true upon function entry\&. Committing is mandatory if
+\fI*commit\fR
+is true upon function entry\&. If
+\fIchunk\fR
+is not
+\fBNULL\fR, the returned pointer must be
+\fIchunk\fR
+on success or
+\fBNULL\fR
+on error\&. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults\&. Note that replacing the default chunk allocation function makes the arena\*(Aqs
+"arena\&.<i>\&.dss"
+setting irrelevant\&.
+.HP \w'typedef\ bool\ (chunk_dalloc_t)('u
+.BI "typedef bool (chunk_dalloc_t)(void\ *" "chunk" ", size_t\ " "size" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk deallocation function conforms to the
+\fBchunk_dalloc_t\fR
+type and deallocates a
+\fIchunk\fR
+of given
+\fIsize\fR
+with
+\fIcommitted\fR/decommited memory as indicated, on behalf of arena
+\fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates opt\-out from deallocation; the virtual memory mapping associated with the chunk remains mapped, in the same commit state, and available for future use, in which case it will be automatically retained for later reuse\&.
+.HP \w'typedef\ bool\ (chunk_commit_t)('u
+.BI "typedef bool (chunk_commit_t)(void\ *" "chunk" ", size_t\ " "size" ", size_t\ " "offset" ", size_t\ " "length" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk commit function conforms to the
+\fBchunk_commit_t\fR
+type and commits zeroed physical memory to back pages within a
+\fIchunk\fR
+of given
+\fIsize\fR
+at
+\fIoffset\fR
+bytes, extending for
+\fIlength\fR
+on behalf of arena
+\fIarena_ind\fR, returning false upon success\&. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults\&. If the function returns true, this indicates insufficient physical memory to satisfy the request\&.
+.HP \w'typedef\ bool\ (chunk_decommit_t)('u
+.BI "typedef bool (chunk_decommit_t)(void\ *" "chunk" ", size_t\ " "size" ", size_t\ " "offset" ", size_t\ " "length" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk decommit function conforms to the
+\fBchunk_decommit_t\fR
+type and decommits any physical memory that is backing pages within a
+\fIchunk\fR
+of given
+\fIsize\fR
+at
+\fIoffset\fR
+bytes, extending for
+\fIlength\fR
+on behalf of arena
+\fIarena_ind\fR, returning false upon success, in which case the pages will be committed via the chunk commit function before being reused\&. If the function returns true, this indicates opt\-out from decommit; the memory remains committed and available for future use, in which case it will be automatically retained for later reuse\&.
+.HP \w'typedef\ bool\ (chunk_purge_t)('u
+.BI "typedef bool (chunk_purge_t)(void\ *" "chunk" ", size_t\ " "size" ", size_t\ " "offset" ", size_t\ " "length" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk purge function conforms to the
+\fBchunk_purge_t\fR
+type and optionally discards physical pages within the virtual memory mapping associated with
+\fIchunk\fR
+of given
+\fIsize\fR
+at
+\fIoffset\fR
+bytes, extending for
+\fIlength\fR
+on behalf of arena
+\fIarena_ind\fR, returning false if pages within the purged virtual memory range will be zero\-filled the next time they are accessed\&.
+.HP \w'typedef\ bool\ (chunk_split_t)('u
+.BI "typedef bool (chunk_split_t)(void\ *" "chunk" ", size_t\ " "size" ", size_t\ " "size_a" ", size_t\ " "size_b" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk split function conforms to the
+\fBchunk_split_t\fR
+type and optionally splits
+\fIchunk\fR
+of given
+\fIsize\fR
+into two adjacent chunks, the first of
+\fIsize_a\fR
+bytes, and the second of
+\fIsize_b\fR
+bytes, operating on
+\fIcommitted\fR/decommitted memory as indicated, on behalf of arena
+\fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates that the chunk remains unsplit and therefore should continue to be operated on as a whole\&.
+.HP \w'typedef\ bool\ (chunk_merge_t)('u
+.BI "typedef bool (chunk_merge_t)(void\ *" "chunk_a" ", size_t\ " "size_a" ", void\ *" "chunk_b" ", size_t\ " "size_b" ", bool\ " "committed" ", unsigned\ " "arena_ind" ");"
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+A chunk merge function conforms to the
+\fBchunk_merge_t\fR
+type and optionally merges adjacent chunks,
+\fIchunk_a\fR
+of given
+\fIsize_a\fR
+and
+\fIchunk_b\fR
+of given
+\fIsize_b\fR
+into one contiguous chunk, operating on
+\fIcommitted\fR/decommitted memory as indicated, on behalf of arena
+\fIarena_ind\fR, returning false upon success\&. If the function returns true, this indicates that the chunks remain distinct mappings and therefore should continue to be operated on independently\&.
+.RE
+.PP
"arenas\&.narenas" (\fBunsigned\fR) r\-
.RS 4
Current limit on number of arenas\&.
@@ -1036,6 +1308,15 @@ An array of
booleans\&. Each boolean indicates whether the corresponding arena is initialized\&.
.RE
.PP
+"arenas\&.lg_dirty_mult" (\fBssize_t\fR) rw
+.RS 4
+Current default per\-arena minimum ratio (log base 2) of active to dirty pages, used to initialize
+"arena\&.<i>\&.lg_dirty_mult"
+during arena creation\&. See
+"opt\&.lg_dirty_mult"
+for additional information\&.
+.RE
+.PP
"arenas\&.quantum" (\fBsize_t\fR) r\-
.RS 4
Quantum size\&.
@@ -1076,7 +1357,7 @@ Number of regions per page run\&.
Number of bytes per page run\&.
.RE
.PP
-"arenas\&.nlruns" (\fBsize_t\fR) r\-
+"arenas\&.nlruns" (\fBunsigned\fR) r\-
.RS 4
Total number of large size classes\&.
.RE
@@ -1086,9 +1367,14 @@ Total number of large size classes\&.
Maximum size supported by this large size class\&.
.RE
.PP
-"arenas\&.purge" (\fBunsigned\fR) \-w
+"arenas\&.nhchunks" (\fBunsigned\fR) r\-
+.RS 4
+Total number of huge size classes\&.
+.RE
+.PP
+"arenas\&.hchunk\&.<i>\&.size" (\fBsize_t\fR) r\-
.RS 4
-Purge unused dirty pages for the specified arena, or for all arenas if none is specified\&.
+Maximum size supported by this huge size class\&.
.RE
.PP
"arenas\&.extend" (\fBunsigned\fR) r\-
@@ -1096,11 +1382,22 @@ Purge unused dirty pages for the specified arena, or for all arenas if none is s
Extend the array of arenas by appending a new arena, and returning the new arena index\&.
.RE
.PP
+"prof\&.thread_active_init" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
+.RS 4
+Control the initial setting for
+"thread\&.prof\&.active"
+in newly created threads\&. See the
+"opt\&.prof_thread_active_init"
+option for additional information\&.
+.RE
+.PP
"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
.RS 4
Control whether sampling is currently active\&. See the
"opt\&.prof_active"
-option for additional information\&.
+option for additional information, as well as the interrelated
+"thread\&.prof\&.active"
+mallctl\&.
.RE
.PP
"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR]
@@ -1113,6 +1410,30 @@ is controlled by the
option\&.
.RE
.PP
+"prof\&.gdump" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
+.RS 4
+When enabled, trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern
+<prefix>\&.<pid>\&.<seq>\&.u<useq>\&.heap, where
+<prefix>
+is controlled by the
+"opt\&.prof_prefix"
+option\&.
+.RE
+.PP
+"prof\&.reset" (\fBsize_t\fR) \-w [\fB\-\-enable\-prof\fR]
+.RS 4
+Reset all memory profile statistics, and optionally update the sample rate (see
+"opt\&.lg_prof_sample"
+and
+"prof\&.lg_sample")\&.
+.RE
+.PP
+"prof\&.lg_sample" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR]
+.RS 4
+Get the current sample rate (see
+"opt\&.lg_prof_sample")\&.
+.RE
+.PP
"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR]
.RS 4
Average number of bytes allocated between interval\-based profile dumps\&. See the
@@ -1122,7 +1443,7 @@ option for additional information\&.
.PP
"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
-Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up to the nearest multiple of the chunk size when computing its contribution to the counter\&. Note that the
+Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up when computing its contribution to the counter\&. Note that the
"epoch"
mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&.
.RE
@@ -1136,44 +1457,27 @@ Total number of bytes allocated by the application\&.
.RS 4
Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to
"stats\&.allocated"\&. This does not include
-"stats\&.arenas\&.<i>\&.pdirty"
-and pages entirely devoted to allocator metadata\&.
-.RE
-.PP
-"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as
-"stats\&.active"\&. This does not include inactive chunks\&.
-.RE
-.PP
-"stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks\&.
-.RE
-.PP
-"stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of chunks allocated\&.
-.RE
-.PP
-"stats\&.chunks\&.high" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Maximum number of active chunks at any time thus far\&.
+"stats\&.arenas\&.<i>\&.pdirty", nor pages entirely devoted to allocator metadata\&.
.RE
.PP
-"stats\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+"stats\&.metadata" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
-Number of bytes currently allocated by huge objects\&.
+Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive internal allocator data structures, arena chunk headers (see
+"stats\&.arenas\&.<i>\&.metadata\&.mapped"), and internal allocations (see
+"stats\&.arenas\&.<i>\&.metadata\&.allocated")\&.
.RE
.PP
-"stats\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+"stats\&.resident" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
-Cumulative number of huge allocation requests\&.
+Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size, and is larger than
+"stats\&.active"\&.
.RE
.PP
-"stats\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
-Cumulative number of huge deallocation requests\&.
+Total number of bytes in active chunks mapped by the allocator\&. This is a multiple of the chunk size, and is larger than
+"stats\&.active"\&. This does not include inactive chunks, even those that contain unused dirty pages, which means that there is no strict ordering between this and
+"stats\&.resident"\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.dss" (\fBconst char *\fR) r\-
@@ -1185,6 +1489,13 @@ allocation\&. See
for details\&.
.RE
.PP
+"stats\&.arenas\&.<i>\&.lg_dirty_mult" (\fBssize_t\fR) r\-
+.RS 4
+Minimum ratio (log base 2) of active to dirty pages\&. See
+"opt\&.lg_dirty_mult"
+for details\&.
+.RE
+.PP
"stats\&.arenas\&.<i>\&.nthreads" (\fBunsigned\fR) r\-
.RS 4
Number of threads currently assigned to arena\&.
@@ -1207,6 +1518,24 @@ or similar has not been called\&.
Number of mapped bytes\&.
.RE
.PP
+"stats\&.arenas\&.<i>\&.metadata\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Number of mapped bytes in arena chunk headers, which track the states of the non\-metadata pages\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.metadata\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Number of bytes dedicated to internal allocations\&. Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&. This statistic is reported separately from
+"stats\&.metadata"
+and
+"stats\&.arenas\&.<i>\&.metadata\&.mapped"
+because it overlaps with e\&.g\&. the
+"stats\&.allocated"
+and
+"stats\&.active"
+statistics, whereas the other metadata statistics do not\&.
+.RE
+.PP
"stats\&.arenas\&.<i>\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
Number of dirty page purge sweeps performed\&.
@@ -1264,9 +1593,24 @@ Cumulative number of large deallocation requests served directly by the arena\&.
Cumulative number of large allocation requests\&.
.RE
.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+"stats\&.arenas\&.<i>\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Number of bytes currently allocated by huge objects\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Cumulative number of huge allocation requests served directly by the arena\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
.RS 4
-Current number of bytes allocated by bin\&.
+Cumulative number of huge deallocation requests served directly by the arena\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.huge\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Cumulative number of huge allocation requests\&.
.RE
.PP
"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
@@ -1284,6 +1628,11 @@ Cumulative number of allocations returned to bin\&.
Cumulative number of allocation requests\&.
.RE
.PP
+"stats\&.arenas\&.<i>\&.bins\&.<j>\&.curregs" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Current number of regions for this size class\&.
+.RE
+.PP
"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR]
.RS 4
Cumulative number of tcache fills\&.
@@ -1328,6 +1677,26 @@ Cumulative number of allocation requests for this size class\&.
.RS 4
Current number of runs for this size class\&.
.RE
+.PP
+"stats\&.arenas\&.<i>\&.hchunks\&.<j>\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Cumulative number of allocation requests for this size class served directly by the arena\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.hchunks\&.<j>\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Cumulative number of deallocation requests for this size class served directly by the arena\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.hchunks\&.<j>\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Cumulative number of allocation requests for this size class\&.
+.RE
+.PP
+"stats\&.arenas\&.<i>\&.hchunks\&.<j>\&.curhchunks" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Current number of huge allocations for this size class\&.
+.RE
.SH "DEBUGGING MALLOC PROBLEMS"
.PP
When debugging, it is a good idea to configure/build jemalloc with the
@@ -1513,44 +1882,6 @@ The
\fBmalloc_usable_size\fR\fB\fR
function returns the usable size of the allocation pointed to by
\fIptr\fR\&.
-.SS "Experimental API"
-.PP
-The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR,
-\fBdallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions return
-\fBALLOCM_SUCCESS\fR
-on success; otherwise they return an error value\&. The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions will fail if:
-.PP
-ALLOCM_ERR_OOM
-.RS 4
-Out of memory\&. Insufficient contiguous memory was available to service the allocation request\&. The
-\fBallocm\fR\fB\fR
-function additionally sets
-\fI*ptr\fR
-to
-\fBNULL\fR, whereas the
-\fBrallocm\fR\fB\fR
-function leaves
-\fB*ptr\fR
-unmodified\&.
-.RE
-The
-\fBrallocm\fR\fB\fR
-function will also fail if:
-.PP
-ALLOCM_ERR_NOT_MOVED
-.RS 4
-\fBALLOCM_NO_MOVE\fR
-was specified, but the reallocation request could not be serviced without moving the object\&.
-.RE
.SH "ENVIRONMENT"
.PP
The following environment variable affects the execution of the allocation functions:
diff --git a/deps/jemalloc/doc/jemalloc.html b/deps/jemalloc/doc/jemalloc.html
index 5a9fc7789..7b8e2be8c 100644
--- a/deps/jemalloc/doc/jemalloc.html
+++ b/deps/jemalloc/doc/jemalloc.html
@@ -1,8 +1,7 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>JEMALLOC</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="idm316394519664"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>jemalloc &#8212; general purpose memory allocation functions</p></div><div class="refsect1"><a name="library"></a><h2>LIBRARY</h2><p>This manual describes jemalloc 3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340. More information
- can be found at the <a class="ulink" href="http://www.canonware.com/jemalloc/" target="_top">jemalloc website</a>.</p></div><div class="refsynopsisdiv"><h2>SYNOPSIS</h2><div class="funcsynopsis"><pre class="funcsynopsisinfo">#include &lt;<code class="filename">stdlib.h</code>&gt;
-#include &lt;<code class="filename">jemalloc/jemalloc.h</code>&gt;</pre><div class="refsect2"><a name="idm316394002288"></a><h3>Standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">malloc</b>(</code></td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">calloc</b>(</code></td><td>size_t <var class="pdparam">number</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">posix_memalign</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">aligned_alloc</b>(</code></td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">realloc</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var 
class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">free</b>(</code></td><td>void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="refsect2"><a name="idm316393986160"></a><h3>Non-standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">mallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">rallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">xallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b 
class="fsfunc">sallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">dallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">nallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctl</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlnametomib</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">mibp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">miblenp</var><code>)</code>;</td></tr></table><div 
class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlbymib</b>(</code></td><td>const size_t *<var class="pdparam">mib</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">miblen</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">malloc_stats_print</b>(</code></td><td>void <var class="pdparam">(*write_cb)</var>
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>JEMALLOC</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="idp45223136"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>jemalloc &#8212; general purpose memory allocation functions</p></div><div class="refsect1"><a name="library"></a><h2>LIBRARY</h2><p>This manual describes jemalloc 4.0.3-0-ge9192eacf8935e29fc62fddc2701f7942b1cc02c. More information
+ can be found at the <a class="ulink" href="http://www.canonware.com/jemalloc/" target="_top">jemalloc website</a>.</p></div><div class="refsynopsisdiv"><h2>SYNOPSIS</h2><div class="funcsynopsis"><pre class="funcsynopsisinfo">#include &lt;<code class="filename">jemalloc/jemalloc.h</code>&gt;</pre><div class="refsect2"><a name="idp44244480"></a><h3>Standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">malloc</b>(</code></td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">calloc</b>(</code></td><td>size_t <var class="pdparam">number</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">posix_memalign</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">aligned_alloc</b>(</code></td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; 
cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">realloc</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">free</b>(</code></td><td>void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="refsect2"><a name="idp46062768"></a><h3>Non-standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">mallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">rallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">xallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" 
class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">sallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">dallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">sdallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">nallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctl</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, 
</td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlnametomib</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">mibp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">miblenp</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlbymib</b>(</code></td><td>const size_t *<var class="pdparam">mib</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">miblen</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">malloc_stats_print</b>(</code></td><td>void <var class="pdparam">(*write_cb)</var>
<code>(</code>void *, const char *<code>)</code>
- , </td></tr><tr><td> </td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">opts</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">malloc_usable_size</b>(</code></td><td>const void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">(*malloc_message)</b>(</code></td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">s</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><p><span class="type">const char *</span><code class="varname">malloc_conf</code>;</p></div><div class="refsect2"><a name="idm316388684112"></a><h3>Experimental API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">allocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">rallocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, 
</td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">sallocm</b>(</code></td><td>const void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">dallocm</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">nallocm</b>(</code></td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div></div></div><div class="refsect1"><a name="description"></a><h2>DESCRIPTION</h2><div class="refsect2"><a name="idm316388663504"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) function allocates
+ , </td></tr><tr><td> </td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">opts</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">malloc_usable_size</b>(</code></td><td>const void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">(*malloc_message)</b>(</code></td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">s</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><p><span class="type">const char *</span><code class="varname">malloc_conf</code>;</p></div></div></div><div class="refsect1"><a name="description"></a><h2>DESCRIPTION</h2><div class="refsect2"><a name="idp46115952"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) function allocates
<em class="parameter"><code>size</code></em> bytes of uninitialized memory. The allocated
space is suitably aligned (after possible pointer coercion) for storage
of any type of object.</p><p>The <code class="function">calloc</code>(<em class="parameter"><code></code></em>) function allocates
@@ -13,13 +12,13 @@
exception that the allocated memory is explicitly initialized to zero
bytes.</p><p>The <code class="function">posix_memalign</code>(<em class="parameter"><code></code></em>) function
allocates <em class="parameter"><code>size</code></em> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<em class="parameter"><code>alignment</code></em>, and returns the allocation in the value
pointed to by <em class="parameter"><code>ptr</code></em>. The requested
- <em class="parameter"><code>alignment</code></em> must be a power of 2 at least as large
- as <code class="code">sizeof(<span class="type">void *</span>)</code>.</p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function
+ <em class="parameter"><code>alignment</code></em> must be a power of 2 at least as large as
+ <code class="code">sizeof(<span class="type">void *</span>)</code>.</p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function
allocates <em class="parameter"><code>size</code></em> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<em class="parameter"><code>alignment</code></em>. The requested
<em class="parameter"><code>alignment</code></em> must be a power of 2. Behavior is
undefined if <em class="parameter"><code>size</code></em> is not an integral multiple of
@@ -38,37 +37,51 @@
<code class="function">malloc</code>(<em class="parameter"><code></code></em>) for the specified size.</p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function causes the
allocated memory referenced by <em class="parameter"><code>ptr</code></em> to be made
available for future allocations. If <em class="parameter"><code>ptr</code></em> is
- <code class="constant">NULL</code>, no action occurs.</p></div><div class="refsect2"><a name="idm316388639904"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>),
+ <code class="constant">NULL</code>, no action occurs.</p></div><div class="refsect2"><a name="idp46144704"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>),
<code class="function">rallocx</code>(<em class="parameter"><code></code></em>),
<code class="function">xallocx</code>(<em class="parameter"><code></code></em>),
<code class="function">sallocx</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocx</code>(<em class="parameter"><code></code></em>), and
+ <code class="function">dallocx</code>(<em class="parameter"><code></code></em>),
+ <code class="function">sdallocx</code>(<em class="parameter"><code></code></em>), and
<code class="function">nallocx</code>(<em class="parameter"><code></code></em>) functions all have a
<em class="parameter"><code>flags</code></em> argument that can be used to specify
options. The functions only check the options that are contextually
relevant. Use bitwise or (<code class="code">|</code>) operations to
specify one or more of the following:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="constant">MALLOCX_LG_ALIGN(<em class="parameter"><code>la</code></em>)
+ </p><div class="variablelist"><dl class="variablelist"><dt><a name="MALLOCX_LG_ALIGN"></a><span class="term"><code class="constant">MALLOCX_LG_ALIGN(<em class="parameter"><code>la</code></em>)
</code></span></dt><dd><p>Align the memory allocation to start at an address
that is a multiple of <code class="code">(1 &lt;&lt;
<em class="parameter"><code>la</code></em>)</code>. This macro does not validate
that <em class="parameter"><code>la</code></em> is within the valid
- range.</p></dd><dt><span class="term"><code class="constant">MALLOCX_ALIGN(<em class="parameter"><code>a</code></em>)
+ range.</p></dd><dt><a name="MALLOCX_ALIGN"></a><span class="term"><code class="constant">MALLOCX_ALIGN(<em class="parameter"><code>a</code></em>)
</code></span></dt><dd><p>Align the memory allocation to start at an address
that is a multiple of <em class="parameter"><code>a</code></em>, where
<em class="parameter"><code>a</code></em> is a power of two. This macro does not
validate that <em class="parameter"><code>a</code></em> is a power of 2.
- </p></dd><dt><span class="term"><code class="constant">MALLOCX_ZERO</code></span></dt><dd><p>Initialize newly allocated memory to contain zero
+ </p></dd><dt><a name="MALLOCX_ZERO"></a><span class="term"><code class="constant">MALLOCX_ZERO</code></span></dt><dd><p>Initialize newly allocated memory to contain zero
bytes. In the growing reallocation case, the real size prior to
reallocation defines the boundary between untouched bytes and those
that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</p></dd><dt><span class="term"><code class="constant">MALLOCX_ARENA(<em class="parameter"><code>a</code></em>)
+ absent, newly allocated memory is uninitialized.</p></dd><dt><a name="MALLOCX_TCACHE"></a><span class="term"><code class="constant">MALLOCX_TCACHE(<em class="parameter"><code>tc</code></em>)
+ </code></span></dt><dd><p>Use the thread-specific cache (tcache) specified by
+ the identifier <em class="parameter"><code>tc</code></em>, which must have been
+ acquired via the <a class="link" href="#tcache.create">
+ "<code class="mallctl">tcache.create</code>"
+ </a>
+ mallctl. This macro does not validate that
+ <em class="parameter"><code>tc</code></em> specifies a valid
+ identifier.</p></dd><dt><a name="MALLOC_TCACHE_NONE"></a><span class="term"><code class="constant">MALLOCX_TCACHE_NONE</code></span></dt><dd><p>Do not use a thread-specific cache (tcache). Unless
+ <code class="constant">MALLOCX_TCACHE(<em class="parameter"><code>tc</code></em>)</code> or
+ <code class="constant">MALLOCX_TCACHE_NONE</code> is specified, an
+ automatically managed tcache will be used under many circumstances.
+ This macro cannot be used in the same <em class="parameter"><code>flags</code></em>
+ argument as
+ <code class="constant">MALLOCX_TCACHE(<em class="parameter"><code>tc</code></em>)</code>.</p></dd><dt><a name="MALLOCX_ARENA"></a><span class="term"><code class="constant">MALLOCX_ARENA(<em class="parameter"><code>a</code></em>)
</code></span></dt><dd><p>Use the arena specified by the index
- <em class="parameter"><code>a</code></em> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <em class="parameter"><code>a</code></em>
- specifies an arena index in the valid range.</p></dd></dl></div><p>
+ <em class="parameter"><code>a</code></em>. This macro has no effect for regions that
+ were allocated via an arena other than the one specified. This
+ macro does not validate that <em class="parameter"><code>a</code></em> specifies an
+ arena index in the valid range.</p></dd></dl></div><p>
</p><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function allocates at
least <em class="parameter"><code>size</code></em> bytes of memory, and returns a pointer
to the base address of the allocation. Behavior is undefined if
@@ -91,7 +104,14 @@
&gt; <code class="constant">SIZE_T_MAX</code>)</code>.</p><p>The <code class="function">sallocx</code>(<em class="parameter"><code></code></em>) function returns the
real size of the allocation at <em class="parameter"><code>ptr</code></em>.</p><p>The <code class="function">dallocx</code>(<em class="parameter"><code></code></em>) function causes the
memory referenced by <em class="parameter"><code>ptr</code></em> to be made available for
- future allocations.</p><p>The <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) function allocates no
+ future allocations.</p><p>The <code class="function">sdallocx</code>(<em class="parameter"><code></code></em>) function is an
+ extension of <code class="function">dallocx</code>(<em class="parameter"><code></code></em>) with a
+ <em class="parameter"><code>size</code></em> parameter to allow the caller to pass in the
+ allocation size as an optimization. The minimum valid input size is the
+ original requested size of the allocation, and the maximum valid input
+ size is the corresponding value returned by
+ <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) or
+ <code class="function">sallocx</code>(<em class="parameter"><code></code></em>).</p><p>The <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) function allocates no
memory, but it performs the same size computation as the
<code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function, and returns the real
size of the allocation that would result from the equivalent
@@ -162,11 +182,12 @@ for (i = 0; i &lt; nbins; i++) {
functions simultaneously. If <code class="option">--enable-stats</code> is
specified during configuration, &#8220;m&#8221; and &#8220;a&#8221; can
be specified to omit merged arena and per arena statistics, respectively;
- &#8220;b&#8221; and &#8220;l&#8221; can be specified to omit per size
- class statistics for bins and large objects, respectively. Unrecognized
- characters are silently ignored. Note that thread caching may prevent
- some statistics from being completely up to date, since extra locking
- would be required to merge counters that track thread cache operations.
+ &#8220;b&#8221;, &#8220;l&#8221;, and &#8220;h&#8221; can be specified to
+ omit per size class statistics for bins, large objects, and huge objects,
+ respectively. Unrecognized characters are silently ignored. Note that
+ thread caching may prevent some statistics from being completely up to
+ date, since extra locking would be required to merge counters that track
+ thread cache operations.
</p><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
returns the usable size of the allocation pointed to by
<em class="parameter"><code>ptr</code></em>. The return value may be larger than the size
@@ -177,74 +198,7 @@ for (i = 0; i &lt; nbins; i++) {
discrepancy between the requested allocation size and the size reported
by <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) should not be
depended on, since such behavior is entirely implementation-dependent.
- </p></div><div class="refsect2"><a name="idm316388574208"></a><h3>Experimental API</h3><p>The experimental API is subject to change or removal without regard
- for backward compatibility. If <code class="option">--disable-experimental</code>
- is specified during configuration, the experimental API is
- omitted.</p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions all have a
- <em class="parameter"><code>flags</code></em> argument that can be used to specify
- options. The functions only check the options that are contextually
- relevant. Use bitwise or (<code class="code">|</code>) operations to
- specify one or more of the following:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="constant">ALLOCM_LG_ALIGN(<em class="parameter"><code>la</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <code class="code">(1 &lt;&lt;
- <em class="parameter"><code>la</code></em>)</code>. This macro does not validate
- that <em class="parameter"><code>la</code></em> is within the valid
- range.</p></dd><dt><span class="term"><code class="constant">ALLOCM_ALIGN(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <em class="parameter"><code>a</code></em>, where
- <em class="parameter"><code>a</code></em> is a power of two. This macro does not
- validate that <em class="parameter"><code>a</code></em> is a power of 2.
- </p></dd><dt><span class="term"><code class="constant">ALLOCM_ZERO</code></span></dt><dd><p>Initialize newly allocated memory to contain zero
- bytes. In the growing reallocation case, the real size prior to
- reallocation defines the boundary between untouched bytes and those
- that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</p></dd><dt><span class="term"><code class="constant">ALLOCM_NO_MOVE</code></span></dt><dd><p>For reallocation, fail rather than moving the
- object. This constraint can apply to both growth and
- shrinkage.</p></dd><dt><span class="term"><code class="constant">ALLOCM_ARENA(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Use the arena specified by the index
- <em class="parameter"><code>a</code></em> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <em class="parameter"><code>a</code></em>
- specifies an arena index in the valid range.</p></dd></dl></div><p>
- </p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function allocates at
- least <em class="parameter"><code>size</code></em> bytes of memory, sets
- <em class="parameter"><code>*ptr</code></em> to the base address of the allocation, and
- sets <em class="parameter"><code>*rsize</code></em> to the real size of the allocation if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>. Behavior
- is undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or
- if request size overflows due to size class and/or alignment
- constraints.</p><p>The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function resizes the
- allocation at <em class="parameter"><code>*ptr</code></em> to be at least
- <em class="parameter"><code>size</code></em> bytes, sets <em class="parameter"><code>*ptr</code></em> to
- the base address of the allocation if it moved, and sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>. If
- <em class="parameter"><code>extra</code></em> is non-zero, an attempt is made to resize
- the allocation to be at least <code class="code">(<em class="parameter"><code>size</code></em> +
- <em class="parameter"><code>extra</code></em>)</code> bytes, though inability to allocate
- the extra byte(s) will not by itself result in failure. Behavior is
- undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, if
- request size overflows due to size class and/or alignment constraints, or
- if <code class="code">(<em class="parameter"><code>size</code></em> +
- <em class="parameter"><code>extra</code></em> &gt;
- <code class="constant">SIZE_T_MAX</code>)</code>.</p><p>The <code class="function">sallocm</code>(<em class="parameter"><code></code></em>) function sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation.</p><p>The <code class="function">dallocm</code>(<em class="parameter"><code></code></em>) function causes the
- memory referenced by <em class="parameter"><code>ptr</code></em> to be made available for
- future allocations.</p><p>The <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) function allocates no
- memory, but it performs the same size computation as the
- <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function, and if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code> it sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation that
- would result from the equivalent <code class="function">allocm</code>(<em class="parameter"><code></code></em>)
- function call. Behavior is undefined if <em class="parameter"><code>size</code></em> is
- <code class="constant">0</code>, or if request size overflows due to size class
- and/or alignment constraints.</p></div></div><div class="refsect1"><a name="tuning"></a><h2>TUNING</h2><p>Once, when the first call is made to one of the memory allocation
+ </p></div></div><div class="refsect1"><a name="tuning"></a><h2>TUNING</h2><p>Once, when the first call is made to one of the memory allocation
routines, the allocator initializes its internals based in part on various
options that can be specified at compile- or run-time.</p><p>The string pointed to by the global variable
<code class="varname">malloc_conf</code>, the &#8220;name&#8221; of the file
@@ -272,8 +226,9 @@ for (i = 0; i &lt; nbins; i++) {
<span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> to obtain memory, which is
suboptimal for several reasons, including race conditions, increased
fragmentation, and artificial limitations on maximum usable memory. If
- <code class="option">--enable-dss</code> is specified during configuration, this
- allocator uses both <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> and
+ <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> is supported by the operating
+ system, this allocator uses both
+ <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> and
<span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>, in that order of preference;
otherwise only <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> is used.</p><p>This allocator uses multiple arenas in order to reduce lock
contention for threaded programs on multi-processor systems. This works
@@ -295,34 +250,52 @@ for (i = 0; i &lt; nbins; i++) {
chunk size is a power of two that is greater than the page size. Chunks
are always aligned to multiples of the chunk size. This alignment makes it
possible to find metadata for user objects very quickly.</p><p>User objects are broken into three categories according to size:
- small, large, and huge. Small objects are smaller than one page. Large
- objects are smaller than the chunk size. Huge objects are a multiple of
- the chunk size. Small and large objects are managed by arenas; huge
- objects are managed separately in a single data structure that is shared by
- all threads. Huge objects are used by applications infrequently enough
- that this single data structure is not a scalability issue.</p><p>Each chunk that is managed by an arena tracks its contents as runs of
+ small, large, and huge. Small and large objects are managed entirely by
+ arenas; huge objects are additionally aggregated in a single data structure
+ that is shared by all threads. Huge objects are typically used by
+ applications infrequently enough that this single data structure is not a
+ scalability issue.</p><p>Each chunk that is managed by an arena tracks its contents as runs of
contiguous pages (unused, backing a set of small objects, or backing one
large object). The combination of chunk alignment and chunk page maps
makes it possible to determine all metadata regarding small and large
allocations in constant time.</p><p>Small objects are managed in groups by page runs. Each run maintains
- a frontier and free list to track which regions are in use. Allocation
- requests that are no more than half the quantum (8 or 16, depending on
- architecture) are rounded up to the nearest power of two that is at least
- <code class="code">sizeof(<span class="type">double</span>)</code>. All other small
- object size classes are multiples of the quantum, spaced such that internal
- fragmentation is limited to approximately 25% for all but the smallest size
- classes. Allocation requests that are larger than the maximum small size
- class, but small enough to fit in an arena-managed chunk (see the <a class="link" href="#opt.lg_chunk">
+ a bitmap to track which regions are in use. Allocation requests that are no
+ more than half the quantum (8 or 16, depending on architecture) are rounded
+ up to the nearest power of two that is at least <code class="code">sizeof(<span class="type">double</span>)</code>. All other object size
+ classes are multiples of the quantum, spaced such that there are four size
+ classes for each doubling in size, which limits internal fragmentation to
+ approximately 20% for all but the smallest size classes. Small size classes
+ are smaller than four times the page size, large size classes are smaller
+ than the chunk size (see the <a class="link" href="#opt.lg_chunk">
"<code class="mallctl">opt.lg_chunk</code>"
- </a> option), are
- rounded up to the nearest run size. Allocation requests that are too large
- to fit in an arena-managed chunk are rounded up to the nearest multiple of
- the chunk size.</p><p>Allocations are packed tightly together, which can be an issue for
+ </a> option), and
+ huge size classes extend from the chunk size up to one size class less than
+ the full address space size.</p><p>Allocations are packed tightly together, which can be an issue for
multi-threaded applications. If you need to assure that allocations do not
suffer from cacheline sharing, round your allocation requests up to the
nearest multiple of the cacheline size, or specify cacheline alignment when
- allocating.</p><p>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit
- system, the size classes in each category are as shown in <a class="xref" href="#size_classes" title="Table 1. Size classes">Table 1</a>.</p><div class="table"><a name="size_classes"></a><p class="title"><b>Table 1. Size classes</b></p><div class="table-contents"><table summary="Size classes" border="1"><colgroup><col align="left" class="c1"><col align="right" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Category</th><th align="right">Spacing</th><th align="left">Size</th></tr></thead><tbody><tr><td rowspan="7" align="left">Small</td><td align="right">lg</td><td align="left">[8]</td></tr><tr><td align="right">16</td><td align="left">[16, 32, 48, ..., 128]</td></tr><tr><td align="right">32</td><td align="left">[160, 192, 224, 256]</td></tr><tr><td align="right">64</td><td align="left">[320, 384, 448, 512]</td></tr><tr><td align="right">128</td><td align="left">[640, 768, 896, 1024]</td></tr><tr><td align="right">256</td><td align="left">[1280, 1536, 1792, 2048]</td></tr><tr><td align="right">512</td><td align="left">[2560, 3072, 3584]</td></tr><tr><td align="left">Large</td><td align="right">4 KiB</td><td align="left">[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</td></tr><tr><td align="left">Huge</td><td align="right">4 MiB</td><td align="left">[4 MiB, 8 MiB, 12 MiB, ...]</td></tr></tbody></table></div></div><br class="table-break"></div><div class="refsect1"><a name="mallctl_namespace"></a><h2>MALLCTL NAMESPACE</h2><p>The following names are defined in the namespace accessible via the
+ allocating.</p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>),
+ <code class="function">rallocx</code>(<em class="parameter"><code></code></em>), and
+ <code class="function">xallocx</code>(<em class="parameter"><code></code></em>) functions may resize allocations
+ without moving them under limited circumstances. Unlike the
+ <code class="function">*allocx</code>(<em class="parameter"><code></code></em>) API, the standard API does not
+ officially round up the usable size of an allocation to the nearest size
+ class, so technically it is necessary to call
+ <code class="function">realloc</code>(<em class="parameter"><code></code></em>) to grow e.g. a 9-byte allocation to
+ 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage
+ trivially succeeds in place as long as the pre-size and post-size both round
+ up to the same size class. No other API guarantees are made regarding
+ in-place resizing, but the current implementation also tries to resize large
+ and huge allocations in place, as long as the pre-size and post-size are
+ both large or both huge. In such cases shrinkage always succeeds for large
+ size classes, but for huge size classes the chunk allocator must support
+ splitting (see <a class="link" href="#arena.i.chunk_hooks">
+ "<code class="mallctl">arena.&lt;i&gt;.chunk_hooks</code>"
+ </a>).
+ Growth only succeeds if the trailing memory is currently available, and
+ additionally for huge size classes the chunk allocator must support
+ merging.</p><p>Assuming 2 MiB chunks, 4 KiB pages, and a 16-byte quantum on a
+ 64-bit system, the size classes in each category are as shown in <a class="xref" href="#size_classes" title="Table 1. Size classes">Table 1</a>.</p><div class="table"><a name="size_classes"></a><p class="title"><b>Table 1. Size classes</b></p><div class="table-contents"><table summary="Size classes" border="1"><colgroup><col align="left" class="c1"><col align="right" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Category</th><th align="right">Spacing</th><th align="left">Size</th></tr></thead><tbody><tr><td rowspan="9" align="left">Small</td><td align="right">lg</td><td align="left">[8]</td></tr><tr><td align="right">16</td><td align="left">[16, 32, 48, 64, 80, 96, 112, 128]</td></tr><tr><td align="right">32</td><td align="left">[160, 192, 224, 256]</td></tr><tr><td align="right">64</td><td align="left">[320, 384, 448, 512]</td></tr><tr><td align="right">128</td><td align="left">[640, 768, 896, 1024]</td></tr><tr><td align="right">256</td><td align="left">[1280, 1536, 1792, 2048]</td></tr><tr><td align="right">512</td><td align="left">[2560, 3072, 3584, 4096]</td></tr><tr><td align="right">1 KiB</td><td align="left">[5 KiB, 6 KiB, 7 KiB, 8 KiB]</td></tr><tr><td align="right">2 KiB</td><td align="left">[10 KiB, 12 KiB, 14 KiB]</td></tr><tr><td rowspan="8" align="left">Large</td><td align="right">2 KiB</td><td align="left">[16 KiB]</td></tr><tr><td align="right">4 KiB</td><td align="left">[20 KiB, 24 KiB, 28 KiB, 32 KiB]</td></tr><tr><td align="right">8 KiB</td><td align="left">[40 KiB, 48 KiB, 56 KiB, 64 KiB]</td></tr><tr><td align="right">16 KiB</td><td align="left">[80 KiB, 96 KiB, 112 KiB, 128 KiB]</td></tr><tr><td align="right">32 KiB</td><td align="left">[160 KiB, 192 KiB, 224 KiB, 256 KiB]</td></tr><tr><td align="right">64 KiB</td><td align="left">[320 KiB, 384 KiB, 448 KiB, 512 KiB]</td></tr><tr><td align="right">128 KiB</td><td align="left">[640 KiB, 768 KiB, 896 KiB, 1 MiB]</td></tr><tr><td align="right">256 KiB</td><td 
align="left">[1280 KiB, 1536 KiB, 1792 KiB]</td></tr><tr><td rowspan="7" align="left">Huge</td><td align="right">256 KiB</td><td align="left">[2 MiB]</td></tr><tr><td align="right">512 KiB</td><td align="left">[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]</td></tr><tr><td align="right">1 MiB</td><td align="left">[5 MiB, 6 MiB, 7 MiB, 8 MiB]</td></tr><tr><td align="right">2 MiB</td><td align="left">[10 MiB, 12 MiB, 14 MiB, 16 MiB]</td></tr><tr><td align="right">4 MiB</td><td align="left">[20 MiB, 24 MiB, 28 MiB, 32 MiB]</td></tr><tr><td align="right">8 MiB</td><td align="left">[40 MiB, 48 MiB, 56 MiB, 64 MiB]</td></tr><tr><td align="right">...</td><td align="left">...</td></tr></tbody></table></div></div><br class="table-break"></div><div class="refsect1"><a name="mallctl_namespace"></a><h2>MALLCTL NAMESPACE</h2><p>The following names are defined in the namespace accessible via the
<code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions. Value types are
specified in parentheses, their readable/writable statuses are encoded as
<code class="literal">rw</code>, <code class="literal">r-</code>, <code class="literal">-w</code>, or
@@ -355,20 +328,20 @@ for (i = 0; i &lt; nbins; i++) {
</span></dt><dd><p>If a value is passed in, refresh the data from which
the <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions report values,
and increment the epoch. Return the current epoch. This is useful for
- detecting whether another thread caused a refresh.</p></dd><dt><a name="config.debug"></a><span class="term">
+ detecting whether another thread caused a refresh.</p></dd><dt><a name="config.cache_oblivious"></a><span class="term">
- "<code class="mallctl">config.debug</code>"
+ "<code class="mallctl">config.cache_oblivious</code>"
(<span class="type">bool</span>)
<code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-debug</code> was specified during
- build configuration.</p></dd><dt><a name="config.dss"></a><span class="term">
+ </span></dt><dd><p><code class="option">--enable-cache-oblivious</code> was specified
+ during build configuration.</p></dd><dt><a name="config.debug"></a><span class="term">
- "<code class="mallctl">config.dss</code>"
+ "<code class="mallctl">config.debug</code>"
(<span class="type">bool</span>)
<code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-dss</code> was specified during
+ </span></dt><dd><p><code class="option">--enable-debug</code> was specified during
build configuration.</p></dd><dt><a name="config.fill"></a><span class="term">
"<code class="mallctl">config.fill</code>"
@@ -383,14 +356,7 @@ for (i = 0; i &lt; nbins; i++) {
(<span class="type">bool</span>)
<code class="literal">r-</code>
</span></dt><dd><p><code class="option">--enable-lazy-lock</code> was specified
- during build configuration.</p></dd><dt><a name="config.mremap"></a><span class="term">
-
- "<code class="mallctl">config.mremap</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-mremap</code> was specified during
- build configuration.</p></dd><dt><a name="config.munmap"></a><span class="term">
+ during build configuration.</p></dd><dt><a name="config.munmap"></a><span class="term">
"<code class="mallctl">config.munmap</code>"
@@ -479,12 +445,13 @@ for (i = 0; i &lt; nbins; i++) {
<code class="literal">r-</code>
</span></dt><dd><p>dss (<span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>) allocation precedence as
related to <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> allocation. The following
- settings are supported: &#8220;disabled&#8221;, &#8220;primary&#8221;,
- and &#8220;secondary&#8221;. The default is &#8220;secondary&#8221; if
- <a class="link" href="#config.dss">
- "<code class="mallctl">config.dss</code>"
- </a> is
- true, &#8220;disabled&#8221; otherwise.
+ settings are supported if
+ <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> is supported by the operating
+ system: &#8220;disabled&#8221;, &#8220;primary&#8221;, and
+ &#8220;secondary&#8221;; otherwise only &#8220;disabled&#8221; is
+ supported. The default is &#8220;secondary&#8221; if
+ <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> is supported by the operating
+ system; &#8220;disabled&#8221; otherwise.
</p></dd><dt><a name="opt.lg_chunk"></a><span class="term">
"<code class="mallctl">opt.lg_chunk</code>"
@@ -494,7 +461,7 @@ for (i = 0; i &lt; nbins; i++) {
</span></dt><dd><p>Virtual memory chunk size (log base 2). If a chunk
size outside the supported size range is specified, the size is
silently clipped to the minimum/maximum supported size. The default
- chunk size is 4 MiB (2^22).
+ chunk size is 2 MiB (2^21).
</p></dd><dt><a name="opt.narenas"></a><span class="term">
"<code class="mallctl">opt.narenas</code>"
@@ -517,7 +484,13 @@ for (i = 0; i &lt; nbins; i++) {
provides the kernel with sufficient information to recycle dirty pages
if physical memory becomes scarce and the pages remain unused. The
default minimum ratio is 8:1 (2^3:1); an option value of -1 will
- disable dirty page purging.</p></dd><dt><a name="opt.stats_print"></a><span class="term">
+ disable dirty page purging. See <a class="link" href="#arenas.lg_dirty_mult">
+ "<code class="mallctl">arenas.lg_dirty_mult</code>"
+ </a>
+ and <a class="link" href="#arena.i.lg_dirty_mult">
+ "<code class="mallctl">arena.&lt;i&gt;.lg_dirty_mult</code>"
+ </a>
+ for related dynamic control options.</p></dd><dt><a name="opt.stats_print"></a><span class="term">
"<code class="mallctl">opt.stats_print</code>"
@@ -530,23 +503,31 @@ for (i = 0; i &lt; nbins; i++) {
<code class="option">--enable-stats</code> is specified during configuration, this
has the potential to cause deadlock for a multi-threaded process that
exits while one or more threads are executing in the memory allocation
- functions. Therefore, this option should only be used with care; it is
- primarily intended as a performance tuning aid during application
+ functions. Furthermore, <code class="function">atexit</code>(<em class="parameter"><code></code></em>) may
+ allocate memory during application initialization and then deadlock
+ internally when jemalloc in turn calls
+ <code class="function">atexit</code>(<em class="parameter"><code></code></em>), so this option is not
+ universally usable (though the application can register its own
+ <code class="function">atexit</code>(<em class="parameter"><code></code></em>) function with equivalent
+ functionality). Therefore, this option should only be used with care;
+ it is primarily intended as a performance tuning aid during application
development. This option is disabled by default.</p></dd><dt><a name="opt.junk"></a><span class="term">
"<code class="mallctl">opt.junk</code>"
- (<span class="type">bool</span>)
+ (<span class="type">const char *</span>)
<code class="literal">r-</code>
[<code class="option">--enable-fill</code>]
- </span></dt><dd><p>Junk filling enabled/disabled. If enabled, each byte
- of uninitialized allocated memory will be initialized to
- <code class="literal">0xa5</code>. All deallocated memory will be initialized to
- <code class="literal">0x5a</code>. This is intended for debugging and will
- impact performance negatively. This option is disabled by default
- unless <code class="option">--enable-debug</code> is specified during
- configuration, in which case it is enabled by default unless running
- inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>.</p></dd><dt><a name="opt.quarantine"></a><span class="term">
+ </span></dt><dd><p>Junk filling. If set to "alloc", each byte of
+ uninitialized allocated memory will be initialized to
+ <code class="literal">0xa5</code>. If set to "free", all deallocated memory will
+ be initialized to <code class="literal">0x5a</code>. If set to "true", both
+ allocated and deallocated memory will be initialized, and if set to
+ "false", junk filling will be disabled entirely. This is intended for
+ debugging and will impact performance negatively. This option is
+ "false" by default unless <code class="option">--enable-debug</code> is specified
+ during configuration, in which case it is "true" by default unless
+ running inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>.</p></dd><dt><a name="opt.quarantine"></a><span class="term">
"<code class="mallctl">opt.quarantine</code>"
@@ -592,9 +573,8 @@ for (i = 0; i &lt; nbins; i++) {
</span></dt><dd><p>Zero filling enabled/disabled. If enabled, each byte
of uninitialized allocated memory will be initialized to 0. Note that
this initialization only happens once for each byte, so
- <code class="function">realloc</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocx</code>(<em class="parameter"><code></code></em>) and
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) calls do not zero memory that
+ <code class="function">realloc</code>(<em class="parameter"><code></code></em>) and
+ <code class="function">rallocx</code>(<em class="parameter"><code></code></em>) calls do not zero memory that
was previously allocated. This is intended for debugging and will
impact performance negatively. This option is disabled by default.
</p></dd><dt><a name="opt.utrace"></a><span class="term">
@@ -606,17 +586,7 @@ for (i = 0; i &lt; nbins; i++) {
[<code class="option">--enable-utrace</code>]
</span></dt><dd><p>Allocation tracing based on
<span class="citerefentry"><span class="refentrytitle">utrace</span>(2)</span> enabled/disabled. This option
- is disabled by default.</p></dd><dt><a name="opt.valgrind"></a><span class="term">
-
- "<code class="mallctl">opt.valgrind</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-valgrind</code>]
- </span></dt><dd><p><a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>
- support enabled/disabled. This option is vestigal because jemalloc
- auto-detects whether it is running inside Valgrind. This option is
- disabled by default, unless running inside Valgrind.</p></dd><dt><a name="opt.xmalloc"></a><span class="term">
+ is disabled by default.</p></dd><dt><a name="opt.xmalloc"></a><span class="term">
"<code class="mallctl">opt.xmalloc</code>"
@@ -639,16 +609,16 @@ malloc_conf = "xmalloc:true";</pre><p>
(<span class="type">bool</span>)
<code class="literal">r-</code>
[<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Thread-specific caching enabled/disabled. When there
- are multiple threads, each thread uses a thread-specific cache for
- objects up to a certain size. Thread-specific caching allows many
- allocations to be satisfied without performing any thread
- synchronization, at the cost of increased memory use. See the
- <a class="link" href="#opt.lg_tcache_max">
+ </span></dt><dd><p>Thread-specific caching (tcache) enabled/disabled. When
+ there are multiple threads, each thread uses a tcache for objects up to
+ a certain size. Thread-specific caching allows many allocations to be
+ satisfied without performing any thread synchronization, at the cost of
+ increased memory use. See the <a class="link" href="#opt.lg_tcache_max">
"<code class="mallctl">opt.lg_tcache_max</code>"
</a>
option for related tuning information. This option is enabled by
- default unless running inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>.</p></dd><dt><a name="opt.lg_tcache_max"></a><span class="term">
+ default unless running inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>, in which case it is
+ forcefully disabled.</p></dd><dt><a name="opt.lg_tcache_max"></a><span class="term">
"<code class="mallctl">opt.lg_tcache_max</code>"
@@ -656,8 +626,8 @@ malloc_conf = "xmalloc:true";</pre><p>
<code class="literal">r-</code>
[<code class="option">--enable-tcache</code>]
</span></dt><dd><p>Maximum size class (log base 2) to cache in the
- thread-specific cache. At a minimum, all small size classes are
- cached, and at a maximum all large size classes are cached. The
+ thread-specific cache (tcache). At a minimum, all small size classes
+ are cached, and at a maximum all large size classes are cached. The
default maximum is 32 KiB (2^15).</p></dd><dt><a name="opt.prof"></a><span class="term">
"<code class="mallctl">opt.prof</code>"
@@ -686,8 +656,8 @@ malloc_conf = "xmalloc:true";</pre><p>
"<code class="mallctl">opt.prof_final</code>"
</a>
option for final profile dumping. Profile output is compatible with
- the included <span class="command"><strong>pprof</strong></span> Perl script, which originates
- from the <a class="ulink" href="http://code.google.com/p/gperftools/" target="_top">gperftools
+ the <span class="command"><strong>jeprof</strong></span> command, which is based on the
+ <span class="command"><strong>pprof</strong></span> that is developed as part of the <a class="ulink" href="http://code.google.com/p/gperftools/" target="_top">gperftools
package</a>.</p></dd><dt><a name="opt.prof_prefix"></a><span class="term">
"<code class="mallctl">opt.prof_prefix</code>"
@@ -704,7 +674,7 @@ malloc_conf = "xmalloc:true";</pre><p>
"<code class="mallctl">opt.prof_active</code>"
(<span class="type">bool</span>)
- <code class="literal">rw</code>
+ <code class="literal">r-</code>
[<code class="option">--enable-prof</code>]
</span></dt><dd><p>Profiling activated/deactivated. This is a secondary
control mechanism that makes it possible to start the application with
@@ -715,11 +685,25 @@ malloc_conf = "xmalloc:true";</pre><p>
with the <a class="link" href="#prof.active">
"<code class="mallctl">prof.active</code>"
</a> mallctl.
- This option is enabled by default.</p></dd><dt><a name="opt.lg_prof_sample"></a><span class="term">
+ This option is enabled by default.</p></dd><dt><a name="opt.prof_thread_active_init"></a><span class="term">
+
+ "<code class="mallctl">opt.prof_thread_active_init</code>"
+
+ (<span class="type">bool</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Initial setting for <a class="link" href="#thread.prof.active">
+ "<code class="mallctl">thread.prof.active</code>"
+ </a>
+ in newly created threads. The initial setting for newly created threads
+ can also be changed during execution via the <a class="link" href="#prof.thread_active_init">
+ "<code class="mallctl">prof.thread_active_init</code>"
+ </a>
+ mallctl. This option is enabled by default.</p></dd><dt><a name="opt.lg_prof_sample"></a><span class="term">
"<code class="mallctl">opt.lg_prof_sample</code>"
- (<span class="type">ssize_t</span>)
+ (<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-prof</code>]
</span></dt><dd><p>Average interval (log base 2) between allocation
@@ -764,14 +748,12 @@ malloc_conf = "xmalloc:true";</pre><p>
(<span class="type">bool</span>)
<code class="literal">r-</code>
[<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Trigger a memory profile dump every time the total
- virtual memory exceeds the previous maximum. Profiles are dumped to
- files named according to the pattern
- <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</code>,
- where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
- "<code class="mallctl">opt.prof_prefix</code>"
- </a>
- option. This option is disabled by default.</p></dd><dt><a name="opt.prof_final"></a><span class="term">
+ </span></dt><dd><p>Set the initial state of <a class="link" href="#prof.gdump">
+ "<code class="mallctl">prof.gdump</code>"
+ </a>, which when
+ enabled triggers a memory profile dump every time the total virtual
+ memory exceeds the previous maximum. This option is disabled by
+ default.</p></dd><dt><a name="opt.prof_final"></a><span class="term">
"<code class="mallctl">opt.prof_final</code>"
@@ -785,7 +767,13 @@ malloc_conf = "xmalloc:true";</pre><p>
where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
"<code class="mallctl">opt.prof_prefix</code>"
</a>
- option. This option is enabled by default.</p></dd><dt><a name="opt.prof_leak"></a><span class="term">
+ option. Note that <code class="function">atexit</code>(<em class="parameter"><code></code></em>) may allocate
+ memory during application initialization and then deadlock internally
+ when jemalloc in turn calls <code class="function">atexit</code>(<em class="parameter"><code></code></em>), so
+ this option is not universally usable (though the application can
+ register its own <code class="function">atexit</code>(<em class="parameter"><code></code></em>) function with
+ equivalent functionality). This option is disabled by
+ default.</p></dd><dt><a name="opt.prof_leak"></a><span class="term">
"<code class="mallctl">opt.prof_leak</code>"
@@ -864,9 +852,9 @@ malloc_conf = "xmalloc:true";</pre><p>
[<code class="option">--enable-tcache</code>]
</span></dt><dd><p>Enable/disable calling thread's tcache. The tcache is
implicitly flushed as a side effect of becoming
- disabled (see
+ disabled (see <a class="link" href="#thread.tcache.flush">
"<code class="mallctl">thread.tcache.flush</code>"
- ).
+ </a>).
</p></dd><dt><a name="thread.tcache.flush"></a><span class="term">
"<code class="mallctl">thread.tcache.flush</code>"
@@ -874,19 +862,84 @@ malloc_conf = "xmalloc:true";</pre><p>
(<span class="type">void</span>)
<code class="literal">--</code>
[<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Flush calling thread's tcache. This interface releases
- all cached objects and internal data structures associated with the
- calling thread's thread-specific cache. Ordinarily, this interface
+ </span></dt><dd><p>Flush calling thread's thread-specific cache (tcache).
+ This interface releases all cached objects and internal data structures
+ associated with the calling thread's tcache. Ordinarily, this interface
need not be called, since automatic periodic incremental garbage
collection occurs, and the thread cache is automatically discarded when
a thread exits. However, garbage collection is triggered by allocation
activity, so it is possible for a thread that stops
allocating/deallocating to retain its cache indefinitely, in which case
- the developer may find manual flushing useful.</p></dd><dt><a name="arena.i.purge"></a><span class="term">
+ the developer may find manual flushing useful.</p></dd><dt><a name="thread.prof.name"></a><span class="term">
- "<code class="mallctl">arena.&lt;i&gt;.purge</code>"
+ "<code class="mallctl">thread.prof.name</code>"
+
+ (<span class="type">const char *</span>)
+ <code class="literal">r-</code> or
+ <code class="literal">-w</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Get/set the descriptive name associated with the calling
+ thread in memory profile dumps. An internal copy of the name string is
+ created, so the input string need not be maintained after this interface
+ completes execution. The output string of this interface should be
+ copied for non-ephemeral uses, because multiple implementation details
+ can cause asynchronous string deallocation. Furthermore, each
+ invocation of this interface can only read or write; simultaneous
+ read/write is not supported due to string lifetime limitations. The
+ name string must be nil-terminated and comprised only of characters in the
+ sets recognized
+ by <span class="citerefentry"><span class="refentrytitle">isgraph</span>(3)</span> and
+ <span class="citerefentry"><span class="refentrytitle">isblank</span>(3)</span>.</p></dd><dt><a name="thread.prof.active"></a><span class="term">
+
+ "<code class="mallctl">thread.prof.active</code>"
+
+ (<span class="type">bool</span>)
+ <code class="literal">rw</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Control whether sampling is currently active for the
+ calling thread. This is an activation mechanism in addition to <a class="link" href="#prof.active">
+ "<code class="mallctl">prof.active</code>"
+ </a>; both must
+ be active for the calling thread to sample. This flag is enabled by
+ default.</p></dd><dt><a name="tcache.create"></a><span class="term">
+
+ "<code class="mallctl">tcache.create</code>"
+
+ (<span class="type">unsigned</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-tcache</code>]
+ </span></dt><dd><p>Create an explicit thread-specific cache (tcache) and
+ return an identifier that can be passed to the <a class="link" href="#MALLOCX_TCACHE"><code class="constant">MALLOCX_TCACHE(<em class="parameter"><code>tc</code></em>)</code></a>
+ macro to explicitly use the specified cache rather than the
+ automatically managed one that is used by default. Each explicit cache
+ can be used by only one thread at a time; the application must assure
+ that this constraint holds.
+ </p></dd><dt><a name="tcache.flush"></a><span class="term">
+
+ "<code class="mallctl">tcache.flush</code>"
+
+ (<span class="type">unsigned</span>)
+ <code class="literal">-w</code>
+ [<code class="option">--enable-tcache</code>]
+ </span></dt><dd><p>Flush the specified thread-specific cache (tcache). The
+ same considerations apply to this interface as to <a class="link" href="#thread.tcache.flush">
+ "<code class="mallctl">thread.tcache.flush</code>"
+ </a>,
+ except that the tcache will never be automatically discarded.
+ </p></dd><dt><a name="tcache.destroy"></a><span class="term">
+
+ "<code class="mallctl">tcache.destroy</code>"
(<span class="type">unsigned</span>)
+ <code class="literal">-w</code>
+ [<code class="option">--enable-tcache</code>]
+ </span></dt><dd><p>Flush the specified thread-specific cache (tcache) and
+ make the identifier available for use during a future tcache creation.
+ </p></dd><dt><a name="arena.i.purge"></a><span class="term">
+
+ "<code class="mallctl">arena.&lt;i&gt;.purge</code>"
+
+ (<span class="type">void</span>)
<code class="literal">--</code>
</span></dt><dd><p>Purge unused dirty pages for arena &lt;i&gt;, or for
all arenas if &lt;i&gt; equals <a class="link" href="#arenas.narenas">
@@ -902,15 +955,138 @@ malloc_conf = "xmalloc:true";</pre><p>
allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
<a class="link" href="#arenas.narenas">
"<code class="mallctl">arenas.narenas</code>"
- </a>. Note
- that even during huge allocation this setting is read from the arena
- that would be chosen for small or large allocation so that applications
- can depend on consistent dss versus mmap allocation regardless of
- allocation size. See <a class="link" href="#opt.dss">
+ </a>. See
+ <a class="link" href="#opt.dss">
"<code class="mallctl">opt.dss</code>"
</a> for supported
- settings.
- </p></dd><dt><a name="arenas.narenas"></a><span class="term">
+ settings.</p></dd><dt><a name="arena.i.lg_dirty_mult"></a><span class="term">
+
+ "<code class="mallctl">arena.&lt;i&gt;.lg_dirty_mult</code>"
+
+ (<span class="type">ssize_t</span>)
+ <code class="literal">rw</code>
+ </span></dt><dd><p>Current per-arena minimum ratio (log base 2) of active
+ to dirty pages for arena &lt;i&gt;. Each time this interface is set and
+ the ratio is increased, pages are synchronously purged as necessary to
+ impose the new ratio. See <a class="link" href="#opt.lg_dirty_mult">
+ "<code class="mallctl">opt.lg_dirty_mult</code>"
+ </a>
+ for additional information.</p></dd><dt><a name="arena.i.chunk_hooks"></a><span class="term">
+
+ "<code class="mallctl">arena.&lt;i&gt;.chunk_hooks</code>"
+
+ (<span class="type">chunk_hooks_t</span>)
+ <code class="literal">rw</code>
+ </span></dt><dd><p>Get or set the chunk management hook functions for arena
+ &lt;i&gt;. The functions must be capable of operating on all extant
+ chunks associated with arena &lt;i&gt;, usually by passing unknown
+ chunks to the replaced functions. In practice, it is feasible to
+ control allocation for arenas created via <a class="link" href="#arenas.extend">
+ "<code class="mallctl">arenas.extend</code>"
+ </a> such
+ that all chunks originate from an application-supplied chunk allocator
+ (by setting custom chunk hook functions just after arena creation), but
+ the automatically created arenas may have already created chunks prior
+ to the application having an opportunity to take over chunk
+ allocation.</p><pre class="programlisting">
+typedef struct {
+ chunk_alloc_t *alloc;
+ chunk_dalloc_t *dalloc;
+ chunk_commit_t *commit;
+ chunk_decommit_t *decommit;
+ chunk_purge_t *purge;
+ chunk_split_t *split;
+ chunk_merge_t *merge;
+} chunk_hooks_t;</pre><p>The <span class="type">chunk_hooks_t</span> structure comprises function
+ pointers which are described individually below. jemalloc uses these
+ functions to manage chunk lifetime, which starts off with allocation of
+ mapped committed memory, in the simplest case followed by deallocation.
+ However, there are performance and platform reasons to retain chunks for
+ later reuse. Cleanup attempts cascade from deallocation to decommit to
+ purging, which gives the chunk management functions opportunities to
+ reject the most permanent cleanup operations in favor of less permanent
+ (and often less costly) operations. The chunk splitting and merging
+ operations can also be opted out of, but this is mainly intended to
+ support platforms on which virtual memory mappings provided by the
+ operating system kernel do not automatically coalesce and split, e.g.
+ Windows.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef void *<b class="fsfunc">(chunk_alloc_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>bool *<var class="pdparam">zero</var>, </td></tr><tr><td> </td><td>bool *<var class="pdparam">commit</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk allocation function conforms to the
+ <span class="type">chunk_alloc_t</span> type and upon success returns a pointer to
+ <em class="parameter"><code>size</code></em> bytes of mapped memory on behalf of arena
+ <em class="parameter"><code>arena_ind</code></em> such that the chunk's base address is a
+ multiple of <em class="parameter"><code>alignment</code></em>, as well as setting
+ <em class="parameter"><code>*zero</code></em> to indicate whether the chunk is zeroed and
+ <em class="parameter"><code>*commit</code></em> to indicate whether the chunk is
+ committed. Upon error the function returns <code class="constant">NULL</code>
+ and leaves <em class="parameter"><code>*zero</code></em> and
+ <em class="parameter"><code>*commit</code></em> unmodified. The
+ <em class="parameter"><code>size</code></em> parameter is always a multiple of the chunk
+ size. The <em class="parameter"><code>alignment</code></em> parameter is always a power
+ of two at least as large as the chunk size. Zeroing is mandatory if
+ <em class="parameter"><code>*zero</code></em> is true upon function entry. Committing is
+ mandatory if <em class="parameter"><code>*commit</code></em> is true upon function entry.
+ If <em class="parameter"><code>chunk</code></em> is not <code class="constant">NULL</code>, the
+ returned pointer must be <em class="parameter"><code>chunk</code></em> on success or
+ <code class="constant">NULL</code> on error. Committed memory may be committed
+ in absolute terms as on a system that does not overcommit, or in
+ implicit terms as on a system that overcommits and satisfies physical
+ memory needs on demand via soft page faults. Note that replacing the
+ default chunk allocation function makes the arena's <a class="link" href="#arena.i.dss">
+ "<code class="mallctl">arena.&lt;i&gt;.dss</code>"
+ </a>
+ setting irrelevant.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_dalloc_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>bool <var class="pdparam">committed</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>
+ A chunk deallocation function conforms to the
+ <span class="type">chunk_dalloc_t</span> type and deallocates a
+ <em class="parameter"><code>chunk</code></em> of given <em class="parameter"><code>size</code></em> with
+ <em class="parameter"><code>committed</code></em>/decommitted memory as indicated, on
+ behalf of arena <em class="parameter"><code>arena_ind</code></em>, returning false upon
+ success. If the function returns true, this indicates opt-out from
+ deallocation; the virtual memory mapping associated with the chunk
+ remains mapped, in the same commit state, and available for future use,
+ in which case it will be automatically retained for later reuse.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_commit_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">offset</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">length</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk commit function conforms to the
+ <span class="type">chunk_commit_t</span> type and commits zeroed physical memory to
+ back pages within a <em class="parameter"><code>chunk</code></em> of given
+ <em class="parameter"><code>size</code></em> at <em class="parameter"><code>offset</code></em> bytes,
+ extending for <em class="parameter"><code>length</code></em> on behalf of arena
+ <em class="parameter"><code>arena_ind</code></em>, returning false upon success.
+ Committed memory may be committed in absolute terms as on a system that
+ does not overcommit, or in implicit terms as on a system that
+ overcommits and satisfies physical memory needs on demand via soft page
+ faults. If the function returns true, this indicates insufficient
+ physical memory to satisfy the request.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_decommit_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">offset</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">length</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk decommit function conforms to the
+ <span class="type">chunk_decommit_t</span> type and decommits any physical memory
+ that is backing pages within a <em class="parameter"><code>chunk</code></em> of given
+ <em class="parameter"><code>size</code></em> at <em class="parameter"><code>offset</code></em> bytes,
+ extending for <em class="parameter"><code>length</code></em> on behalf of arena
+ <em class="parameter"><code>arena_ind</code></em>, returning false upon success, in which
+ case the pages will be committed via the chunk commit function before
+ being reused. If the function returns true, this indicates opt-out from
+ decommit; the memory remains committed and available for future use, in
+ which case it will be automatically retained for later reuse.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_purge_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">offset</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">length</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk purge function conforms to the <span class="type">chunk_purge_t</span>
+ type and optionally discards physical pages within the virtual memory
+ mapping associated with <em class="parameter"><code>chunk</code></em> of given
+ <em class="parameter"><code>size</code></em> at <em class="parameter"><code>offset</code></em> bytes,
+ extending for <em class="parameter"><code>length</code></em> on behalf of arena
+ <em class="parameter"><code>arena_ind</code></em>, returning false if pages within the
+ purged virtual memory range will be zero-filled the next time they are
+ accessed.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_split_t)</b>(</code></td><td>void *<var class="pdparam">chunk</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size_a</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size_b</var>, </td></tr><tr><td> </td><td>bool <var class="pdparam">committed</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk split function conforms to the <span class="type">chunk_split_t</span>
+ type and optionally splits <em class="parameter"><code>chunk</code></em> of given
+ <em class="parameter"><code>size</code></em> into two adjacent chunks, the first of
+ <em class="parameter"><code>size_a</code></em> bytes, and the second of
+ <em class="parameter"><code>size_b</code></em> bytes, operating on
+ <em class="parameter"><code>committed</code></em>/decommitted memory as indicated, on
+ behalf of arena <em class="parameter"><code>arena_ind</code></em>, returning false upon
+ success. If the function returns true, this indicates that the chunk
+ remains unsplit and therefore should continue to be operated on as a
+ whole.</p><div class="funcsynopsis"><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">typedef bool <b class="fsfunc">(chunk_merge_t)</b>(</code></td><td>void *<var class="pdparam">chunk_a</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size_a</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">chunk_b</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size_b</var>, </td></tr><tr><td> </td><td>bool <var class="pdparam">committed</var>, </td></tr><tr><td> </td><td>unsigned <var class="pdparam">arena_ind</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="literallayout"><p></p></div><p>A chunk merge function conforms to the <span class="type">chunk_merge_t</span>
+ type and optionally merges adjacent chunks,
+ <em class="parameter"><code>chunk_a</code></em> of given <em class="parameter"><code>size_a</code></em>
+ and <em class="parameter"><code>chunk_b</code></em> of given
+ <em class="parameter"><code>size_b</code></em> into one contiguous chunk, operating on
+ <em class="parameter"><code>committed</code></em>/decommitted memory as indicated, on
+ behalf of arena <em class="parameter"><code>arena_ind</code></em>, returning false upon
+ success. If the function returns true, this indicates that the chunks
+ remain distinct mappings and therefore should continue to be operated on
+ independently.</p></dd><dt><a name="arenas.narenas"></a><span class="term">
"<code class="mallctl">arenas.narenas</code>"
@@ -926,7 +1102,20 @@ malloc_conf = "xmalloc:true";</pre><p>
"<code class="mallctl">arenas.narenas</code>"
</a>
booleans. Each boolean indicates whether the corresponding arena is
- initialized.</p></dd><dt><a name="arenas.quantum"></a><span class="term">
+ initialized.</p></dd><dt><a name="arenas.lg_dirty_mult"></a><span class="term">
+
+ "<code class="mallctl">arenas.lg_dirty_mult</code>"
+
+ (<span class="type">ssize_t</span>)
+ <code class="literal">rw</code>
+ </span></dt><dd><p>Current default per-arena minimum ratio (log base 2) of
+ active to dirty pages, used to initialize <a class="link" href="#arena.i.lg_dirty_mult">
+ "<code class="mallctl">arena.&lt;i&gt;.lg_dirty_mult</code>"
+ </a>
+ during arena creation. See <a class="link" href="#opt.lg_dirty_mult">
+ "<code class="mallctl">opt.lg_dirty_mult</code>"
+ </a>
+ for additional information.</p></dd><dt><a name="arenas.quantum"></a><span class="term">
"<code class="mallctl">arenas.quantum</code>"
@@ -981,7 +1170,7 @@ malloc_conf = "xmalloc:true";</pre><p>
"<code class="mallctl">arenas.nlruns</code>"
- (<span class="type">size_t</span>)
+ (<span class="type">unsigned</span>)
<code class="literal">r-</code>
</span></dt><dd><p>Total number of large size classes.</p></dd><dt><a name="arenas.lrun.i.size"></a><span class="term">
@@ -990,21 +1179,40 @@ malloc_conf = "xmalloc:true";</pre><p>
(<span class="type">size_t</span>)
<code class="literal">r-</code>
</span></dt><dd><p>Maximum size supported by this large size
- class.</p></dd><dt><a name="arenas.purge"></a><span class="term">
+ class.</p></dd><dt><a name="arenas.nhchunks"></a><span class="term">
- "<code class="mallctl">arenas.purge</code>"
+ "<code class="mallctl">arenas.nhchunks</code>"
(<span class="type">unsigned</span>)
- <code class="literal">-w</code>
- </span></dt><dd><p>Purge unused dirty pages for the specified arena, or
- for all arenas if none is specified.</p></dd><dt><a name="arenas.extend"></a><span class="term">
+ <code class="literal">r-</code>
+ </span></dt><dd><p>Total number of huge size classes.</p></dd><dt><a name="arenas.hchunk.i.size"></a><span class="term">
+
+ "<code class="mallctl">arenas.hchunk.&lt;i&gt;.size</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ </span></dt><dd><p>Maximum size supported by this huge size
+ class.</p></dd><dt><a name="arenas.extend"></a><span class="term">
"<code class="mallctl">arenas.extend</code>"
(<span class="type">unsigned</span>)
<code class="literal">r-</code>
</span></dt><dd><p>Extend the array of arenas by appending a new arena,
- and returning the new arena index.</p></dd><dt><a name="prof.active"></a><span class="term">
+ and returning the new arena index.</p></dd><dt><a name="prof.thread_active_init"></a><span class="term">
+
+ "<code class="mallctl">prof.thread_active_init</code>"
+
+ (<span class="type">bool</span>)
+ <code class="literal">rw</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Control the initial setting for <a class="link" href="#thread.prof.active">
+ "<code class="mallctl">thread.prof.active</code>"
+ </a>
+ in newly created threads. See the <a class="link" href="#opt.prof_thread_active_init">
+ "<code class="mallctl">opt.prof_thread_active_init</code>"
+ </a>
+ option for additional information.</p></dd><dt><a name="prof.active"></a><span class="term">
"<code class="mallctl">prof.active</code>"
@@ -1015,8 +1223,10 @@ malloc_conf = "xmalloc:true";</pre><p>
<a class="link" href="#opt.prof_active">
"<code class="mallctl">opt.prof_active</code>"
</a>
- option for additional information.
- </p></dd><dt><a name="prof.dump"></a><span class="term">
+ option for additional information, as well as the interrelated <a class="link" href="#thread.prof.active">
+ "<code class="mallctl">thread.prof.active</code>"
+ </a>
+ mallctl.</p></dd><dt><a name="prof.dump"></a><span class="term">
"<code class="mallctl">prof.dump</code>"
@@ -1030,7 +1240,45 @@ malloc_conf = "xmalloc:true";</pre><p>
<a class="link" href="#opt.prof_prefix">
"<code class="mallctl">opt.prof_prefix</code>"
</a>
- option.</p></dd><dt><a name="prof.interval"></a><span class="term">
+ option.</p></dd><dt><a name="prof.gdump"></a><span class="term">
+
+ "<code class="mallctl">prof.gdump</code>"
+
+ (<span class="type">bool</span>)
+ <code class="literal">rw</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>When enabled, trigger a memory profile dump every time
+ the total virtual memory exceeds the previous maximum. Profiles are
+ dumped to files named according to the pattern
+ <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</code>,
+ where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
+ "<code class="mallctl">opt.prof_prefix</code>"
+ </a>
+ option.</p></dd><dt><a name="prof.reset"></a><span class="term">
+
+ "<code class="mallctl">prof.reset</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">-w</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Reset all memory profile statistics, and optionally
+ update the sample rate (see <a class="link" href="#opt.lg_prof_sample">
+ "<code class="mallctl">opt.lg_prof_sample</code>"
+ </a>
+ and <a class="link" href="#prof.lg_sample">
+ "<code class="mallctl">prof.lg_sample</code>"
+ </a>).
+ </p></dd><dt><a name="prof.lg_sample"></a><span class="term">
+
+ "<code class="mallctl">prof.lg_sample</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-prof</code>]
+ </span></dt><dd><p>Get the current sample rate (see <a class="link" href="#opt.lg_prof_sample">
+ "<code class="mallctl">opt.lg_prof_sample</code>"
+ </a>).
+ </p></dd><dt><a name="prof.interval"></a><span class="term">
"<code class="mallctl">prof.interval</code>"
@@ -1051,9 +1299,8 @@ malloc_conf = "xmalloc:true";</pre><p>
[<code class="option">--enable-stats</code>]
</span></dt><dd><p>Pointer to a counter that contains an approximate count
of the current number of bytes in active pages. The estimate may be
- high, but never low, because each arena rounds up to the nearest
- multiple of the chunk size when computing its contribution to the
- counter. Note that the <a class="link" href="#epoch">
+ high, but never low, because each arena rounds up when computing its
+ contribution to the counter. Note that the <a class="link" href="#epoch">
"<code class="mallctl">epoch</code>"
</a> mallctl has no bearing
on this counter. Furthermore, counter consistency is maintained via
@@ -1082,68 +1329,53 @@ malloc_conf = "xmalloc:true";</pre><p>
This does not include <a class="link" href="#stats.arenas.i.pdirty">
"<code class="mallctl">stats.arenas.&lt;i&gt;.pdirty</code>"
- </a> and pages
- entirely devoted to allocator metadata.</p></dd><dt><a name="stats.mapped"></a><span class="term">
-
- "<code class="mallctl">stats.mapped</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of bytes in chunks mapped on behalf of the
- application. This is a multiple of the chunk size, and is at least as
- large as <a class="link" href="#stats.active">
- "<code class="mallctl">stats.active</code>"
- </a>. This
- does not include inactive chunks.</p></dd><dt><a name="stats.chunks.current"></a><span class="term">
+ </a>, nor pages
+ entirely devoted to allocator metadata.</p></dd><dt><a name="stats.metadata"></a><span class="term">
- "<code class="mallctl">stats.chunks.current</code>"
+ "<code class="mallctl">stats.metadata</code>"
(<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of chunks actively mapped on behalf of the
- application. This does not include inactive chunks.
- </p></dd><dt><a name="stats.chunks.total"></a><span class="term">
-
- "<code class="mallctl">stats.chunks.total</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of chunks allocated.</p></dd><dt><a name="stats.chunks.high"></a><span class="term">
+ </span></dt><dd><p>Total number of bytes dedicated to metadata, which
+ comprise base allocations used for bootstrap-sensitive internal
+ allocator data structures, arena chunk headers (see <a class="link" href="#stats.arenas.i.metadata.mapped">
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.metadata.mapped</code>"
+ </a>),
+ and internal allocations (see <a class="link" href="#stats.arenas.i.metadata.allocated">
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.metadata.allocated</code>"
+ </a>).</p></dd><dt><a name="stats.resident"></a><span class="term">
- "<code class="mallctl">stats.chunks.high</code>"
+ "<code class="mallctl">stats.resident</code>"
(<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Maximum number of active chunks at any time thus far.
- </p></dd><dt><a name="stats.huge.allocated"></a><span class="term">
+ </span></dt><dd><p>Maximum number of bytes in physically resident data
+ pages mapped by the allocator, comprising all pages dedicated to
+ allocator metadata, pages backing active allocations, and unused dirty
+ pages. This is a maximum rather than precise because pages may not
+ actually be physically resident if they correspond to demand-zeroed
+ virtual memory that has not yet been touched. This is a multiple of the
+ page size, and is larger than <a class="link" href="#stats.active">
+ "<code class="mallctl">stats.active</code>"
+ </a>.</p></dd><dt><a name="stats.mapped"></a><span class="term">
- "<code class="mallctl">stats.huge.allocated</code>"
+ "<code class="mallctl">stats.mapped</code>"
(<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of bytes currently allocated by huge objects.
- </p></dd><dt><a name="stats.huge.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.huge.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of huge allocation requests.
- </p></dd><dt><a name="stats.huge.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.huge.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of huge deallocation requests.
- </p></dd><dt><a name="stats.arenas.i.dss"></a><span class="term">
+ </span></dt><dd><p>Total number of bytes in active chunks mapped by the
+ allocator. This is a multiple of the chunk size, and is larger than
+ <a class="link" href="#stats.active">
+ "<code class="mallctl">stats.active</code>"
+ </a>.
+ This does not include inactive chunks, even those that contain unused
+ dirty pages, which means that there is no strict ordering between this
+ and <a class="link" href="#stats.resident">
+ "<code class="mallctl">stats.resident</code>"
+ </a>.</p></dd><dt><a name="stats.arenas.i.dss"></a><span class="term">
"<code class="mallctl">stats.arenas.&lt;i&gt;.dss</code>"
@@ -1153,7 +1385,17 @@ malloc_conf = "xmalloc:true";</pre><p>
related to <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> allocation. See <a class="link" href="#opt.dss">
"<code class="mallctl">opt.dss</code>"
</a> for details.
- </p></dd><dt><a name="stats.arenas.i.nthreads"></a><span class="term">
+ </p></dd><dt><a name="stats.arenas.i.lg_dirty_mult"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.lg_dirty_mult</code>"
+
+ (<span class="type">ssize_t</span>)
+ <code class="literal">r-</code>
+ </span></dt><dd><p>Minimum ratio (log base 2) of active to dirty pages.
+ See <a class="link" href="#opt.lg_dirty_mult">
+ "<code class="mallctl">opt.lg_dirty_mult</code>"
+ </a>
+ for details.</p></dd><dt><a name="stats.arenas.i.nthreads"></a><span class="term">
"<code class="mallctl">stats.arenas.&lt;i&gt;.nthreads</code>"
@@ -1182,7 +1424,38 @@ malloc_conf = "xmalloc:true";</pre><p>
(<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of mapped bytes.</p></dd><dt><a name="stats.arenas.i.npurge"></a><span class="term">
+ </span></dt><dd><p>Number of mapped bytes.</p></dd><dt><a name="stats.arenas.i.metadata.mapped"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.metadata.mapped</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Number of mapped bytes in arena chunk headers, which
+ track the states of the non-metadata pages.</p></dd><dt><a name="stats.arenas.i.metadata.allocated"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.metadata.allocated</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Number of bytes dedicated to internal allocations.
+ Internal allocations differ from application-originated allocations in
+ that they are for internal use, and that they are omitted from heap
+ profiles. This statistic is reported separately from <a class="link" href="#stats.metadata">
+ "<code class="mallctl">stats.metadata</code>"
+ </a> and
+ <a class="link" href="#stats.arenas.i.metadata.mapped">
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.metadata.mapped</code>"
+ </a>
+ because it overlaps with e.g. the <a class="link" href="#stats.allocated">
+ "<code class="mallctl">stats.allocated</code>"
+ </a> and
+ <a class="link" href="#stats.active">
+ "<code class="mallctl">stats.active</code>"
+ </a>
+ statistics, whereas the other metadata statistics do
+ not.</p></dd><dt><a name="stats.arenas.i.npurge"></a><span class="term">
"<code class="mallctl">stats.arenas.&lt;i&gt;.npurge</code>"
@@ -1270,15 +1543,39 @@ malloc_conf = "xmalloc:true";</pre><p>
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
</span></dt><dd><p>Cumulative number of large allocation requests.
- </p></dd><dt><a name="stats.arenas.i.bins.j.allocated"></a><span class="term">
+ </p></dd><dt><a name="stats.arenas.i.huge.allocated"></a><span class="term">
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.allocated</code>"
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.huge.allocated</code>"
(<span class="type">size_t</span>)
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Current number of bytes allocated by
- bin.</p></dd><dt><a name="stats.arenas.i.bins.j.nmalloc"></a><span class="term">
+ </span></dt><dd><p>Number of bytes currently allocated by huge objects.
+ </p></dd><dt><a name="stats.arenas.i.huge.nmalloc"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.huge.nmalloc</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of huge allocation requests served
+ directly by the arena.</p></dd><dt><a name="stats.arenas.i.huge.ndalloc"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.huge.ndalloc</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of huge deallocation requests served
+ directly by the arena.</p></dd><dt><a name="stats.arenas.i.huge.nrequests"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.huge.nrequests</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of huge allocation requests.
+ </p></dd><dt><a name="stats.arenas.i.bins.j.nmalloc"></a><span class="term">
"<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nmalloc</code>"
@@ -1302,7 +1599,15 @@ malloc_conf = "xmalloc:true";</pre><p>
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
</span></dt><dd><p>Cumulative number of allocation
- requests.</p></dd><dt><a name="stats.arenas.i.bins.j.nfills"></a><span class="term">
+ requests.</p></dd><dt><a name="stats.arenas.i.bins.j.curregs"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curregs</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Current number of regions for this size
+ class.</p></dd><dt><a name="stats.arenas.i.bins.j.nfills"></a><span class="term">
"<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nfills</code>"
@@ -1370,6 +1675,38 @@ malloc_conf = "xmalloc:true";</pre><p>
<code class="literal">r-</code>
[<code class="option">--enable-stats</code>]
</span></dt><dd><p>Current number of runs for this size class.
+ </p></dd><dt><a name="stats.arenas.i.hchunks.j.nmalloc"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.nmalloc</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of allocation requests for this size
+ class served directly by the arena.</p></dd><dt><a name="stats.arenas.i.hchunks.j.ndalloc"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.ndalloc</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of deallocation requests for this
+ size class served directly by the arena.</p></dd><dt><a name="stats.arenas.i.hchunks.j.nrequests"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.nrequests</code>"
+
+ (<span class="type">uint64_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Cumulative number of allocation requests for this size
+ class.</p></dd><dt><a name="stats.arenas.i.hchunks.j.curhchunks"></a><span class="term">
+
+ "<code class="mallctl">stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.curhchunks</code>"
+
+ (<span class="type">size_t</span>)
+ <code class="literal">r-</code>
+ [<code class="option">--enable-stats</code>]
+ </span></dt><dd><p>Current number of huge allocations for this size class.
</p></dd></dl></div></div><div class="refsect1"><a name="debugging_malloc_problems"></a><h2>DEBUGGING MALLOC PROBLEMS</h2><p>When debugging, it is a good idea to configure/build jemalloc with
the <code class="option">--enable-debug</code> and <code class="option">--enable-fill</code>
options, and recompile the program with suitable options and symbols for
@@ -1406,7 +1743,7 @@ malloc_conf = "xmalloc:true";</pre><p>
<code class="function">malloc_stats_print</code>(<em class="parameter"><code></code></em>), followed by a string
pointer. Please note that doing anything which tries to allocate memory in
this function is likely to result in a crash or deadlock.</p><p>All messages are prefixed by
- &#8220;<code class="computeroutput">&lt;jemalloc&gt;: </code>&#8221;.</p></div><div class="refsect1"><a name="return_values"></a><h2>RETURN VALUES</h2><div class="refsect2"><a name="idm316388028784"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) and
+ &#8220;<code class="computeroutput">&lt;jemalloc&gt;: </code>&#8221;.</p></div><div class="refsect1"><a name="return_values"></a><h2>RETURN VALUES</h2><div class="refsect2"><a name="idp46949776"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) and
<code class="function">calloc</code>(<em class="parameter"><code></code></em>) functions return a pointer to the
allocated memory if successful; otherwise a <code class="constant">NULL</code>
pointer is returned and <code class="varname">errno</code> is set to
@@ -1434,7 +1771,7 @@ malloc_conf = "xmalloc:true";</pre><p>
allocation failure. The <code class="function">realloc</code>(<em class="parameter"><code></code></em>)
function always leaves the original buffer intact when an error occurs.
</p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function returns no
- value.</p></div><div class="refsect2"><a name="idm316388003104"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) and
+ value.</p></div><div class="refsect2"><a name="idp46974576"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) and
<code class="function">rallocx</code>(<em class="parameter"><code></code></em>) functions return a pointer to
the allocated memory if successful; otherwise a <code class="constant">NULL</code>
pointer is returned to indicate insufficient contiguous memory was
@@ -1465,27 +1802,7 @@ malloc_conf = "xmalloc:true";</pre><p>
read/write processing.</p></dd></dl></div><p>
</p><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
returns the usable size of the allocation pointed to by
- <em class="parameter"><code>ptr</code></em>. </p></div><div class="refsect2"><a name="idm316387973360"></a><h3>Experimental API</h3><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions return
- <code class="constant">ALLOCM_SUCCESS</code> on success; otherwise they return an
- error value. The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions will fail if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">ALLOCM_ERR_OOM</span></span></dt><dd><p>Out of memory. Insufficient contiguous memory was
- available to service the allocation request. The
- <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function additionally sets
- <em class="parameter"><code>*ptr</code></em> to <code class="constant">NULL</code>, whereas
- the <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function leaves
- <code class="constant">*ptr</code> unmodified.</p></dd></dl></div><p>
- The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function will also
- fail if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">ALLOCM_ERR_NOT_MOVED</span></span></dt><dd><p><code class="constant">ALLOCM_NO_MOVE</code> was specified,
- but the reallocation request could not be serviced without moving
- the object.</p></dd></dl></div><p>
- </p></div></div><div class="refsect1"><a name="environment"></a><h2>ENVIRONMENT</h2><p>The following environment variable affects the execution of the
+ <em class="parameter"><code>ptr</code></em>. </p></div></div><div class="refsect1"><a name="environment"></a><h2>ENVIRONMENT</h2><p>The following environment variable affects the execution of the
allocation functions:
</p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="envar">MALLOC_CONF</code></span></dt><dd><p>If the environment variable
<code class="envar">MALLOC_CONF</code> is set, the characters it contains
diff --git a/deps/jemalloc/doc/jemalloc.xml.in b/deps/jemalloc/doc/jemalloc.xml.in
index d8e2e711f..8fc774b18 100644
--- a/deps/jemalloc/doc/jemalloc.xml.in
+++ b/deps/jemalloc/doc/jemalloc.xml.in
@@ -38,17 +38,13 @@
<refname>xallocx</refname>
<refname>sallocx</refname>
<refname>dallocx</refname>
+ <refname>sdallocx</refname>
<refname>nallocx</refname>
<refname>mallctl</refname>
<refname>mallctlnametomib</refname>
<refname>mallctlbymib</refname>
<refname>malloc_stats_print</refname>
<refname>malloc_usable_size</refname>
- <refname>allocm</refname>
- <refname>rallocm</refname>
- <refname>sallocm</refname>
- <refname>dallocm</refname>
- <refname>nallocm</refname>
-->
<refpurpose>general purpose memory allocation functions</refpurpose>
</refnamediv>
@@ -61,8 +57,7 @@
<refsynopsisdiv>
<title>SYNOPSIS</title>
<funcsynopsis>
- <funcsynopsisinfo>#include &lt;<filename class="headerfile">stdlib.h</filename>&gt;
-#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
+ <funcsynopsisinfo>#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
<refsect2>
<title>Standard API</title>
<funcprototype>
@@ -126,6 +121,12 @@
<paramdef>int <parameter>flags</parameter></paramdef>
</funcprototype>
<funcprototype>
+ <funcdef>void <function>sdallocx</function></funcdef>
+ <paramdef>void *<parameter>ptr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ </funcprototype>
+ <funcprototype>
<funcdef>size_t <function>nallocx</function></funcdef>
<paramdef>size_t <parameter>size</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
@@ -172,41 +173,6 @@
</funcprototype>
<para><type>const char *</type><varname>malloc_conf</varname>;</para>
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <funcprototype>
- <funcdef>int <function>allocm</function></funcdef>
- <paramdef>void **<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>rallocm</function></funcdef>
- <paramdef>void **<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>size_t <parameter>extra</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>sallocm</function></funcdef>
- <paramdef>const void *<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>dallocm</function></funcdef>
- <paramdef>void *<parameter>ptr</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>nallocm</function></funcdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- </refsect2>
</funcsynopsis>
</refsynopsisdiv>
<refsect1 id="description">
@@ -229,15 +195,15 @@
<para>The <function>posix_memalign<parameter/></function> function
allocates <parameter>size</parameter> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<parameter>alignment</parameter>, and returns the allocation in the value
pointed to by <parameter>ptr</parameter>. The requested
- <parameter>alignment</parameter> must be a power of 2 at least as large
- as <code language="C">sizeof(<type>void *</type>)</code>.</para>
+ <parameter>alignment</parameter> must be a power of 2 at least as large as
+ <code language="C">sizeof(<type>void *</type>)</code>.</para>
<para>The <function>aligned_alloc<parameter/></function> function
allocates <parameter>size</parameter> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<parameter>alignment</parameter>. The requested
<parameter>alignment</parameter> must be a power of 2. Behavior is
undefined if <parameter>size</parameter> is not an integral multiple of
@@ -268,14 +234,15 @@
<function>rallocx<parameter/></function>,
<function>xallocx<parameter/></function>,
<function>sallocx<parameter/></function>,
- <function>dallocx<parameter/></function>, and
+ <function>dallocx<parameter/></function>,
+ <function>sdallocx<parameter/></function>, and
<function>nallocx<parameter/></function> functions all have a
<parameter>flags</parameter> argument that can be used to specify
options. The functions only check the options that are contextually
relevant. Use bitwise or (<code language="C">|</code>) operations to
specify one or more of the following:
<variablelist>
- <varlistentry>
+ <varlistentry id="MALLOCX_LG_ALIGN">
<term><constant>MALLOCX_LG_ALIGN(<parameter>la</parameter>)
</constant></term>
@@ -285,7 +252,7 @@
that <parameter>la</parameter> is within the valid
range.</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_ALIGN">
<term><constant>MALLOCX_ALIGN(<parameter>a</parameter>)
</constant></term>
@@ -295,7 +262,7 @@
validate that <parameter>a</parameter> is a power of 2.
</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_ZERO">
<term><constant>MALLOCX_ZERO</constant></term>
<listitem><para>Initialize newly allocated memory to contain zero
@@ -304,16 +271,38 @@
that are initialized to contain zero bytes. If this macro is
absent, newly allocated memory is uninitialized.</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_TCACHE">
+ <term><constant>MALLOCX_TCACHE(<parameter>tc</parameter>)
+ </constant></term>
+
+ <listitem><para>Use the thread-specific cache (tcache) specified by
+ the identifier <parameter>tc</parameter>, which must have been
+ acquired via the <link
+ linkend="tcache.create"><mallctl>tcache.create</mallctl></link>
+ mallctl. This macro does not validate that
+ <parameter>tc</parameter> specifies a valid
+ identifier.</para></listitem>
+ </varlistentry>
+          <varlistentry id="MALLOCX_TCACHE_NONE">
+ <term><constant>MALLOCX_TCACHE_NONE</constant></term>
+
+ <listitem><para>Do not use a thread-specific cache (tcache). Unless
+ <constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant> or
+ <constant>MALLOCX_TCACHE_NONE</constant> is specified, an
+ automatically managed tcache will be used under many circumstances.
+ This macro cannot be used in the same <parameter>flags</parameter>
+ argument as
+ <constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant>.</para></listitem>
+ </varlistentry>
+ <varlistentry id="MALLOCX_ARENA">
<term><constant>MALLOCX_ARENA(<parameter>a</parameter>)
</constant></term>
<listitem><para>Use the arena specified by the index
- <parameter>a</parameter> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <parameter>a</parameter>
- specifies an arena index in the valid range.</para></listitem>
+ <parameter>a</parameter>. This macro has no effect for regions that
+ were allocated via an arena other than the one specified. This
+ macro does not validate that <parameter>a</parameter> specifies an
+ arena index in the valid range.</para></listitem>
</varlistentry>
</variablelist>
</para>
@@ -352,6 +341,15 @@
memory referenced by <parameter>ptr</parameter> to be made available for
future allocations.</para>
+ <para>The <function>sdallocx<parameter/></function> function is an
+ extension of <function>dallocx<parameter/></function> with a
+ <parameter>size</parameter> parameter to allow the caller to pass in the
+ allocation size as an optimization. The minimum valid input size is the
+ original requested size of the allocation, and the maximum valid input
+ size is the corresponding value returned by
+ <function>nallocx<parameter/></function> or
+ <function>sallocx<parameter/></function>.</para>
+
<para>The <function>nallocx<parameter/></function> function allocates no
memory, but it performs the same size computation as the
<function>mallocx<parameter/></function> function, and returns the real
@@ -430,11 +428,12 @@ for (i = 0; i < nbins; i++) {
functions simultaneously. If <option>--enable-stats</option> is
specified during configuration, &ldquo;m&rdquo; and &ldquo;a&rdquo; can
be specified to omit merged arena and per arena statistics, respectively;
- &ldquo;b&rdquo; and &ldquo;l&rdquo; can be specified to omit per size
- class statistics for bins and large objects, respectively. Unrecognized
- characters are silently ignored. Note that thread caching may prevent
- some statistics from being completely up to date, since extra locking
- would be required to merge counters that track thread cache operations.
+ &ldquo;b&rdquo;, &ldquo;l&rdquo;, and &ldquo;h&rdquo; can be specified to
+ omit per size class statistics for bins, large objects, and huge objects,
+ respectively. Unrecognized characters are silently ignored. Note that
+ thread caching may prevent some statistics from being completely up to
+ date, since extra locking would be required to merge counters that track
+ thread cache operations.
</para>
<para>The <function>malloc_usable_size<parameter/></function> function
@@ -449,116 +448,6 @@ for (i = 0; i < nbins; i++) {
depended on, since such behavior is entirely implementation-dependent.
</para>
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <para>The experimental API is subject to change or removal without regard
- for backward compatibility. If <option>--disable-experimental</option>
- is specified during configuration, the experimental API is
- omitted.</para>
-
- <para>The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>,
- <function>sallocm<parameter/></function>,
- <function>dallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions all have a
- <parameter>flags</parameter> argument that can be used to specify
- options. The functions only check the options that are contextually
- relevant. Use bitwise or (<code language="C">|</code>) operations to
- specify one or more of the following:
- <variablelist>
- <varlistentry>
- <term><constant>ALLOCM_LG_ALIGN(<parameter>la</parameter>)
- </constant></term>
-
- <listitem><para>Align the memory allocation to start at an address
- that is a multiple of <code language="C">(1 &lt;&lt;
- <parameter>la</parameter>)</code>. This macro does not validate
- that <parameter>la</parameter> is within the valid
- range.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ALIGN(<parameter>a</parameter>)
- </constant></term>
-
- <listitem><para>Align the memory allocation to start at an address
- that is a multiple of <parameter>a</parameter>, where
- <parameter>a</parameter> is a power of two. This macro does not
- validate that <parameter>a</parameter> is a power of 2.
- </para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ZERO</constant></term>
-
- <listitem><para>Initialize newly allocated memory to contain zero
- bytes. In the growing reallocation case, the real size prior to
- reallocation defines the boundary between untouched bytes and those
- that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_NO_MOVE</constant></term>
-
- <listitem><para>For reallocation, fail rather than moving the
- object. This constraint can apply to both growth and
- shrinkage.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ARENA(<parameter>a</parameter>)
- </constant></term>
-
- <listitem><para>Use the arena specified by the index
- <parameter>a</parameter> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <parameter>a</parameter>
- specifies an arena index in the valid range.</para></listitem>
- </varlistentry>
- </variablelist>
- </para>
-
- <para>The <function>allocm<parameter/></function> function allocates at
- least <parameter>size</parameter> bytes of memory, sets
- <parameter>*ptr</parameter> to the base address of the allocation, and
- sets <parameter>*rsize</parameter> to the real size of the allocation if
- <parameter>rsize</parameter> is not <constant>NULL</constant>. Behavior
- is undefined if <parameter>size</parameter> is <constant>0</constant>, or
- if request size overflows due to size class and/or alignment
- constraints.</para>
-
- <para>The <function>rallocm<parameter/></function> function resizes the
- allocation at <parameter>*ptr</parameter> to be at least
- <parameter>size</parameter> bytes, sets <parameter>*ptr</parameter> to
- the base address of the allocation if it moved, and sets
- <parameter>*rsize</parameter> to the real size of the allocation if
- <parameter>rsize</parameter> is not <constant>NULL</constant>. If
- <parameter>extra</parameter> is non-zero, an attempt is made to resize
- the allocation to be at least <code
- language="C">(<parameter>size</parameter> +
- <parameter>extra</parameter>)</code> bytes, though inability to allocate
- the extra byte(s) will not by itself result in failure. Behavior is
- undefined if <parameter>size</parameter> is <constant>0</constant>, if
- request size overflows due to size class and/or alignment constraints, or
- if <code language="C">(<parameter>size</parameter> +
- <parameter>extra</parameter> &gt;
- <constant>SIZE_T_MAX</constant>)</code>.</para>
-
- <para>The <function>sallocm<parameter/></function> function sets
- <parameter>*rsize</parameter> to the real size of the allocation.</para>
-
- <para>The <function>dallocm<parameter/></function> function causes the
- memory referenced by <parameter>ptr</parameter> to be made available for
- future allocations.</para>
-
- <para>The <function>nallocm<parameter/></function> function allocates no
- memory, but it performs the same size computation as the
- <function>allocm<parameter/></function> function, and if
- <parameter>rsize</parameter> is not <constant>NULL</constant> it sets
- <parameter>*rsize</parameter> to the real size of the allocation that
- would result from the equivalent <function>allocm<parameter/></function>
- function call. Behavior is undefined if <parameter>size</parameter> is
- <constant>0</constant>, or if request size overflows due to size class
- and/or alignment constraints.</para>
- </refsect2>
</refsect1>
<refsect1 id="tuning">
<title>TUNING</title>
@@ -598,8 +487,10 @@ for (i = 0; i < nbins; i++) {
<manvolnum>2</manvolnum></citerefentry> to obtain memory, which is
suboptimal for several reasons, including race conditions, increased
fragmentation, and artificial limitations on maximum usable memory. If
- <option>--enable-dss</option> is specified during configuration, this
- allocator uses both <citerefentry><refentrytitle>mmap</refentrytitle>
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system, this allocator uses both
+ <citerefentry><refentrytitle>mmap</refentrytitle>
<manvolnum>2</manvolnum></citerefentry> and
<citerefentry><refentrytitle>sbrk</refentrytitle>
<manvolnum>2</manvolnum></citerefentry>, in that order of preference;
@@ -632,12 +523,11 @@ for (i = 0; i < nbins; i++) {
possible to find metadata for user objects very quickly.</para>
<para>User objects are broken into three categories according to size:
- small, large, and huge. Small objects are smaller than one page. Large
- objects are smaller than the chunk size. Huge objects are a multiple of
- the chunk size. Small and large objects are managed by arenas; huge
- objects are managed separately in a single data structure that is shared by
- all threads. Huge objects are used by applications infrequently enough
- that this single data structure is not a scalability issue.</para>
+ small, large, and huge. Small and large objects are managed entirely by
+ arenas; huge objects are additionally aggregated in a single data structure
+ that is shared by all threads. Huge objects are typically used by
+ applications infrequently enough that this single data structure is not a
+ scalability issue.</para>
<para>Each chunk that is managed by an arena tracks its contents as runs of
contiguous pages (unused, backing a set of small objects, or backing one
@@ -646,18 +536,18 @@ for (i = 0; i < nbins; i++) {
allocations in constant time.</para>
<para>Small objects are managed in groups by page runs. Each run maintains
- a frontier and free list to track which regions are in use. Allocation
- requests that are no more than half the quantum (8 or 16, depending on
- architecture) are rounded up to the nearest power of two that is at least
- <code language="C">sizeof(<type>double</type>)</code>. All other small
- object size classes are multiples of the quantum, spaced such that internal
- fragmentation is limited to approximately 25% for all but the smallest size
- classes. Allocation requests that are larger than the maximum small size
- class, but small enough to fit in an arena-managed chunk (see the <link
- linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), are
- rounded up to the nearest run size. Allocation requests that are too large
- to fit in an arena-managed chunk are rounded up to the nearest multiple of
- the chunk size.</para>
+ a bitmap to track which regions are in use. Allocation requests that are no
+ more than half the quantum (8 or 16, depending on architecture) are rounded
+ up to the nearest power of two that is at least <code
+ language="C">sizeof(<type>double</type>)</code>. All other object size
+ classes are multiples of the quantum, spaced such that there are four size
+ classes for each doubling in size, which limits internal fragmentation to
+ approximately 20% for all but the smallest size classes. Small size classes
+ are smaller than four times the page size, large size classes are smaller
+ than the chunk size (see the <link
+ linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), and
+ huge size classes extend from the chunk size up to one size class less than
+ the full address space size.</para>
<para>Allocations are packed tightly together, which can be an issue for
multi-threaded applications. If you need to assure that allocations do not
@@ -665,8 +555,29 @@ for (i = 0; i < nbins; i++) {
nearest multiple of the cacheline size, or specify cacheline alignment when
allocating.</para>
- <para>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit
- system, the size classes in each category are as shown in <xref
+ <para>The <function>realloc<parameter/></function>,
+ <function>rallocx<parameter/></function>, and
+ <function>xallocx<parameter/></function> functions may resize allocations
+ without moving them under limited circumstances. Unlike the
+ <function>*allocx<parameter/></function> API, the standard API does not
+ officially round up the usable size of an allocation to the nearest size
+ class, so technically it is necessary to call
+ <function>realloc<parameter/></function> to grow e.g. a 9-byte allocation to
+ 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage
+ trivially succeeds in place as long as the pre-size and post-size both round
+ up to the same size class. No other API guarantees are made regarding
+ in-place resizing, but the current implementation also tries to resize large
+ and huge allocations in place, as long as the pre-size and post-size are
+ both large or both huge. In such cases shrinkage always succeeds for large
+ size classes, but for huge size classes the chunk allocator must support
+ splitting (see <link
+ linkend="arena.i.chunk_hooks"><mallctl>arena.&lt;i&gt;.chunk_hooks</mallctl></link>).
+ Growth only succeeds if the trailing memory is currently available, and
+ additionally for huge size classes the chunk allocator must support
+ merging.</para>
+
+ <para>Assuming 2 MiB chunks, 4 KiB pages, and a 16-byte quantum on a
+ 64-bit system, the size classes in each category are as shown in <xref
linkend="size_classes" xrefstyle="template:Table %n"/>.</para>
<table xml:id="size_classes" frame="all">
@@ -684,13 +595,13 @@ for (i = 0; i < nbins; i++) {
</thead>
<tbody>
<row>
- <entry morerows="6">Small</entry>
+ <entry morerows="8">Small</entry>
<entry>lg</entry>
<entry>[8]</entry>
</row>
<row>
<entry>16</entry>
- <entry>[16, 32, 48, ..., 128]</entry>
+ <entry>[16, 32, 48, 64, 80, 96, 112, 128]</entry>
</row>
<row>
<entry>32</entry>
@@ -710,17 +621,77 @@ for (i = 0; i < nbins; i++) {
</row>
<row>
<entry>512</entry>
- <entry>[2560, 3072, 3584]</entry>
+ <entry>[2560, 3072, 3584, 4096]</entry>
+ </row>
+ <row>
+ <entry>1 KiB</entry>
+ <entry>[5 KiB, 6 KiB, 7 KiB, 8 KiB]</entry>
+ </row>
+ <row>
+ <entry>2 KiB</entry>
+ <entry>[10 KiB, 12 KiB, 14 KiB]</entry>
+ </row>
+ <row>
+ <entry morerows="7">Large</entry>
+ <entry>2 KiB</entry>
+ <entry>[16 KiB]</entry>
</row>
<row>
- <entry>Large</entry>
<entry>4 KiB</entry>
- <entry>[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</entry>
+ <entry>[20 KiB, 24 KiB, 28 KiB, 32 KiB]</entry>
+ </row>
+ <row>
+ <entry>8 KiB</entry>
+              <entry>[40 KiB, 48 KiB, 56 KiB, 64 KiB]</entry>
+ </row>
+ <row>
+ <entry>16 KiB</entry>
+ <entry>[80 KiB, 96 KiB, 112 KiB, 128 KiB]</entry>
+ </row>
+ <row>
+ <entry>32 KiB</entry>
+ <entry>[160 KiB, 192 KiB, 224 KiB, 256 KiB]</entry>
+ </row>
+ <row>
+ <entry>64 KiB</entry>
+ <entry>[320 KiB, 384 KiB, 448 KiB, 512 KiB]</entry>
+ </row>
+ <row>
+ <entry>128 KiB</entry>
+ <entry>[640 KiB, 768 KiB, 896 KiB, 1 MiB]</entry>
+ </row>
+ <row>
+ <entry>256 KiB</entry>
+ <entry>[1280 KiB, 1536 KiB, 1792 KiB]</entry>
+ </row>
+ <row>
+ <entry morerows="6">Huge</entry>
+ <entry>256 KiB</entry>
+ <entry>[2 MiB]</entry>
+ </row>
+ <row>
+ <entry>512 KiB</entry>
+ <entry>[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]</entry>
+ </row>
+ <row>
+ <entry>1 MiB</entry>
+ <entry>[5 MiB, 6 MiB, 7 MiB, 8 MiB]</entry>
+ </row>
+ <row>
+ <entry>2 MiB</entry>
+ <entry>[10 MiB, 12 MiB, 14 MiB, 16 MiB]</entry>
</row>
<row>
- <entry>Huge</entry>
<entry>4 MiB</entry>
- <entry>[4 MiB, 8 MiB, 12 MiB, ...]</entry>
+ <entry>[20 MiB, 24 MiB, 28 MiB, 32 MiB]</entry>
+ </row>
+ <row>
+ <entry>8 MiB</entry>
+ <entry>[40 MiB, 48 MiB, 56 MiB, 64 MiB]</entry>
+ </row>
+ <row>
+ <entry>...</entry>
+ <entry>...</entry>
</row>
</tbody>
</tgroup>
@@ -765,23 +736,23 @@ for (i = 0; i < nbins; i++) {
detecting whether another thread caused a refresh.</para></listitem>
</varlistentry>
- <varlistentry id="config.debug">
+ <varlistentry id="config.cache_oblivious">
<term>
- <mallctl>config.debug</mallctl>
+ <mallctl>config.cache_oblivious</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-debug</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para><option>--enable-cache-oblivious</option> was specified
+ during build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.dss">
+ <varlistentry id="config.debug">
<term>
- <mallctl>config.dss</mallctl>
+ <mallctl>config.debug</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-dss</option> was specified during
+ <listitem><para><option>--enable-debug</option> was specified during
build configuration.</para></listitem>
</varlistentry>
@@ -805,16 +776,6 @@ for (i = 0; i < nbins; i++) {
during build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.mremap">
- <term>
- <mallctl>config.mremap</mallctl>
- (<type>bool</type>)
- <literal>r-</literal>
- </term>
- <listitem><para><option>--enable-mremap</option> was specified during
- build configuration.</para></listitem>
- </varlistentry>
-
<varlistentry id="config.munmap">
<term>
<mallctl>config.munmap</mallctl>
@@ -940,10 +901,15 @@ for (i = 0; i < nbins; i++) {
<manvolnum>2</manvolnum></citerefentry>) allocation precedence as
related to <citerefentry><refentrytitle>mmap</refentrytitle>
<manvolnum>2</manvolnum></citerefentry> allocation. The following
- settings are supported: &ldquo;disabled&rdquo;, &ldquo;primary&rdquo;,
- and &ldquo;secondary&rdquo;. The default is &ldquo;secondary&rdquo; if
- <link linkend="config.dss"><mallctl>config.dss</mallctl></link> is
- true, &ldquo;disabled&rdquo; otherwise.
+ settings are supported if
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system: &ldquo;disabled&rdquo;, &ldquo;primary&rdquo;, and
+ &ldquo;secondary&rdquo;; otherwise only &ldquo;disabled&rdquo; is
+ supported. The default is &ldquo;secondary&rdquo; if
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system; &ldquo;disabled&rdquo; otherwise.
</para></listitem>
</varlistentry>
@@ -956,7 +922,7 @@ for (i = 0; i < nbins; i++) {
<listitem><para>Virtual memory chunk size (log base 2). If a chunk
size outside the supported size range is specified, the size is
silently clipped to the minimum/maximum supported size. The default
- chunk size is 4 MiB (2^22).
+ chunk size is 2 MiB (2^21).
</para></listitem>
</varlistentry>
@@ -986,7 +952,11 @@ for (i = 0; i < nbins; i++) {
provides the kernel with sufficient information to recycle dirty pages
if physical memory becomes scarce and the pages remain unused. The
default minimum ratio is 8:1 (2^3:1); an option value of -1 will
- disable dirty page purging.</para></listitem>
+ disable dirty page purging. See <link
+ linkend="arenas.lg_dirty_mult"><mallctl>arenas.lg_dirty_mult</mallctl></link>
+ and <link
+ linkend="arena.i.lg_dirty_mult"><mallctl>arena.&lt;i&gt;.lg_dirty_mult</mallctl></link>
+ for related dynamic control options.</para></listitem>
</varlistentry>
<varlistentry id="opt.stats_print">
@@ -1003,26 +973,34 @@ for (i = 0; i < nbins; i++) {
<option>--enable-stats</option> is specified during configuration, this
has the potential to cause deadlock for a multi-threaded process that
exits while one or more threads are executing in the memory allocation
- functions. Therefore, this option should only be used with care; it is
- primarily intended as a performance tuning aid during application
+ functions. Furthermore, <function>atexit<parameter/></function> may
+ allocate memory during application initialization and then deadlock
+ internally when jemalloc in turn calls
+ <function>atexit<parameter/></function>, so this option is not
+        universally usable (though the application can register its own
+ <function>atexit<parameter/></function> function with equivalent
+ functionality). Therefore, this option should only be used with care;
+ it is primarily intended as a performance tuning aid during application
development. This option is disabled by default.</para></listitem>
</varlistentry>
<varlistentry id="opt.junk">
<term>
<mallctl>opt.junk</mallctl>
- (<type>bool</type>)
+ (<type>const char *</type>)
<literal>r-</literal>
[<option>--enable-fill</option>]
</term>
- <listitem><para>Junk filling enabled/disabled. If enabled, each byte
- of uninitialized allocated memory will be initialized to
- <literal>0xa5</literal>. All deallocated memory will be initialized to
- <literal>0x5a</literal>. This is intended for debugging and will
- impact performance negatively. This option is disabled by default
- unless <option>--enable-debug</option> is specified during
- configuration, in which case it is enabled by default unless running
- inside <ulink
+ <listitem><para>Junk filling. If set to "alloc", each byte of
+ uninitialized allocated memory will be initialized to
+ <literal>0xa5</literal>. If set to "free", all deallocated memory will
+ be initialized to <literal>0x5a</literal>. If set to "true", both
+ allocated and deallocated memory will be initialized, and if set to
+ "false", junk filling be disabled entirely. This is intended for
+ debugging and will impact performance negatively. This option is
+ "false" by default unless <option>--enable-debug</option> is specified
+ during configuration, in which case it is "true" by default unless
+ running inside <ulink
url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
</varlistentry>
@@ -1076,9 +1054,8 @@ for (i = 0; i < nbins; i++) {
<listitem><para>Zero filling enabled/disabled. If enabled, each byte
of uninitialized allocated memory will be initialized to 0. Note that
this initialization only happens once for each byte, so
- <function>realloc<parameter/></function>,
- <function>rallocx<parameter/></function> and
- <function>rallocm<parameter/></function> calls do not zero memory that
+ <function>realloc<parameter/></function> and
+ <function>rallocx<parameter/></function> calls do not zero memory that
was previously allocated. This is intended for debugging and will
impact performance negatively. This option is disabled by default.
</para></listitem>
@@ -1097,19 +1074,6 @@ for (i = 0; i < nbins; i++) {
is disabled by default.</para></listitem>
</varlistentry>
- <varlistentry id="opt.valgrind">
- <term>
- <mallctl>opt.valgrind</mallctl>
- (<type>bool</type>)
- <literal>r-</literal>
- [<option>--enable-valgrind</option>]
- </term>
- <listitem><para><ulink url="http://valgrind.org/">Valgrind</ulink>
- support enabled/disabled. This option is vestigal because jemalloc
- auto-detects whether it is running inside Valgrind. This option is
- disabled by default, unless running inside Valgrind.</para></listitem>
- </varlistentry>
-
<varlistentry id="opt.xmalloc">
<term>
<mallctl>opt.xmalloc</mallctl>
@@ -1137,16 +1101,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-tcache</option>]
</term>
- <listitem><para>Thread-specific caching enabled/disabled. When there
- are multiple threads, each thread uses a thread-specific cache for
- objects up to a certain size. Thread-specific caching allows many
- allocations to be satisfied without performing any thread
- synchronization, at the cost of increased memory use. See the
- <link
+ <listitem><para>Thread-specific caching (tcache) enabled/disabled. When
+ there are multiple threads, each thread uses a tcache for objects up to
+ a certain size. Thread-specific caching allows many allocations to be
+ satisfied without performing any thread synchronization, at the cost of
+ increased memory use. See the <link
linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
option for related tuning information. This option is enabled by
default unless running inside <ulink
- url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
+ url="http://valgrind.org/">Valgrind</ulink>, in which case it is
+ forcefully disabled.</para></listitem>
</varlistentry>
<varlistentry id="opt.lg_tcache_max">
@@ -1157,8 +1121,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
[<option>--enable-tcache</option>]
</term>
<listitem><para>Maximum size class (log base 2) to cache in the
- thread-specific cache. At a minimum, all small size classes are
- cached, and at a maximum all large size classes are cached. The
+ thread-specific cache (tcache). At a minimum, all small size classes
+ are cached, and at a maximum all large size classes are cached. The
default maximum is 32 KiB (2^15).</para></listitem>
</varlistentry>
@@ -1183,8 +1147,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option for information on high-water-triggered profile dumping, and the
<link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl></link>
option for final profile dumping. Profile output is compatible with
- the included <command>pprof</command> Perl script, which originates
- from the <ulink url="http://code.google.com/p/gperftools/">gperftools
+ the <command>jeprof</command> command, which is based on the
+ <command>pprof</command> that is developed as part of the <ulink
+ url="http://code.google.com/p/gperftools/">gperftools
package</ulink>.</para></listitem>
</varlistentry>
@@ -1206,7 +1171,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<term>
<mallctl>opt.prof_active</mallctl>
(<type>bool</type>)
- <literal>rw</literal>
+ <literal>r-</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Profiling activated/deactivated. This is a secondary
@@ -1219,10 +1184,25 @@ malloc_conf = "xmalloc:true";]]></programlisting>
This option is enabled by default.</para></listitem>
</varlistentry>
+ <varlistentry id="opt.prof_thread_active_init">
+ <term>
+ <mallctl>opt.prof_thread_active_init</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Initial setting for <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ in newly created threads. The initial setting for newly created threads
+ can also be changed during execution via the <link
+ linkend="prof.thread_active_init"><mallctl>prof.thread_active_init</mallctl></link>
+ mallctl. This option is enabled by default.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.lg_prof_sample">
<term>
<mallctl>opt.lg_prof_sample</mallctl>
- (<type>ssize_t</type>)
+ (<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-prof</option>]
</term>
@@ -1276,13 +1256,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-prof</option>]
</term>
- <listitem><para>Trigger a memory profile dump every time the total
- virtual memory exceeds the previous maximum. Profiles are dumped to
- files named according to the pattern
- <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
- where <literal>&lt;prefix&gt;</literal> is controlled by the <link
- linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. This option is disabled by default.</para></listitem>
+ <listitem><para>Set the initial state of <link
+ linkend="prof.gdump"><mallctl>prof.gdump</mallctl></link>, which when
+ enabled triggers a memory profile dump every time the total virtual
+ memory exceeds the previous maximum. This option is disabled by
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.prof_final">
@@ -1299,7 +1277,13 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the <link
linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. This option is enabled by default.</para></listitem>
+ option. Note that <function>atexit<parameter/></function> may allocate
+ memory during application initialization and then deadlock internally
+ when jemalloc in turn calls <function>atexit<parameter/></function>, so
+ this option is not universally usable (though the application can
+ register its own <function>atexit<parameter/></function> function with
+ equivalent functionality). This option is disabled by
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.prof_leak">
@@ -1396,7 +1380,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Enable/disable calling thread's tcache. The tcache is
implicitly flushed as a side effect of becoming
disabled (see <link
- lenkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>).
+ linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>).
</para></listitem>
</varlistentry>
@@ -1407,9 +1391,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>--</literal>
[<option>--enable-tcache</option>]
</term>
- <listitem><para>Flush calling thread's tcache. This interface releases
- all cached objects and internal data structures associated with the
- calling thread's thread-specific cache. Ordinarily, this interface
+ <listitem><para>Flush calling thread's thread-specific cache (tcache).
+ This interface releases all cached objects and internal data structures
+ associated with the calling thread's tcache. Ordinarily, this interface
need not be called, since automatic periodic incremental garbage
collection occurs, and the thread cache is automatically discarded when
a thread exits. However, garbage collection is triggered by allocation
@@ -1418,10 +1402,91 @@ malloc_conf = "xmalloc:true";]]></programlisting>
the developer may find manual flushing useful.</para></listitem>
</varlistentry>
+ <varlistentry id="thread.prof.name">
+ <term>
+ <mallctl>thread.prof.name</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal> or
+ <literal>-w</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Get/set the descriptive name associated with the calling
+ thread in memory profile dumps. An internal copy of the name string is
+ created, so the input string need not be maintained after this interface
+ completes execution. The output string of this interface should be
+ copied for non-ephemeral uses, because multiple implementation details
+ can cause asynchronous string deallocation. Furthermore, each
+ invocation of this interface can only read or write; simultaneous
+ read/write is not supported due to string lifetime limitations. The
+ name string must be nil-terminated and comprised only of characters in the
+ sets recognized
+ by <citerefentry><refentrytitle>isgraph</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry> and
+ <citerefentry><refentrytitle>isblank</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="thread.prof.active">
+ <term>
+ <mallctl>thread.prof.active</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Control whether sampling is currently active for the
+ calling thread. This is an activation mechanism in addition to <link
+ linkend="prof.active"><mallctl>prof.active</mallctl></link>; both must
+ be active for the calling thread to sample. This flag is enabled by
+ default.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.create">
+ <term>
+ <mallctl>tcache.create</mallctl>
+ (<type>unsigned</type>)
+ <literal>r-</literal>
+ [<option>--enable-tcache</option>]
+ </term>
+ <listitem><para>Create an explicit thread-specific cache (tcache) and
+ return an identifier that can be passed to the <link
+ linkend="MALLOCX_TCACHE"><constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant></link>
+ macro to explicitly use the specified cache rather than the
+ automatically managed one that is used by default. Each explicit cache
+ can be used by only one thread at a time; the application must assure
+ that this constraint holds.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.flush">
+ <term>
+ <mallctl>tcache.flush</mallctl>
+ (<type>unsigned</type>)
+ <literal>-w</literal>
+ [<option>--enable-tcache</option>]
+ </term>
+ <listitem><para>Flush the specified thread-specific cache (tcache). The
+ same considerations apply to this interface as to <link
+ linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>,
+ except that the tcache will never be automatically discarded.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.destroy">
+ <term>
+ <mallctl>tcache.destroy</mallctl>
+ (<type>unsigned</type>)
+ <literal>-w</literal>
+ [<option>--enable-tcache</option>]
+ </term>
+ <listitem><para>Flush the specified thread-specific cache (tcache) and
+ make the identifier available for use during a future tcache creation.
+ </para></listitem>
+ </varlistentry>
+
<varlistentry id="arena.i.purge">
<term>
<mallctl>arena.&lt;i&gt;.purge</mallctl>
- (<type>unsigned</type>)
+ (<type>void</type>)
<literal>--</literal>
</term>
<listitem><para>Purge unused dirty pages for arena &lt;i&gt;, or for
@@ -1439,14 +1504,222 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Set the precedence of dss allocation as related to mmap
allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
<link
- linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>. Note
- that even during huge allocation this setting is read from the arena
- that would be chosen for small or large allocation so that applications
- can depend on consistent dss versus mmap allocation regardless of
- allocation size. See <link
- linkend="opt.dss"><mallctl>opt.dss</mallctl></link> for supported
- settings.
- </para></listitem>
+ linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>. See
+ <link linkend="opt.dss"><mallctl>opt.dss</mallctl></link> for supported
+ settings.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.lg_dirty_mult">
+ <term>
+ <mallctl>arena.&lt;i&gt;.lg_dirty_mult</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Current per-arena minimum ratio (log base 2) of active
+ to dirty pages for arena &lt;i&gt;. Each time this interface is set and
+ the ratio is increased, pages are synchronously purged as necessary to
+ impose the new ratio. See <link
+ linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
+ for additional information.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.chunk_hooks">
+ <term>
+ <mallctl>arena.&lt;i&gt;.chunk_hooks</mallctl>
+ (<type>chunk_hooks_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Get or set the chunk management hook functions for arena
+ &lt;i&gt;. The functions must be capable of operating on all extant
+ chunks associated with arena &lt;i&gt;, usually by passing unknown
+ chunks to the replaced functions. In practice, it is feasible to
+ control allocation for arenas created via <link
+ linkend="arenas.extend"><mallctl>arenas.extend</mallctl></link> such
+ that all chunks originate from an application-supplied chunk allocator
+ (by setting custom chunk hook functions just after arena creation), but
+ the automatically created arenas may have already created chunks prior
+ to the application having an opportunity to take over chunk
+ allocation.</para>
+
+ <programlisting language="C"><![CDATA[
+typedef struct {
+ chunk_alloc_t *alloc;
+ chunk_dalloc_t *dalloc;
+ chunk_commit_t *commit;
+ chunk_decommit_t *decommit;
+ chunk_purge_t *purge;
+ chunk_split_t *split;
+ chunk_merge_t *merge;
+} chunk_hooks_t;]]></programlisting>
+ <para>The <type>chunk_hooks_t</type> structure comprises function
+ pointers which are described individually below. jemalloc uses these
+ functions to manage chunk lifetime, which starts off with allocation of
+ mapped committed memory, in the simplest case followed by deallocation.
+ However, there are performance and platform reasons to retain chunks for
+ later reuse. Cleanup attempts cascade from deallocation to decommit to
+ purging, which gives the chunk management functions opportunities to
+ reject the most permanent cleanup operations in favor of less permanent
+ (and often less costly) operations. The chunk splitting and merging
+ operations can also be opted out of, but this is mainly intended to
+ support platforms on which virtual memory mappings provided by the
+ operating system kernel do not automatically coalesce and split, e.g.
+ Windows.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef void *<function>(chunk_alloc_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>alignment</parameter></paramdef>
+ <paramdef>bool *<parameter>zero</parameter></paramdef>
+ <paramdef>bool *<parameter>commit</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk allocation function conforms to the
+ <type>chunk_alloc_t</type> type and upon success returns a pointer to
+ <parameter>size</parameter> bytes of mapped memory on behalf of arena
+ <parameter>arena_ind</parameter> such that the chunk's base address is a
+ multiple of <parameter>alignment</parameter>, as well as setting
+ <parameter>*zero</parameter> to indicate whether the chunk is zeroed and
+ <parameter>*commit</parameter> to indicate whether the chunk is
+ committed. Upon error the function returns <constant>NULL</constant>
+ and leaves <parameter>*zero</parameter> and
+ <parameter>*commit</parameter> unmodified. The
+ <parameter>size</parameter> parameter is always a multiple of the chunk
+ size. The <parameter>alignment</parameter> parameter is always a power
+ of two at least as large as the chunk size. Zeroing is mandatory if
+ <parameter>*zero</parameter> is true upon function entry. Committing is
+ mandatory if <parameter>*commit</parameter> is true upon function entry.
+ If <parameter>chunk</parameter> is not <constant>NULL</constant>, the
+ returned pointer must be <parameter>chunk</parameter> on success or
+ <constant>NULL</constant> on error. Committed memory may be committed
+ in absolute terms as on a system that does not overcommit, or in
+ implicit terms as on a system that overcommits and satisfies physical
+ memory needs on demand via soft page faults. Note that replacing the
+ default chunk allocation function makes the arena's <link
+ linkend="arena.i.dss"><mallctl>arena.&lt;i&gt;.dss</mallctl></link>
+ setting irrelevant.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_dalloc_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>
+ A chunk deallocation function conforms to the
+ <type>chunk_dalloc_t</type> type and deallocates a
+ <parameter>chunk</parameter> of given <parameter>size</parameter> with
+ <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates opt-out from
+ deallocation; the virtual memory mapping associated with the chunk
+ remains mapped, in the same commit state, and available for future use,
+ in which case it will be automatically retained for later reuse.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_commit_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk commit function conforms to the
+ <type>chunk_commit_t</type> type and commits zeroed physical memory to
+ back pages within a <parameter>chunk</parameter> of given
+ <parameter>size</parameter> at <parameter>offset</parameter> bytes,
+ extending for <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>, returning false upon success.
+ Committed memory may be committed in absolute terms as on a system that
+ does not overcommit, or in implicit terms as on a system that
+ overcommits and satisfies physical memory needs on demand via soft page
+ faults. If the function returns true, this indicates insufficient
+ physical memory to satisfy the request.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_decommit_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk decommit function conforms to the
+ <type>chunk_decommit_t</type> type and decommits any physical memory
+ that is backing pages within a <parameter>chunk</parameter> of given
+ <parameter>size</parameter> at <parameter>offset</parameter> bytes,
+ extending for <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>, returning false upon success, in which
+ case the pages will be committed via the chunk commit function before
+ being reused. If the function returns true, this indicates opt-out from
+ decommit; the memory remains committed and available for future use, in
+ which case it will be automatically retained for later reuse.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_purge_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk purge function conforms to the <type>chunk_purge_t</type>
+ type and optionally discards physical pages within the virtual memory
+ mapping associated with <parameter>chunk</parameter> of given
+ <parameter>size</parameter> at <parameter>offset</parameter> bytes,
+ extending for <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>, returning false if pages within the
+ purged virtual memory range will be zero-filled the next time they are
+ accessed.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_split_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>size_a</parameter></paramdef>
+ <paramdef>size_t <parameter>size_b</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk split function conforms to the <type>chunk_split_t</type>
+ type and optionally splits <parameter>chunk</parameter> of given
+ <parameter>size</parameter> into two adjacent chunks, the first of
+ <parameter>size_a</parameter> bytes, and the second of
+ <parameter>size_b</parameter> bytes, operating on
+ <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates that the chunk
+ remains unsplit and therefore should continue to be operated on as a
+ whole.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(chunk_merge_t)</function></funcdef>
+ <paramdef>void *<parameter>chunk_a</parameter></paramdef>
+ <paramdef>size_t <parameter>size_a</parameter></paramdef>
+ <paramdef>void *<parameter>chunk_b</parameter></paramdef>
+ <paramdef>size_t <parameter>size_b</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>A chunk merge function conforms to the <type>chunk_merge_t</type>
+ type and optionally merges adjacent chunks,
+ <parameter>chunk_a</parameter> of given <parameter>size_a</parameter>
+ and <parameter>chunk_b</parameter> of given
+ <parameter>size_b</parameter> into one contiguous chunk, operating on
+ <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates that the chunks
+ remain distinct mappings and therefore should continue to be operated on
+ independently.</para>
+ </listitem>
</varlistentry>
<varlistentry id="arenas.narenas">
@@ -1470,6 +1743,20 @@ malloc_conf = "xmalloc:true";]]></programlisting>
initialized.</para></listitem>
</varlistentry>
+ <varlistentry id="arenas.lg_dirty_mult">
+ <term>
+ <mallctl>arenas.lg_dirty_mult</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Current default per-arena minimum ratio (log base 2) of
+ active to dirty pages, used to initialize <link
+ linkend="arena.i.lg_dirty_mult"><mallctl>arena.&lt;i&gt;.lg_dirty_mult</mallctl></link>
+ during arena creation. See <link
+ linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
+ for additional information.</para></listitem>
+ </varlistentry>
+
<varlistentry id="arenas.quantum">
<term>
<mallctl>arenas.quantum</mallctl>
@@ -1548,7 +1835,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<varlistentry id="arenas.nlruns">
<term>
<mallctl>arenas.nlruns</mallctl>
- (<type>size_t</type>)
+ (<type>unsigned</type>)
<literal>r-</literal>
</term>
<listitem><para>Total number of large size classes.</para></listitem>
@@ -1564,14 +1851,23 @@ malloc_conf = "xmalloc:true";]]></programlisting>
class.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.purge">
+ <varlistentry id="arenas.nhchunks">
<term>
- <mallctl>arenas.purge</mallctl>
+ <mallctl>arenas.nhchunks</mallctl>
(<type>unsigned</type>)
- <literal>-w</literal>
+ <literal>r-</literal>
</term>
- <listitem><para>Purge unused dirty pages for the specified arena, or
- for all arenas if none is specified.</para></listitem>
+ <listitem><para>Total number of huge size classes.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arenas.hchunk.i.size">
+ <term>
+ <mallctl>arenas.hchunk.&lt;i&gt;.size</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Maximum size supported by this huge size
+ class.</para></listitem>
</varlistentry>
<varlistentry id="arenas.extend">
@@ -1584,6 +1880,20 @@ malloc_conf = "xmalloc:true";]]></programlisting>
and returning the new arena index.</para></listitem>
</varlistentry>
+ <varlistentry id="prof.thread_active_init">
+ <term>
+ <mallctl>prof.thread_active_init</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Control the initial setting for <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ in newly created threads. See the <link
+ linkend="opt.prof_thread_active_init"><mallctl>opt.prof_thread_active_init</mallctl></link>
+ option for additional information.</para></listitem>
+ </varlistentry>
+
<varlistentry id="prof.active">
<term>
<mallctl>prof.active</mallctl>
@@ -1594,8 +1904,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Control whether sampling is currently active. See the
<link
linkend="opt.prof_active"><mallctl>opt.prof_active</mallctl></link>
- option for additional information.
- </para></listitem>
+ option for additional information, as well as the interrelated <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ mallctl.</para></listitem>
</varlistentry>
<varlistentry id="prof.dump">
@@ -1614,6 +1925,49 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option.</para></listitem>
</varlistentry>
+ <varlistentry id="prof.gdump">
+ <term>
+ <mallctl>prof.gdump</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>When enabled, trigger a memory profile dump every time
+ the total virtual memory exceeds the previous maximum. Profiles are
+ dumped to files named according to the pattern
+ <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
+ where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+ option.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="prof.reset">
+ <term>
+ <mallctl>prof.reset</mallctl>
+ (<type>size_t</type>)
+ <literal>-w</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Reset all memory profile statistics, and optionally
+ update the sample rate (see <link
+ linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>
+ and <link
+ linkend="prof.lg_sample"><mallctl>prof.lg_sample</mallctl></link>).
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="prof.lg_sample">
+ <term>
+ <mallctl>prof.lg_sample</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Get the current sample rate (see <link
+ linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
+ </para></listitem>
+ </varlistentry>
+
<varlistentry id="prof.interval">
<term>
<mallctl>prof.interval</mallctl>
@@ -1637,9 +1991,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</term>
<listitem><para>Pointer to a counter that contains an approximate count
of the current number of bytes in active pages. The estimate may be
- high, but never low, because each arena rounds up to the nearest
- multiple of the chunk size when computing its contribution to the
- counter. Note that the <link
+ high, but never low, because each arena rounds up when computing its
+ contribution to the counter. Note that the <link
linkend="epoch"><mallctl>epoch</mallctl></link> mallctl has no bearing
on this counter. Furthermore, counter consistency is maintained via
atomic operations, so it is necessary to use an atomic operation in
@@ -1670,88 +2023,56 @@ malloc_conf = "xmalloc:true";]]></programlisting>
equal to <link
linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link>.
This does not include <link linkend="stats.arenas.i.pdirty">
- <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link> and pages
+ <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link>, nor pages
entirely devoted to allocator metadata.</para></listitem>
</varlistentry>
- <varlistentry id="stats.mapped">
+ <varlistentry id="stats.metadata">
<term>
- <mallctl>stats.mapped</mallctl>
+ <mallctl>stats.metadata</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Total number of bytes in chunks mapped on behalf of the
- application. This is a multiple of the chunk size, and is at least as
- large as <link
- linkend="stats.active"><mallctl>stats.active</mallctl></link>. This
- does not include inactive chunks.</para></listitem>
- </varlistentry>
-
- <varlistentry id="stats.chunks.current">
- <term>
- <mallctl>stats.chunks.current</mallctl>
- (<type>size_t</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Total number of chunks actively mapped on behalf of the
- application. This does not include inactive chunks.
- </para></listitem>
- </varlistentry>
-
- <varlistentry id="stats.chunks.total">
- <term>
- <mallctl>stats.chunks.total</mallctl>
- (<type>uint64_t</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Cumulative number of chunks allocated.</para></listitem>
+ <listitem><para>Total number of bytes dedicated to metadata, which
+ comprise base allocations used for bootstrap-sensitive internal
+ allocator data structures, arena chunk headers (see <link
+ linkend="stats.arenas.i.metadata.mapped"><mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl></link>),
+ and internal allocations (see <link
+ linkend="stats.arenas.i.metadata.allocated"><mallctl>stats.arenas.&lt;i&gt;.metadata.allocated</mallctl></link>).</para></listitem>
</varlistentry>
- <varlistentry id="stats.chunks.high">
+ <varlistentry id="stats.resident">
<term>
- <mallctl>stats.chunks.high</mallctl>
+ <mallctl>stats.resident</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Maximum number of active chunks at any time thus far.
- </para></listitem>
+ <listitem><para>Maximum number of bytes in physically resident data
+ pages mapped by the allocator, comprising all pages dedicated to
+ allocator metadata, pages backing active allocations, and unused dirty
+ pages. This is a maximum rather than precise because pages may not
+ actually be physically resident if they correspond to demand-zeroed
+ virtual memory that has not yet been touched. This is a multiple of the
+ page size, and is larger than <link
+ linkend="stats.active"><mallctl>stats.active</mallctl></link>.</para></listitem>
</varlistentry>
- <varlistentry id="stats.huge.allocated">
+ <varlistentry id="stats.mapped">
<term>
- <mallctl>stats.huge.allocated</mallctl>
+ <mallctl>stats.mapped</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Number of bytes currently allocated by huge objects.
- </para></listitem>
- </varlistentry>
-
- <varlistentry id="stats.huge.nmalloc">
- <term>
- <mallctl>stats.huge.nmalloc</mallctl>
- (<type>uint64_t</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Cumulative number of huge allocation requests.
- </para></listitem>
- </varlistentry>
-
- <varlistentry id="stats.huge.ndalloc">
- <term>
- <mallctl>stats.huge.ndalloc</mallctl>
- (<type>uint64_t</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Cumulative number of huge deallocation requests.
- </para></listitem>
+ <listitem><para>Total number of bytes in active chunks mapped by the
+ allocator. This is a multiple of the chunk size, and is larger than
+ <link linkend="stats.active"><mallctl>stats.active</mallctl></link>.
+ This does not include inactive chunks, even those that contain unused
+ dirty pages, which means that there is no strict ordering between this
+ and <link
+ linkend="stats.resident"><mallctl>stats.resident</mallctl></link>.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.dss">
@@ -1768,6 +2089,18 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</para></listitem>
</varlistentry>
+ <varlistentry id="stats.arenas.i.lg_dirty_mult">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.lg_dirty_mult</mallctl>
+ (<type>ssize_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Minimum ratio (log base 2) of active to dirty pages.
+ See <link
+ linkend="opt.lg_dirty_mult"><mallctl>opt.lg_dirty_mult</mallctl></link>
+ for details.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.arenas.i.nthreads">
<term>
<mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
@@ -1809,6 +2142,38 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Number of mapped bytes.</para></listitem>
</varlistentry>
+ <varlistentry id="stats.arenas.i.metadata.mapped">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of mapped bytes in arena chunk headers, which
+ track the states of the non-metadata pages.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.metadata.allocated">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.metadata.allocated</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of bytes dedicated to internal allocations.
+ Internal allocations differ from application-originated allocations in
+ that they are for internal use, and that they are omitted from heap
+ profiles. This statistic is reported separately from <link
+ linkend="stats.metadata"><mallctl>stats.metadata</mallctl></link> and
+ <link
+ linkend="stats.arenas.i.metadata.mapped"><mallctl>stats.arenas.&lt;i&gt;.metadata.mapped</mallctl></link>
+ because it overlaps with e.g. the <link
+ linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link> and
+ <link linkend="stats.active"><mallctl>stats.active</mallctl></link>
+ statistics, whereas the other metadata statistics do
+ not.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.arenas.i.npurge">
<term>
<mallctl>stats.arenas.&lt;i&gt;.npurge</mallctl>
@@ -1930,15 +2295,48 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.bins.j.allocated">
+ <varlistentry id="stats.arenas.i.huge.allocated">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.allocated</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.huge.allocated</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Current number of bytes allocated by
- bin.</para></listitem>
+ <listitem><para>Number of bytes currently allocated by huge objects.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.huge.nmalloc">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.huge.nmalloc</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of huge allocation requests served
+ directly by the arena.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.huge.ndalloc">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.huge.ndalloc</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of huge deallocation requests served
+ directly by the arena.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.huge.nrequests">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.huge.nrequests</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of huge allocation requests.
+ </para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.bins.j.nmalloc">
@@ -1974,6 +2372,17 @@ malloc_conf = "xmalloc:true";]]></programlisting>
requests.</para></listitem>
</varlistentry>
+ <varlistentry id="stats.arenas.i.bins.j.curregs">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curregs</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Current number of regions for this size
+ class.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.arenas.i.bins.j.nfills">
<term>
<mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nfills</mallctl>
@@ -2068,6 +2477,50 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Current number of runs for this size class.
</para></listitem>
</varlistentry>
+
+ <varlistentry id="stats.arenas.i.hchunks.j.nmalloc">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.nmalloc</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of allocation requests for this size
+ class served directly by the arena.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.hchunks.j.ndalloc">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.ndalloc</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of deallocation requests for this
+ size class served directly by the arena.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.hchunks.j.nrequests">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.nrequests</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Cumulative number of allocation requests for this size
+ class.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.hchunks.j.curhchunks">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.hchunks.&lt;j&gt;.curhchunks</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Current number of huge allocations for this size class.
+ </para></listitem>
+ </varlistentry>
</variablelist>
</refsect1>
<refsect1 id="debugging_malloc_problems">
@@ -2253,42 +2706,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
returns the usable size of the allocation pointed to by
<parameter>ptr</parameter>. </para>
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <para>The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>,
- <function>sallocm<parameter/></function>,
- <function>dallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions return
- <constant>ALLOCM_SUCCESS</constant> on success; otherwise they return an
- error value. The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions will fail if:
- <variablelist>
- <varlistentry>
- <term><errorname>ALLOCM_ERR_OOM</errorname></term>
-
- <listitem><para>Out of memory. Insufficient contiguous memory was
- available to service the allocation request. The
- <function>allocm<parameter/></function> function additionally sets
- <parameter>*ptr</parameter> to <constant>NULL</constant>, whereas
- the <function>rallocm<parameter/></function> function leaves
- <constant>*ptr</constant> unmodified.</para></listitem>
- </varlistentry>
- </variablelist>
- The <function>rallocm<parameter/></function> function will also
- fail if:
- <variablelist>
- <varlistentry>
- <term><errorname>ALLOCM_ERR_NOT_MOVED</errorname></term>
-
- <listitem><para><constant>ALLOCM_NO_MOVE</constant> was specified,
- but the reallocation request could not be serviced without moving
- the object.</para></listitem>
- </varlistentry>
- </variablelist>
- </para>
- </refsect2>
</refsect1>
<refsect1 id="environment">
<title>ENVIRONMENT</title>
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h
index 9d000c03d..12c617979 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena.h
@@ -1,30 +1,10 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
-/*
- * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
- * as small as possible such that this setting is still honored, without
- * violating other constraints. The goal is to make runs as small as possible
- * without exceeding a per run external fragmentation threshold.
- *
- * We use binary fixed point math for overhead computations, where the binary
- * point is implicitly RUN_BFP bits to the left.
- *
- * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
- * honored for some/all object sizes, since when heap profiling is enabled
- * there is one pointer of header overhead per object (plus a constant). This
- * constraint is relaxed (ignored) for runs that are so small that the
- * per-region overhead is greater than:
- *
- * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP))
- */
-#define RUN_BFP 12
-/* \/ Implicit binary fixed point. */
-#define RUN_MAX_OVRHD 0x0000003dU
-#define RUN_MAX_OVRHD_RELAX 0x00001800U
+#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)
/* Maximum number of regions in one run. */
-#define LG_RUN_MAXREGS 11
+#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN)
#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
/*
@@ -36,16 +16,18 @@
/*
* The minimum ratio of active:dirty pages per arena is computed as:
*
- * (nactive >> opt_lg_dirty_mult) >= ndirty
+ * (nactive >> lg_dirty_mult) >= ndirty
*
- * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
- * as many active pages as dirty pages.
+ * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as
+ * many active pages as dirty pages.
*/
#define LG_DIRTY_MULT_DEFAULT 3
-typedef struct arena_chunk_map_s arena_chunk_map_t;
-typedef struct arena_chunk_s arena_chunk_t;
+typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
typedef struct arena_run_s arena_run_t;
+typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
+typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
+typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
@@ -54,54 +36,34 @@ typedef struct arena_s arena_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
-/* Each element of the chunk map corresponds to one page within the chunk. */
-struct arena_chunk_map_s {
-#ifndef JEMALLOC_PROF
- /*
- * Overlay prof_ctx in order to allow it to be referenced by dead code.
- * Such antics aren't warranted for per arena data structures, but
- * chunk map overhead accounts for a percentage of memory, rather than
- * being just a fixed cost.
- */
- union {
-#endif
- union {
- /*
- * Linkage for run trees. There are two disjoint uses:
- *
- * 1) arena_t's runs_avail tree.
- * 2) arena_run_t conceptually uses this linkage for in-use
- * non-full runs, rather than directly embedding linkage.
- */
- rb_node(arena_chunk_map_t) rb_link;
- /*
- * List of runs currently in purgatory. arena_chunk_purge()
- * temporarily allocates runs that contain dirty pages while
- * purging, so that other threads cannot use the runs while the
- * purging thread is operating without the arena lock held.
- */
- ql_elm(arena_chunk_map_t) ql_link;
- } u;
+#ifdef JEMALLOC_ARENA_STRUCTS_A
+struct arena_run_s {
+ /* Index of bin this run is associated with. */
+ szind_t binind;
- /* Profile counters, used for large object runs. */
- prof_ctx_t *prof_ctx;
-#ifndef JEMALLOC_PROF
- }; /* union { ... }; */
-#endif
+ /* Number of free regions in run. */
+ unsigned nfree;
+ /* Per region allocated/deallocated bitmap. */
+ bitmap_t bitmap[BITMAP_GROUPS_MAX];
+};
+
+/* Each element of the chunk map corresponds to one page within the chunk. */
+struct arena_chunk_map_bits_s {
/*
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
- * ???????? ???????? ????nnnn nnnndula
+ * ???????? ???????? ???nnnnn nnndumla
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
* Small: Run page offset.
- * Large: Run size for first page, unset for trailing pages.
+ * Large: Run page count for first page, unset for trailing pages.
* n : binind for small size class, BININD_INVALID for large size class.
* d : dirty?
* u : unzeroed?
+ * m : decommitted?
* l : large?
* a : allocated?
*
@@ -110,78 +72,109 @@ struct arena_chunk_map_s {
* p : run page offset
* s : run size
* n : binind for size class; large objects set these to BININD_INVALID
- * except for promoted allocations (see prof_promote)
* x : don't care
* - : 0
* + : 1
- * [DULA] : bit set
- * [dula] : bit unset
+ * [DUMLA] : bit set
+ * [dumla] : bit unset
*
* Unallocated (clean):
- * ssssssss ssssssss ssss++++ ++++du-a
- * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
- * ssssssss ssssssss ssss++++ ++++dU-a
+ * ssssssss ssssssss sss+++++ +++dum-a
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx
+ * ssssssss ssssssss sss+++++ +++dUm-a
*
* Unallocated (dirty):
- * ssssssss ssssssss ssss++++ ++++D--a
+ * ssssssss ssssssss sss+++++ +++D-m-a
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * ssssssss ssssssss ssss++++ ++++D--a
+ * ssssssss ssssssss sss+++++ +++D-m-a
*
* Small:
- * pppppppp pppppppp ppppnnnn nnnnd--A
- * pppppppp pppppppp ppppnnnn nnnn---A
- * pppppppp pppppppp ppppnnnn nnnnd--A
+ * pppppppp pppppppp pppnnnnn nnnd---A
+ * pppppppp pppppppp pppnnnnn nnn----A
+ * pppppppp pppppppp pppnnnnn nnnd---A
*
* Large:
- * ssssssss ssssssss ssss++++ ++++D-LA
+ * ssssssss ssssssss sss+++++ +++D--LA
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * -------- -------- ----++++ ++++D-LA
+ * -------- -------- ---+++++ +++D--LA
*
- * Large (sampled, size <= PAGE):
- * ssssssss ssssssss ssssnnnn nnnnD-LA
+ * Large (sampled, size <= LARGE_MINCLASS):
+ * ssssssss ssssssss sssnnnnn nnnD--LA
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ * -------- -------- ---+++++ +++D--LA
*
- * Large (not sampled, size == PAGE):
- * ssssssss ssssssss ssss++++ ++++D-LA
+ * Large (not sampled, size == LARGE_MINCLASS):
+ * ssssssss ssssssss sss+++++ +++D--LA
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ * -------- -------- ---+++++ +++D--LA
*/
size_t bits;
-#define CHUNK_MAP_BININD_SHIFT 4
+#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
+#define CHUNK_MAP_LARGE ((size_t)0x02U)
+#define CHUNK_MAP_STATE_MASK ((size_t)0x3U)
+
+#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U)
+#define CHUNK_MAP_UNZEROED ((size_t)0x08U)
+#define CHUNK_MAP_DIRTY ((size_t)0x10U)
+#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU)
+
+#define CHUNK_MAP_BININD_SHIFT 5
#define BININD_INVALID ((size_t)0xffU)
-/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
-#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U)
+#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT)
#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
-#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU)
-#define CHUNK_MAP_DIRTY ((size_t)0x8U)
-#define CHUNK_MAP_UNZEROED ((size_t)0x4U)
-#define CHUNK_MAP_LARGE ((size_t)0x2U)
-#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
-#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
+
+#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8)
+#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE)
+#define CHUNK_MAP_SIZE_MASK \
+ (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK))
};
-typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
-typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
-typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t;
-/* Arena chunk header. */
-struct arena_chunk_s {
- /* Arena that owns the chunk. */
- arena_t *arena;
+struct arena_runs_dirty_link_s {
+ qr(arena_runs_dirty_link_t) rd_link;
+};
- /* Linkage for tree of arena chunks that contain dirty runs. */
- rb_node(arena_chunk_t) dirty_link;
+/*
+ * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
+ * like arena_chunk_map_bits_t. Two separate arrays are stored within each
+ * chunk header in order to improve cache locality.
+ */
+struct arena_chunk_map_misc_s {
+ /*
+ * Linkage for run trees. There are two disjoint uses:
+ *
+ * 1) arena_t's runs_avail tree.
+ * 2) arena_run_t conceptually uses this linkage for in-use non-full
+ * runs, rather than directly embedding linkage.
+ */
+ rb_node(arena_chunk_map_misc_t) rb_link;
- /* Number of dirty pages. */
- size_t ndirty;
+ union {
+ /* Linkage for list of dirty runs. */
+ arena_runs_dirty_link_t rd;
- /* Number of available runs. */
- size_t nruns_avail;
+ /* Profile counters, used for large object runs. */
+ union {
+ void *prof_tctx_pun;
+ prof_tctx_t *prof_tctx;
+ };
+ /* Small region run metadata. */
+ arena_run_t run;
+ };
+};
+typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+#endif /* JEMALLOC_ARENA_STRUCTS_A */
+
+#ifdef JEMALLOC_ARENA_STRUCTS_B
+/* Arena chunk header. */
+struct arena_chunk_s {
/*
- * Number of available run adjacencies that purging could coalesce.
- * Clean and dirty available runs are not coalesced, which causes
- * virtual memory fragmentation. The ratio of
- * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this
- * fragmentation.
+ * A pointer to the arena that owns the chunk is stored within the node.
+ * This field as a whole is used by chunks_rtree to support both
+ * ivsalloc() and core-based debugging.
*/
- size_t nruns_adjac;
+ extent_node_t node;
/*
* Map of pages within chunk that keeps track of free/large/small. The
@@ -189,19 +182,7 @@ struct arena_chunk_s {
* need to be tracked in the map. This omission saves a header page
* for common chunk sizes (e.g. 4 MiB).
*/
- arena_chunk_map_t map[1]; /* Dynamically sized. */
-};
-typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
-
-struct arena_run_s {
- /* Bin this run is associated with. */
- arena_bin_t *bin;
-
- /* Index of next region that has never been allocated, or nregs. */
- uint32_t nextind;
-
- /* Number of free regions in run. */
- unsigned nfree;
+ arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */
};
/*
@@ -212,12 +193,7 @@ struct arena_run_s {
* Each run has the following layout:
*
* /--------------------\
- * | arena_run_t header |
- * | ... |
- * bitmap_offset | bitmap |
- * | ... |
- * ctx0_offset | ctx map |
- * | ... |
+ * | pad? |
* |--------------------|
* | redzone |
* reg0_offset | region 0 |
@@ -259,23 +235,11 @@ struct arena_bin_info_s {
uint32_t nregs;
/*
- * Offset of first bitmap_t element in a run header for this bin's size
- * class.
- */
- uint32_t bitmap_offset;
-
- /*
* Metadata used to manipulate bitmaps for runs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
- /*
- * Offset of first (prof_ctx_t *) in a run header for this bin's size
- * class, or 0 if (config_prof == false || opt_prof == false).
- */
- uint32_t ctx0_offset;
-
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
@@ -321,8 +285,7 @@ struct arena_s {
/*
* There are three classes of arena operations from a locking
* perspective:
- * 1) Thread asssignment (modifies nthreads) is protected by
- * arenas_lock.
+ * 1) Thread assignment (modifies nthreads) is protected by arenas_lock.
* 2) Bin-related operations are protected by bin locks.
* 3) Chunk- and run-related operations are protected by this mutex.
*/
@@ -331,16 +294,20 @@ struct arena_s {
arena_stats_t stats;
/*
* List of tcaches for extant threads associated with this arena.
- * Stats from these are merged incrementally, and at exit.
+ * Stats from these are merged incrementally, and at exit if
+ * opt_stats_print is enabled.
*/
ql_head(tcache_t) tcache_ql;
uint64_t prof_accumbytes;
- dss_prec_t dss_prec;
+ /*
+ * PRNG state for cache index randomization of large allocation base
+ * pointers.
+ */
+ uint64_t offset_state;
- /* Tree of dirty-page-containing chunks this arena manages. */
- arena_chunk_tree_t chunks_dirty;
+ dss_prec_t dss_prec;
/*
* In order to avoid rapid chunk allocation/deallocation when an arena
@@ -354,7 +321,13 @@ struct arena_s {
*/
arena_chunk_t *spare;
- /* Number of pages in active runs. */
+ /* Minimum ratio (log base 2) of nactive:ndirty. */
+ ssize_t lg_dirty_mult;
+
+ /* True if a thread is currently executing arena_purge(). */
+ bool purging;
+
+ /* Number of pages in active runs and huge regions. */
size_t nactive;
/*
@@ -366,44 +339,116 @@ struct arena_s {
size_t ndirty;
/*
- * Approximate number of pages being purged. It is possible for
- * multiple threads to purge dirty pages concurrently, and they use
- * npurgatory to indicate the total number of pages all threads are
- * attempting to purge.
+ * Size/address-ordered tree of this arena's available runs. The tree
+ * is used for first-best-fit run allocation.
*/
- size_t npurgatory;
+ arena_avail_tree_t runs_avail;
/*
- * Size/address-ordered trees of this arena's available runs. The trees
- * are used for first-best-fit run allocation.
+ * Unused dirty memory this arena manages. Dirty memory is conceptually
+ * tracked as an arbitrarily interleaved LRU of dirty runs and cached
+ * chunks, but the list linkage is actually semi-duplicated in order to
+ * avoid extra arena_chunk_map_misc_t space overhead.
+ *
+ * LRU-----------------------------------------------------------MRU
+ *
+ * /-- arena ---\
+ * | |
+ * | |
+ * |------------| /- chunk -\
+ * ...->|chunks_cache|<--------------------------->| /----\ |<--...
+ * |------------| | |node| |
+ * | | | | | |
+ * | | /- run -\ /- run -\ | | | |
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * |------------| |-------| |-------| | |----| |
+ * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----...
+ * |------------| |-------| |-------| | |----| |
+ * | | | | | | | | | |
+ * | | | | | | | \----/ |
+ * | | \-------/ \-------/ | |
+ * | | | |
+ * | | | |
+ * \------------/ \---------/
*/
- arena_avail_tree_t runs_avail;
+ arena_runs_dirty_link_t runs_dirty;
+ extent_node_t chunks_cache;
+
+ /* Extant huge allocations. */
+ ql_head(extent_node_t) huge;
+ /* Synchronizes all huge allocation/update/deallocation. */
+ malloc_mutex_t huge_mtx;
+
+ /*
+ * Trees of chunks that were previously allocated (trees differ only in
+ * node ordering). These are used when allocating chunks, in an attempt
+ * to re-use address space. Depending on function, different tree
+ * orderings are needed, which is why there are two trees with the same
+ * contents.
+ */
+ extent_tree_t chunks_szad_cached;
+ extent_tree_t chunks_ad_cached;
+ extent_tree_t chunks_szad_retained;
+ extent_tree_t chunks_ad_retained;
+
+ malloc_mutex_t chunks_mtx;
+ /* Cache of nodes that were allocated via base_alloc(). */
+ ql_head(extent_node_t) node_cache;
+ malloc_mutex_t node_cache_mtx;
+
+ /* User-configurable chunk hook functions. */
+ chunk_hooks_t chunk_hooks;
/* bins is used to store trees of free regions. */
arena_bin_t bins[NBINS];
};
+#endif /* JEMALLOC_ARENA_STRUCTS_B */
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-extern ssize_t opt_lg_dirty_mult;
-/*
- * small_size2bin is a compact lookup table that rounds request sizes up to
- * size classes. In order to reduce cache footprint, the table is compressed,
- * and all accesses are via the SMALL_SIZE2BIN macro.
- */
-extern uint8_t const small_size2bin[];
-#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
+static const size_t large_pad =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ PAGE
+#else
+ 0
+#endif
+ ;
-extern arena_bin_info_t arena_bin_info[NBINS];
+extern ssize_t opt_lg_dirty_mult;
-/* Number of large size classes. */
-#define nlclasses (chunk_npages - map_bias)
+extern arena_bin_info_t arena_bin_info[NBINS];
+extern size_t map_bias; /* Number of arena chunk header pages. */
+extern size_t map_misc_offset;
+extern size_t arena_maxrun; /* Max run size for arenas. */
+extern size_t large_maxclass; /* Max large size class. */
+extern unsigned nlclasses; /* Number of large size classes. */
+extern unsigned nhclasses; /* Number of huge size classes. */
+
+void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
+ bool cache);
+void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
+ bool cache);
+extent_node_t *arena_node_alloc(arena_t *arena);
+void arena_node_dalloc(arena_t *arena, extent_node_t *node);
+void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
+ bool *zero);
+void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
+void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize);
+void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize);
+bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize, bool *zero);
+ssize_t arena_lg_dirty_mult_get(arena_t *arena);
+bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
+void arena_maybe_purge(arena_t *arena);
void arena_purge_all(arena_t *arena);
void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
- size_t binind, uint64_t prof_accumbytes);
+ szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
bool zero);
#ifdef JEMALLOC_JET
@@ -418,19 +463,22 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
void arena_quarantine_junk_small(void *ptr, size_t usize);
void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
-void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
+void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache);
void arena_prof_promoted(const void *ptr, size_t size);
-void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- arena_chunk_map_t *mapelm);
+void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr, arena_chunk_map_bits_t *bitselm);
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind, arena_chunk_map_t *mapelm);
+ size_t pageind, arena_chunk_map_bits_t *bitselm);
void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t pageind);
#ifdef JEMALLOC_JET
typedef void (arena_dalloc_junk_large_t)(void *, size_t);
extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
+#else
+void arena_dalloc_junk_large(void *ptr, size_t usize);
#endif
-void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
+void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
void *ptr);
void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#ifdef JEMALLOC_JET
@@ -439,16 +487,18 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
#endif
bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero);
-void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
- bool try_tcache_dalloc);
+void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+ size_t size, size_t alignment, bool zero, tcache_t *tcache);
dss_prec_t arena_dss_prec_get(arena_t *arena);
-void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
-void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
- size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
- malloc_large_stats_t *lstats);
-bool arena_new(arena_t *arena, unsigned ind);
-void arena_boot(void);
+bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+ssize_t arena_lg_dirty_mult_default_get(void);
+bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
+void arena_stats_merge(arena_t *arena, const char **dss,
+ ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty,
+ arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
+arena_t *arena_new(unsigned ind);
+bool arena_boot(void);
void arena_prefork(arena_t *arena);
void arena_postfork_parent(arena_t *arena);
void arena_postfork_child(arena_t *arena);
@@ -458,64 +508,138 @@ void arena_postfork_child(arena_t *arena);
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk,
+ size_t pageind);
+arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk,
+ size_t pageind);
+size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm);
+void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
+arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
+arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run);
size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbitsp_read(size_t *mapbitsp);
size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
+size_t arena_mapbits_size_decode(size_t mapbits);
size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
size_t pageind);
size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
+szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
+size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
+size_t arena_mapbits_size_encode(size_t size);
void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
size_t size, size_t flags);
void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
size_t size);
+void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind,
+ size_t flags);
void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
size_t size, size_t flags);
void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind);
+ szind_t binind);
void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
- size_t runind, size_t binind, size_t flags);
-void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed);
+ size_t runind, szind_t binind, size_t flags);
+void arena_metadata_allocated_add(arena_t *arena, size_t size);
+void arena_metadata_allocated_sub(arena_t *arena, size_t size);
+size_t arena_metadata_allocated_get(arena_t *arena);
bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
bool arena_prof_accum(arena_t *arena, uint64_t accumbytes);
-size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
-size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
+szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
+szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
+prof_tctx_t *arena_prof_tctx_get(const void *ptr);
+void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void arena_prof_tctx_reset(const void *ptr, size_t usize,
+ const void *old_ptr, prof_tctx_t *old_tctx);
+void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache);
+arena_t *arena_aalloc(const void *ptr);
size_t arena_salloc(const void *ptr, bool demote);
-void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- bool try_tcache);
+void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
# ifdef JEMALLOC_ARENA_INLINE_A
-JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
-arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
+arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
{
assert(pageind >= map_bias);
assert(pageind < chunk_npages);
- return (&chunk->map[pageind-map_bias]);
+ return (&chunk->map_bits[pageind-map_bias]);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+{
+
+ assert(pageind >= map_bias);
+ assert(pageind < chunk_npages);
+
+ return ((arena_chunk_map_misc_t *)((uintptr_t)chunk +
+ (uintptr_t)map_misc_offset) + pageind-map_bias);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm)
+{
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
+ size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk +
+ map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias;
+
+ assert(pageind >= map_bias);
+ assert(pageind < chunk_npages);
+
+ return (pageind);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm)
+{
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
+
+ return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE)));
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_rd_to_miscelm(arena_runs_dirty_link_t *rd)
+{
+ arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
+ *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd));
+
+ assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
+ assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
+
+ return (miscelm);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_run_to_miscelm(arena_run_t *run)
+{
+ arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
+ *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run));
+
+ assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
+ assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
+
+ return (miscelm);
}
JEMALLOC_ALWAYS_INLINE size_t *
arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
{
- return (&arena_mapp_get(chunk, pageind)->bits);
+ return (&arena_bitselm_get(chunk, pageind)->bits);
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -533,13 +657,29 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
}
JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_size_decode(size_t mapbits)
+{
+ size_t size;
+
+#if CHUNK_MAP_SIZE_SHIFT > 0
+ size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT;
+#elif CHUNK_MAP_SIZE_SHIFT == 0
+ size = mapbits & CHUNK_MAP_SIZE_MASK;
+#else
+ size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT;
+#endif
+
+ return (size);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- return (mapbits & ~PAGE_MASK);
+ return (arena_mapbits_size_decode(mapbits));
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -550,7 +690,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
(CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
- return (mapbits & ~PAGE_MASK);
+ return (arena_mapbits_size_decode(mapbits));
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -561,14 +701,14 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
CHUNK_MAP_ALLOCATED);
- return (mapbits >> LG_PAGE);
+ return (mapbits >> CHUNK_MAP_RUNIND_SHIFT);
}
-JEMALLOC_ALWAYS_INLINE size_t
+JEMALLOC_ALWAYS_INLINE szind_t
arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
- size_t binind;
+ szind_t binind;
mapbits = arena_mapbits_get(chunk, pageind);
binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
@@ -582,6 +722,8 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
return (mapbits & CHUNK_MAP_DIRTY);
}
@@ -591,10 +733,23 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
return (mapbits & CHUNK_MAP_UNZEROED);
}
JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind)
+{
+ size_t mapbits;
+
+ mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ return (mapbits & CHUNK_MAP_DECOMMITTED);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
@@ -619,6 +774,23 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits)
*mapbitsp = mapbits;
}
+JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_size_encode(size_t size)
+{
+ size_t mapbits;
+
+#if CHUNK_MAP_SIZE_SHIFT > 0
+ mapbits = size << CHUNK_MAP_SIZE_SHIFT;
+#elif CHUNK_MAP_SIZE_SHIFT == 0
+ mapbits = size;
+#else
+ mapbits = size >> -CHUNK_MAP_SIZE_SHIFT;
+#endif
+
+ assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0);
+ return (mapbits);
+}
+
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t flags)
@@ -626,9 +798,11 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
assert((size & PAGE_MASK) == 0);
- assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
- assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags);
+ assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
+ assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ CHUNK_MAP_BININD_INVALID | flags);
}
JEMALLOC_ALWAYS_INLINE void
@@ -640,7 +814,17 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
assert((size & PAGE_MASK) == 0);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK));
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ (mapbits & ~CHUNK_MAP_SIZE_MASK));
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags)
+{
+ size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+
+ assert((flags & CHUNK_MAP_UNZEROED) == flags);
+ arena_mapbitsp_write(mapbitsp, flags);
}
JEMALLOC_ALWAYS_INLINE void
@@ -648,54 +832,62 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t flags)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
assert((size & PAGE_MASK) == 0);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags
- | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+ assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
+ assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED);
}
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind)
+ szind_t binind)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
size_t mapbits = arena_mapbitsp_read(mapbitsp);
assert(binind <= BININD_INVALID);
- assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
+ assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
+ large_pad);
arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
(binind << CHUNK_MAP_BININD_SHIFT));
}
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
- size_t binind, size_t flags)
+ szind_t binind, size_t flags)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
assert(binind < BININD_INVALID);
assert(pageind - runind >= map_bias);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind <<
- CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED);
+ assert((flags & CHUNK_MAP_UNZEROED) == flags);
+ arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) |
+ (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED);
}
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed)
+JEMALLOC_INLINE void
+arena_metadata_allocated_add(arena_t *arena, size_t size)
+{
+
+ atomic_add_z(&arena->stats.metadata_allocated, size);
+}
+
+JEMALLOC_INLINE void
+arena_metadata_allocated_sub(arena_t *arena, size_t size)
{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) |
- unzeroed);
+ atomic_sub_z(&arena->stats.metadata_allocated, size);
+}
+
+JEMALLOC_INLINE size_t
+arena_metadata_allocated_get(arena_t *arena)
+{
+
+ return (atomic_read_z(&arena->stats.metadata_allocated));
}
JEMALLOC_INLINE bool
@@ -719,7 +911,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes)
cassert(config_prof);
- if (prof_interval == 0)
+ if (likely(prof_interval == 0))
return (false);
return (arena_prof_accum_impl(arena, accumbytes));
}
@@ -730,7 +922,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
cassert(config_prof);
- if (prof_interval == 0)
+ if (likely(prof_interval == 0))
return (false);
{
@@ -743,10 +935,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
}
}
-JEMALLOC_ALWAYS_INLINE size_t
+JEMALLOC_ALWAYS_INLINE szind_t
arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
{
- size_t binind;
+ szind_t binind;
binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
@@ -755,27 +947,34 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
arena_t *arena;
size_t pageind;
size_t actual_mapbits;
+ size_t rpages_ind;
arena_run_t *run;
arena_bin_t *bin;
- size_t actual_binind;
+ szind_t run_binind, actual_binind;
arena_bin_info_t *bin_info;
+ arena_chunk_map_misc_t *miscelm;
+ void *rpages;
assert(binind != BININD_INVALID);
assert(binind < NBINS);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- arena = chunk->arena;
+ arena = extent_node_arena_get(&chunk->node);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
actual_mapbits = arena_mapbits_get(chunk, pageind);
assert(mapbits == actual_mapbits);
assert(arena_mapbits_large_get(chunk, pageind) == 0);
assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- (actual_mapbits >> LG_PAGE)) << LG_PAGE));
- bin = run->bin;
+ rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
+ pageind);
+ miscelm = arena_miscelm_get(chunk, rpages_ind);
+ run = &miscelm->run;
+ run_binind = run->binind;
+ bin = &arena->bins[run_binind];
actual_binind = bin - arena->bins;
- assert(binind == actual_binind);
+ assert(run_binind == actual_binind);
bin_info = &arena_bin_info[actual_binind];
- assert(((uintptr_t)ptr - ((uintptr_t)run +
+ rpages = arena_miscelm_to_rpages(miscelm);
+ assert(((uintptr_t)ptr - ((uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
== 0);
}
@@ -785,10 +984,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
# endif /* JEMALLOC_ARENA_INLINE_A */
# ifdef JEMALLOC_ARENA_INLINE_B
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE szind_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
- size_t binind = bin - arena->bins;
+ szind_t binind = bin - arena->bins;
assert(binind < NBINS);
return (binind);
}
@@ -798,24 +997,26 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
unsigned shift, diff, regind;
size_t interval;
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
+ void *rpages = arena_miscelm_to_rpages(miscelm);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
- assert((uintptr_t)ptr >= (uintptr_t)run +
+ assert((uintptr_t)ptr >= (uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset);
/*
* Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%!
*/
- diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
+ diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages -
bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */
interval = bin_info->reg_interval;
- shift = ffs(interval) - 1;
+ shift = jemalloc_ffs(interval) - 1;
diff >>= shift;
interval >>= shift;
@@ -850,8 +1051,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
};
- if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
- 2)) {
+ if (likely(interval <= ((sizeof(interval_invs) /
+ sizeof(unsigned)) + 2))) {
regind = (diff * interval_invs[interval - 3]) >>
SIZE_INV_SHIFT;
} else
@@ -865,113 +1066,138 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
return (regind);
}
-JEMALLOC_INLINE prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
+JEMALLOC_INLINE prof_tctx_t *
+arena_prof_tctx_get(const void *ptr)
{
- prof_ctx_t *ret;
+ prof_tctx_t *ret;
arena_chunk_t *chunk;
- size_t pageind, mapbits;
cassert(config_prof);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- if (prof_promote)
- ret = (prof_ctx_t *)(uintptr_t)1U;
+ if (likely(chunk != ptr)) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ size_t mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+ if (likely((mapbits & CHUNK_MAP_LARGE) == 0))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
else {
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind = arena_ptr_small_binind_get(ptr,
- mapbits);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- unsigned regind;
-
- regind = arena_run_regind(run, bin_info, ptr);
- ret = *(prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind *
- sizeof(prof_ctx_t *)));
+ arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
+ pageind);
+ ret = atomic_read_p(&elm->prof_tctx_pun);
}
} else
- ret = arena_mapp_get(chunk, pageind)->prof_ctx;
+ ret = huge_prof_tctx_get(ptr);
return (ret);
}
JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
arena_chunk_t *chunk;
- size_t pageind;
cassert(config_prof);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
-
- if (usize > SMALL_MAXCLASS || (prof_promote &&
- ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk,
- pageind) != 0))) {
- assert(arena_mapbits_large_get(chunk, pageind) != 0);
- arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
- } else {
- assert(arena_mapbits_large_get(chunk, pageind) == 0);
- if (prof_promote == false) {
- size_t mapbits = arena_mapbits_get(chunk, pageind);
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind;
- arena_bin_info_t *bin_info;
- unsigned regind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- bin_info = &arena_bin_info[binind];
- regind = arena_run_regind(run, bin_info, ptr);
-
- *((prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
- *)))) = ctx;
+ if (likely(chunk != ptr)) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+
+ if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx >
+ (uintptr_t)1U)) {
+ arena_chunk_map_misc_t *elm;
+
+ assert(arena_mapbits_large_get(chunk, pageind) != 0);
+
+ elm = arena_miscelm_get(chunk, pageind);
+ atomic_write_p(&elm->prof_tctx_pun, tctx);
+ } else {
+ /*
+ * tctx must always be initialized for large runs.
+ * Assert that the surrounding conditional logic is
+ * equivalent to checking whether ptr refers to a large
+ * run.
+ */
+ assert(arena_mapbits_large_get(chunk, pageind) == 0);
}
+ } else
+ huge_prof_tctx_set(ptr, tctx);
+}
+
+JEMALLOC_INLINE void
+arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *old_tctx)
+{
+
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr &&
+ (uintptr_t)old_tctx > (uintptr_t)1U))) {
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ size_t pageind;
+ arena_chunk_map_misc_t *elm;
+
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+ LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) !=
+ 0);
+ assert(arena_mapbits_large_get(chunk, pageind) != 0);
+
+ elm = arena_miscelm_get(chunk, pageind);
+ atomic_write_p(&elm->prof_tctx_pun,
+ (prof_tctx_t *)(uintptr_t)1U);
+ } else
+ huge_prof_tctx_reset(ptr);
}
}
JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
+arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache)
{
- tcache_t *tcache;
assert(size != 0);
- assert(size <= arena_maxclass);
- if (size <= SMALL_MAXCLASS) {
- if (try_tcache && (tcache = tcache_get(true)) != NULL)
- return (tcache_alloc_small(tcache, size, zero));
- else {
- return (arena_malloc_small(choose_arena(arena), size,
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+
+ if (likely(size <= SMALL_MAXCLASS)) {
+ if (likely(tcache != NULL)) {
+ return (tcache_alloc_small(tsd, arena, tcache, size,
zero));
- }
- } else {
+ } else
+ return (arena_malloc_small(arena, size, zero));
+ } else if (likely(size <= large_maxclass)) {
/*
* Initialize tcache after checking size in order to avoid
* infinite recursion during tcache initialization.
*/
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(true)) != NULL)
- return (tcache_alloc_large(tcache, size, zero));
- else {
- return (arena_malloc_large(choose_arena(arena), size,
+ if (likely(tcache != NULL) && size <= tcache_maxclass) {
+ return (tcache_alloc_large(tsd, arena, tcache, size,
zero));
- }
- }
+ } else
+ return (arena_malloc_large(arena, size, zero));
+ } else
+ return (huge_malloc(tsd, arena, size, zero, tcache));
+}
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+arena_aalloc(const void *ptr)
+{
+ arena_chunk_t *chunk;
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr))
+ return (extent_node_arena_get(&chunk->node));
+ else
+ return (huge_aalloc(ptr));
}
/* Return the size of the allocation pointed to by ptr. */
@@ -980,81 +1206,139 @@ arena_salloc(const void *ptr, bool demote)
{
size_t ret;
arena_chunk_t *chunk;
- size_t pageind, binind;
+ size_t pageind;
+ szind_t binind;
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- binind = arena_mapbits_binind_get(chunk, pageind);
- if (binind == BININD_INVALID || (config_prof && demote == false &&
- prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
- /*
- * Large allocation. In the common case (demote == true), and
- * as this is an inline function, most callers will only end up
- * looking at binind to determine that ptr is a small
- * allocation.
- */
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
- ret = arena_mapbits_large_size_get(chunk, pageind);
- assert(ret != 0);
- assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
- assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
- pageind+(ret>>LG_PAGE)-1) == 0);
- assert(binind == arena_mapbits_binind_get(chunk,
- pageind+(ret>>LG_PAGE)-1));
- assert(arena_mapbits_dirty_get(chunk, pageind) ==
- arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
- } else {
- /*
- * Small allocation (possibly promoted to a large object due to
- * prof_promote).
- */
- assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
- arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
- pageind)) == binind);
- ret = arena_bin_info[binind].reg_size;
- }
+ if (likely(chunk != ptr)) {
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ binind = arena_mapbits_binind_get(chunk, pageind);
+ if (unlikely(binind == BININD_INVALID || (config_prof && !demote
+ && arena_mapbits_large_get(chunk, pageind) != 0))) {
+ /*
+ * Large allocation. In the common case (demote), and
+ * as this is an inline function, most callers will only
+ * end up looking at binind to determine that ptr is a
+ * small allocation.
+ */
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+ ret = arena_mapbits_large_size_get(chunk, pageind) -
+ large_pad;
+ assert(ret != 0);
+ assert(pageind + ((ret+large_pad)>>LG_PAGE) <=
+ chunk_npages);
+ assert(arena_mapbits_dirty_get(chunk, pageind) ==
+ arena_mapbits_dirty_get(chunk,
+ pageind+((ret+large_pad)>>LG_PAGE)-1));
+ } else {
+ /*
+ * Small allocation (possibly promoted to a large
+ * object).
+ */
+ assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
+ arena_ptr_small_binind_get(ptr,
+ arena_mapbits_get(chunk, pageind)) == binind);
+ ret = index2size(binind);
+ }
+ } else
+ ret = huge_salloc(ptr);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
+arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
+ arena_chunk_t *chunk;
size_t pageind, mapbits;
- tcache_t *tcache;
- assert(arena != NULL);
- assert(chunk->arena == arena);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- /* Small allocation. */
- if (try_tcache && (tcache = tcache_get(false)) != NULL) {
- size_t binind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- tcache_dalloc_small(tcache, ptr, binind);
- } else
- arena_dalloc_small(arena, chunk, ptr, pageind);
- } else {
- size_t size = arena_mapbits_large_size_get(chunk, pageind);
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ mapbits = arena_mapbits_get(chunk, pageind);
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) {
+ /* Small allocation. */
+ if (likely(tcache != NULL)) {
+ szind_t binind = arena_ptr_small_binind_get(ptr,
+ mapbits);
+ tcache_dalloc_small(tsd, tcache, ptr, binind);
+ } else {
+ arena_dalloc_small(extent_node_arena_get(
+ &chunk->node), chunk, ptr, pageind);
+ }
+ } else {
+ size_t size = arena_mapbits_large_size_get(chunk,
+ pageind);
+
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+
+ if (likely(tcache != NULL) && size - large_pad <=
+ tcache_maxclass) {
+ tcache_dalloc_large(tsd, tcache, ptr, size -
+ large_pad);
+ } else {
+ arena_dalloc_large(extent_node_arena_get(
+ &chunk->node), chunk, ptr);
+ }
+ }
+ } else
+ huge_dalloc(tsd, ptr, tcache);
+}
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+{
+ arena_chunk_t *chunk;
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(false)) != NULL) {
- tcache_dalloc_large(tcache, ptr, size);
- } else
- arena_dalloc_large(arena, chunk, ptr);
- }
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ if (config_prof && opt_prof) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+ LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ if (arena_mapbits_large_get(chunk, pageind) != 0) {
+ /*
+ * Make sure to use promoted size, not request
+ * size.
+ */
+ size = arena_mapbits_large_size_get(chunk,
+ pageind) - large_pad;
+ }
+ }
+ assert(s2u(size) == s2u(arena_salloc(ptr, false)));
+
+ if (likely(size <= SMALL_MAXCLASS)) {
+ /* Small allocation. */
+ if (likely(tcache != NULL)) {
+ szind_t binind = size2index(size);
+ tcache_dalloc_small(tsd, tcache, ptr, binind);
+ } else {
+ size_t pageind = ((uintptr_t)ptr -
+ (uintptr_t)chunk) >> LG_PAGE;
+ arena_dalloc_small(extent_node_arena_get(
+ &chunk->node), chunk, ptr, pageind);
+ }
+ } else {
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+
+ if (likely(tcache != NULL) && size <= tcache_maxclass)
+ tcache_dalloc_large(tsd, tcache, ptr, size);
+ else {
+ arena_dalloc_large(extent_node_arena_get(
+ &chunk->node), chunk, ptr);
+ }
+ }
+ } else
+ huge_dalloc(tsd, ptr, tcache);
}
# endif /* JEMALLOC_ARENA_INLINE_B */
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h
index 11a7b47fe..a9aad35d1 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic.h
@@ -11,6 +11,7 @@
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
+#define atomic_read_p(p) atomic_add_p(p, NULL)
#define atomic_read_z(p) atomic_add_z(p, 0)
#define atomic_read_u(p) atomic_add_u(p, 0)
@@ -18,113 +19,244 @@
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+/*
+ * All arithmetic functions return the arithmetic result of the atomic
+ * operation. Some atomic operation APIs return the value prior to mutation, in
+ * which case the following functions must redundantly compute the result so
+ * that it can be returned. These functions are normally inlined, so the extra
+ * operations can be optimized away if the return values aren't used by the
+ * callers.
+ *
+ * <t> atomic_read_<t>(<t> *p) { return (*p); }
+ * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); }
+ * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); }
+ * bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
+ * {
+ * if (*p != c)
+ * return (true);
+ * *p = s;
+ * return (false);
+ * }
+ * void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
+ */
+
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
+bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s);
+void atomic_write_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
+bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s);
+void atomic_write_uint32(uint32_t *p, uint32_t x);
+void *atomic_add_p(void **p, void *x);
+void *atomic_sub_p(void **p, void *x);
+bool atomic_cas_p(void **p, void *c, void *s);
+void atomic_write_p(void **p, const void *x);
size_t atomic_add_z(size_t *p, size_t x);
size_t atomic_sub_z(size_t *p, size_t x);
+bool atomic_cas_z(size_t *p, size_t c, size_t s);
+void atomic_write_z(size_t *p, size_t x);
unsigned atomic_add_u(unsigned *p, unsigned x);
unsigned atomic_sub_u(unsigned *p, unsigned x);
+bool atomic_cas_u(unsigned *p, unsigned c, unsigned s);
+void atomic_write_u(unsigned *p, unsigned x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+# if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
+ uint64_t t = x;
- return (__sync_add_and_fetch(p, x));
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
+ uint64_t t;
- return (__sync_sub_and_fetch(p, x));
+ x = (uint64_t)(-(int64_t)x);
+ t = x;
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
-#elif (defined(_MSC_VER))
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ uint8_t success;
+
+ asm volatile (
+ "lock; cmpxchgq %4, %0;"
+ "sete %1;"
+ : "=m" (*p), "=a" (success) /* Outputs. */
+ : "m" (*p), "a" (c), "r" (s) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+
+ return (!(bool)success);
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ asm volatile (
+ "xchgq %1, %0;" /* Lock is implied by xchgq. */
+ : "=m" (*p), "+r" (x) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
-
- return (InterlockedExchangeAdd64(p, x));
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (atomic_fetch_add(a, x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (atomic_fetch_sub(a, x) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (!atomic_compare_exchange_strong(a, &c, s));
+}
- return (InterlockedExchangeAdd64(p, -((int64_t)x)));
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ atomic_store(a, x);
}
-#elif (defined(JEMALLOC_OSATOMIC))
+# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
+ /*
+ * atomic_fetchadd_64() doesn't exist, but we only ever use this
+ * function on LP64 systems, so atomic_fetchadd_long() will do.
+ */
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (atomic_fetchadd_long(p, (unsigned long)x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ atomic_store_rel_long(p, x);
}
-# elif (defined(__amd64__) || defined(__x86_64__))
+# elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
+ return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- x = (uint64_t)(-(int64_t)x);
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
+ return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
- return (x);
+ return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p));
}
-# elif (defined(JEMALLOC_ATOMIC9))
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+ uint64_t o;
+
+ /*The documented OSAtomic*() API does not expose an atomic exchange. */
+ do {
+ o = atomic_read_uint64(p);
+ } while (atomic_cas_uint64(p, o, x));
+}
+# elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- /*
- * atomic_fetchadd_64() doesn't exist, but we only ever use this
- * function on LP64 systems, so atomic_fetchadd_long() will do.
- */
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return (atomic_fetchadd_long(p, (unsigned long)x) + x);
+ return (InterlockedExchangeAdd64(p, x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- assert(sizeof(uint64_t) == sizeof(unsigned long));
+ return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x);
+}
- return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ uint64_t o;
+
+ o = InterlockedCompareExchange64(p, s, c);
+ return (o != c);
}
-# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ InterlockedExchange64(p, x);
+}
+# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
+ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -138,6 +270,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
return (__sync_sub_and_fetch(p, x));
}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+
+ return (!__sync_bool_compare_and_swap(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ __sync_lock_test_and_set(p, x);
+}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
@@ -145,90 +291,184 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
/******************************************************************************/
/* 32-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
+ uint32_t t = x;
- return (__sync_add_and_fetch(p, x));
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
+ uint32_t t;
- return (__sync_sub_and_fetch(p, x));
+ x = (uint32_t)(-(int32_t)x);
+ t = x;
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
-#elif (defined(_MSC_VER))
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ uint8_t success;
+
+ asm volatile (
+ "lock; cmpxchgl %4, %0;"
+ "sete %1;"
+ : "=m" (*p), "=a" (success) /* Outputs. */
+ : "m" (*p), "a" (c), "r" (s) /* Inputs. */
+ : "memory"
+ );
+
+ return (!(bool)success);
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ asm volatile (
+ "xchgl %1, %0;" /* Lock is implied by xchgl. */
+ : "=m" (*p), "+r" (x) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
-
- return (InterlockedExchangeAdd(p, x));
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (atomic_fetch_add(a, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (atomic_fetch_sub(a, x) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (!atomic_compare_exchange_strong(a, &c, s));
+}
- return (InterlockedExchangeAdd(p, -((int32_t)x)));
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ atomic_store(a, x);
}
-#elif (defined(JEMALLOC_OSATOMIC))
+#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
+ return (atomic_fetchadd_32(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+ return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!atomic_cmpset_32(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ atomic_store_rel_32(p, x);
}
-#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
+ return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- x = (uint32_t)(-(int32_t)x);
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
+ return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+}
- return (x);
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p));
}
-#elif (defined(JEMALLOC_ATOMIC9))
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+ uint32_t o;
+
+	/* The documented OSAtomic*() API does not expose an atomic exchange. */
+ do {
+ o = atomic_read_uint32(p);
+ } while (atomic_cas_uint32(p, o, x));
+}
+#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- return (atomic_fetchadd_32(p, x) + x);
+ return (InterlockedExchangeAdd(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
+ return (InterlockedExchangeAdd(p, -((int32_t)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ uint32_t o;
+
+ o = InterlockedCompareExchange(p, s, c);
+ return (o != c);
}
-#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ InterlockedExchange(p, x);
+}
+#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \
+ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
@@ -242,11 +482,73 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
return (__sync_sub_and_fetch(p, x));
}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!__sync_bool_compare_and_swap(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ __sync_lock_test_and_set(p, x);
+}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
/******************************************************************************/
+/* Pointer operations. */
+JEMALLOC_INLINE void *
+atomic_add_p(void **p, void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_PTR == 2)
+ return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE void *
+atomic_sub_p(void **p, void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return ((void *)atomic_add_uint64((uint64_t *)p,
+ (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_PTR == 2)
+ return ((void *)atomic_add_uint32((uint32_t *)p,
+ (uint32_t)-((int32_t)x)));
+#endif
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_p(void **p, void *c, void *s)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_PTR == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_p(void **p, const void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_PTR == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
+/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
@@ -272,6 +574,28 @@ atomic_sub_z(size_t *p, size_t x)
#endif
}
+JEMALLOC_INLINE bool
+atomic_cas_z(size_t *p, size_t c, size_t s)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_PTR == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_z(size_t *p, size_t x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_PTR == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
@@ -297,6 +621,29 @@ atomic_sub_u(unsigned *p, unsigned x)
(uint32_t)-((int32_t)x)));
#endif
}
+
+JEMALLOC_INLINE bool
+atomic_cas_u(unsigned *p, unsigned c, unsigned s)
+{
+
+#if (LG_SIZEOF_INT == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_INT == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_INT == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
/******************************************************************************/
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h
index 9cf75ffb0..39e46ee44 100644
--- a/deps/jemalloc/include/jemalloc/internal/base.h
+++ b/deps/jemalloc/include/jemalloc/internal/base.h
@@ -10,9 +10,7 @@
#ifdef JEMALLOC_H_EXTERNS
void *base_alloc(size_t size);
-void *base_calloc(size_t number, size_t size);
-extent_node_t *base_node_alloc(void);
-void base_node_dealloc(extent_node_t *node);
+void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped);
bool base_boot(void);
void base_prefork(void);
void base_postfork_parent(void);
diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h
index 605ebac58..fcc6005c7 100644
--- a/deps/jemalloc/include/jemalloc/internal/bitmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h
@@ -3,6 +3,7 @@
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
+#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
@@ -14,6 +15,51 @@ typedef unsigned long bitmap_t;
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+/* Number of groups required to store a given number of bits. */
+#define BITMAP_BITS2GROUPS(nbits) \
+ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
+
+/*
+ * Number of groups required at a particular level for a given number of bits.
+ */
+#define BITMAP_GROUPS_L0(nbits) \
+ BITMAP_BITS2GROUPS(nbits)
+#define BITMAP_GROUPS_L1(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits))
+#define BITMAP_GROUPS_L2(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))
+#define BITMAP_GROUPS_L3(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \
+ BITMAP_BITS2GROUPS((nbits)))))
+
+/*
+ * Assuming the number of levels, number of groups required for a given number
+ * of bits.
+ */
+#define BITMAP_GROUPS_1_LEVEL(nbits) \
+ BITMAP_GROUPS_L0(nbits)
+#define BITMAP_GROUPS_2_LEVEL(nbits) \
+ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits))
+#define BITMAP_GROUPS_3_LEVEL(nbits) \
+ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
+#define BITMAP_GROUPS_4_LEVEL(nbits) \
+ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
+
+/*
+ * Maximum number of groups required to support LG_BITMAP_MAXBITS.
+ */
+#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
+#else
+# error "Unsupported bitmap size"
+#endif
+
/* Maximum number of levels possible. */
#define BITMAP_MAX_LEVELS \
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
@@ -93,7 +139,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
bitmap_t g;
assert(bit < binfo->nbits);
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
@@ -126,15 +172,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
bitmap_t g;
unsigned i;
- assert(bitmap_full(bitmap, binfo) == false);
+ assert(!bitmap_full(bitmap, binfo));
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
- bit = ffsl(g) - 1;
+ bit = jemalloc_ffsl(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
- bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+ bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1);
}
bitmap_set(bitmap, binfo, bit);
@@ -158,7 +204,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
@@ -172,7 +218,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
== 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- if (propagate == false)
+ if (!propagate)
break;
}
}
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h
index 87d8700da..5d1938353 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk.h
@@ -5,7 +5,7 @@
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
-#define LG_CHUNK_DEFAULT 22
+#define LG_CHUNK_DEFAULT 21
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
@@ -19,6 +19,16 @@
#define CHUNK_CEILING(s) \
(((s) + chunksize_mask) & ~chunksize_mask)
+#define CHUNK_HOOKS_INITIALIZER { \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL \
+}
+
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
@@ -30,23 +40,36 @@
extern size_t opt_lg_chunk;
extern const char *opt_dss;
-/* Protects stats_chunks; currently not used for any other purpose. */
-extern malloc_mutex_t chunks_mtx;
-/* Chunk statistics. */
-extern chunk_stats_t stats_chunks;
-
-extern rtree_t *chunks_rtree;
+extern rtree_t chunks_rtree;
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
-extern size_t map_bias; /* Number of arena chunk header pages. */
-extern size_t arena_maxclass; /* Max size class for arenas. */
-void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
- dss_prec_t dss_prec);
-void chunk_unmap(void *chunk, size_t size);
-void chunk_dealloc(void *chunk, size_t size, bool unmap);
+extern const chunk_hooks_t chunk_hooks_default;
+
+chunk_hooks_t chunk_hooks_get(arena_t *arena);
+chunk_hooks_t chunk_hooks_set(arena_t *arena,
+ const chunk_hooks_t *chunk_hooks);
+
+bool chunk_register(const void *chunk, const extent_node_t *node);
+void chunk_deregister(const void *chunk, const extent_node_t *node);
+void *chunk_alloc_base(size_t size);
+void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *new_addr, size_t size, size_t alignment, bool *zero,
+ bool dalloc_node);
+void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
+void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool committed);
+void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool zeroed, bool committed);
+void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool committed);
+bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset,
+ size_t length);
+bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, size_t offset, size_t length);
bool chunk_boot(void);
void chunk_prefork(void);
void chunk_postfork_parent(void);
@@ -56,6 +79,19 @@ void chunk_postfork_child(void);
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+extent_node_t *chunk_lookup(const void *chunk, bool dependent);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_))
+JEMALLOC_INLINE extent_node_t *
+chunk_lookup(const void *ptr, bool dependent)
+{
+
+ return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent));
+}
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
index 4535ce09c..388f46be0 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
@@ -23,7 +23,8 @@ extern const char *dss_prec_names[];
dss_prec_t chunk_dss_prec_get(void);
bool chunk_dss_prec_set(dss_prec_t dss_prec);
-void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
+void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit);
bool chunk_in_dss(void *chunk);
bool chunk_dss_boot(void);
void chunk_dss_prefork(void);
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
index f24abac75..7d8014c58 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
@@ -9,10 +9,9 @@
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-bool pages_purge(void *addr, size_t length);
-
-void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
-bool chunk_dealloc_mmap(void *chunk, size_t size);
+void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero,
+ bool *commit);
+bool chunk_dalloc_mmap(void *chunk, size_t size);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h
index 58712a6a7..75c1c979f 100644
--- a/deps/jemalloc/include/jemalloc/internal/ckh.h
+++ b/deps/jemalloc/include/jemalloc/internal/ckh.h
@@ -66,13 +66,13 @@ struct ckh_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
-void ckh_delete(ckh_t *ckh);
+void ckh_delete(tsd_t *tsd, ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
-bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
-bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
+bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
void ckh_string_hash(const void *key, size_t r_hash[2]);
diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h
index 0ffecc5f2..751c14b5b 100644
--- a/deps/jemalloc/include/jemalloc/internal/ctl.h
+++ b/deps/jemalloc/include/jemalloc/internal/ctl.h
@@ -34,6 +34,7 @@ struct ctl_arena_stats_s {
bool initialized;
unsigned nthreads;
const char *dss;
+ ssize_t lg_dirty_mult;
size_t pactive;
size_t pdirty;
arena_stats_t astats;
@@ -46,22 +47,15 @@ struct ctl_arena_stats_s {
malloc_bin_stats_t bstats[NBINS];
malloc_large_stats_t *lstats; /* nlclasses elements. */
+ malloc_huge_stats_t *hstats; /* nhclasses elements. */
};
struct ctl_stats_s {
size_t allocated;
size_t active;
+ size_t metadata;
+ size_t resident;
size_t mapped;
- struct {
- size_t current; /* stats_chunks.curchunks */
- uint64_t total; /* stats_chunks.nchunks */
- size_t high; /* stats_chunks.highchunks */
- } chunks;
- struct {
- size_t allocated; /* huge_allocated */
- uint64_t nmalloc; /* huge_nmalloc */
- uint64_t ndalloc; /* huge_ndalloc */
- } huge;
unsigned narenas;
ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
};
diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h
index ba95ca816..386d50ef4 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent.h
@@ -7,25 +7,53 @@ typedef struct extent_node_s extent_node_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
-/* Tree of extents. */
+/* Tree of extents. Use accessor functions for en_* fields. */
struct extent_node_s {
- /* Linkage for the size/address-ordered tree. */
- rb_node(extent_node_t) link_szad;
+ /* Arena from which this extent came, if any. */
+ arena_t *en_arena;
- /* Linkage for the address-ordered tree. */
- rb_node(extent_node_t) link_ad;
+ /* Pointer to the extent that this tree node is responsible for. */
+ void *en_addr;
+
+ /* Total region size. */
+ size_t en_size;
+
+ /*
+ * The zeroed flag is used by chunk recycling code to track whether
+ * memory is zero-filled.
+ */
+ bool en_zeroed;
+
+ /*
+ * True if physical memory is committed to the extent, whether
+ * explicitly or implicitly as on a system that overcommits and
+ * satisfies physical memory needs on demand via soft page faults.
+ */
+ bool en_committed;
+
+ /*
+ * The achunk flag is used to validate that huge allocation lookups
+ * don't return arena chunks.
+ */
+ bool en_achunk;
/* Profile counters, used for huge objects. */
- prof_ctx_t *prof_ctx;
+ prof_tctx_t *en_prof_tctx;
- /* Pointer to the extent that this tree node is responsible for. */
- void *addr;
+ /* Linkage for arena's runs_dirty and chunks_cache rings. */
+ arena_runs_dirty_link_t rd;
+ qr(extent_node_t) cc_link;
- /* Total region size. */
- size_t size;
+ union {
+ /* Linkage for the size/address-ordered tree. */
+ rb_node(extent_node_t) szad_link;
+
+ /* Linkage for arena's huge and node_cache lists. */
+ ql_elm(extent_node_t) ql_link;
+ };
- /* True if zero-filled; used by chunk recycling code. */
- bool zeroed;
+ /* Linkage for the address-ordered tree. */
+ rb_node(extent_node_t) ad_link;
};
typedef rb_tree(extent_node_t) extent_tree_t;
@@ -41,6 +69,171 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+arena_t *extent_node_arena_get(const extent_node_t *node);
+void *extent_node_addr_get(const extent_node_t *node);
+size_t extent_node_size_get(const extent_node_t *node);
+bool extent_node_zeroed_get(const extent_node_t *node);
+bool extent_node_committed_get(const extent_node_t *node);
+bool extent_node_achunk_get(const extent_node_t *node);
+prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node);
+void extent_node_arena_set(extent_node_t *node, arena_t *arena);
+void extent_node_addr_set(extent_node_t *node, void *addr);
+void extent_node_size_set(extent_node_t *node, size_t size);
+void extent_node_zeroed_set(extent_node_t *node, bool zeroed);
+void extent_node_committed_set(extent_node_t *node, bool committed);
+void extent_node_achunk_set(extent_node_t *node, bool achunk);
+void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx);
+void extent_node_init(extent_node_t *node, arena_t *arena, void *addr,
+ size_t size, bool zeroed, bool committed);
+void extent_node_dirty_linkage_init(extent_node_t *node);
+void extent_node_dirty_insert(extent_node_t *node,
+ arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty);
+void extent_node_dirty_remove(extent_node_t *node);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_))
+JEMALLOC_INLINE arena_t *
+extent_node_arena_get(const extent_node_t *node)
+{
+
+ return (node->en_arena);
+}
+
+JEMALLOC_INLINE void *
+extent_node_addr_get(const extent_node_t *node)
+{
+
+ return (node->en_addr);
+}
+
+JEMALLOC_INLINE size_t
+extent_node_size_get(const extent_node_t *node)
+{
+
+ return (node->en_size);
+}
+
+JEMALLOC_INLINE bool
+extent_node_zeroed_get(const extent_node_t *node)
+{
+
+ return (node->en_zeroed);
+}
+
+JEMALLOC_INLINE bool
+extent_node_committed_get(const extent_node_t *node)
+{
+
+ assert(!node->en_achunk);
+ return (node->en_committed);
+}
+
+JEMALLOC_INLINE bool
+extent_node_achunk_get(const extent_node_t *node)
+{
+
+ return (node->en_achunk);
+}
+
+JEMALLOC_INLINE prof_tctx_t *
+extent_node_prof_tctx_get(const extent_node_t *node)
+{
+
+ return (node->en_prof_tctx);
+}
+
+JEMALLOC_INLINE void
+extent_node_arena_set(extent_node_t *node, arena_t *arena)
+{
+
+ node->en_arena = arena;
+}
+
+JEMALLOC_INLINE void
+extent_node_addr_set(extent_node_t *node, void *addr)
+{
+
+ node->en_addr = addr;
+}
+
+JEMALLOC_INLINE void
+extent_node_size_set(extent_node_t *node, size_t size)
+{
+
+ node->en_size = size;
+}
+
+JEMALLOC_INLINE void
+extent_node_zeroed_set(extent_node_t *node, bool zeroed)
+{
+
+ node->en_zeroed = zeroed;
+}
+
+JEMALLOC_INLINE void
+extent_node_committed_set(extent_node_t *node, bool committed)
+{
+
+ node->en_committed = committed;
+}
+
+JEMALLOC_INLINE void
+extent_node_achunk_set(extent_node_t *node, bool achunk)
+{
+
+ node->en_achunk = achunk;
+}
+
+JEMALLOC_INLINE void
+extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx)
+{
+
+ node->en_prof_tctx = tctx;
+}
+
+JEMALLOC_INLINE void
+extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size,
+ bool zeroed, bool committed)
+{
+
+ extent_node_arena_set(node, arena);
+ extent_node_addr_set(node, addr);
+ extent_node_size_set(node, size);
+ extent_node_zeroed_set(node, zeroed);
+ extent_node_committed_set(node, committed);
+ extent_node_achunk_set(node, false);
+ if (config_prof)
+ extent_node_prof_tctx_set(node, NULL);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_linkage_init(extent_node_t *node)
+{
+
+ qr_new(&node->rd, rd_link);
+ qr_new(node, cc_link);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_insert(extent_node_t *node,
+ arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty)
+{
+
+ qr_meld(runs_dirty, &node->rd, rd_link);
+ qr_meld(chunks_dirty, node, cc_link);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_remove(extent_node_t *node)
+{
+
+ qr_remove(&node->rd, rd_link);
+ qr_remove(node, cc_link);
+}
+
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h
index c7183ede8..bcead337a 100644
--- a/deps/jemalloc/include/jemalloc/internal/hash.h
+++ b/deps/jemalloc/include/jemalloc/internal/hash.h
@@ -35,13 +35,14 @@ JEMALLOC_INLINE uint32_t
hash_rotl_32(uint32_t x, int8_t r)
{
- return (x << r) | (x >> (32 - r));
+ return ((x << r) | (x >> (32 - r)));
}
JEMALLOC_INLINE uint64_t
hash_rotl_64(uint64_t x, int8_t r)
{
- return (x << r) | (x >> (64 - r));
+
+ return ((x << r) | (x >> (64 - r)));
}
JEMALLOC_INLINE uint32_t
@@ -76,9 +77,9 @@ hash_fmix_64(uint64_t k)
{
k ^= k >> 33;
- k *= QU(0xff51afd7ed558ccdLLU);
+ k *= KQU(0xff51afd7ed558ccd);
k ^= k >> 33;
- k *= QU(0xc4ceb9fe1a85ec53LLU);
+ k *= KQU(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return (k);
@@ -247,8 +248,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t h1 = seed;
uint64_t h2 = seed;
- const uint64_t c1 = QU(0x87c37b91114253d5LLU);
- const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
+ const uint64_t c1 = KQU(0x87c37b91114253d5);
+ const uint64_t c2 = KQU(0x4cf5ad432745937f);
/* body */
{
diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h
index a2b9c7791..ece7af980 100644
--- a/deps/jemalloc/include/jemalloc/internal/huge.h
+++ b/deps/jemalloc/include/jemalloc/internal/huge.h
@@ -9,34 +9,24 @@
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-/* Huge allocation statistics. */
-extern uint64_t huge_nmalloc;
-extern uint64_t huge_ndalloc;
-extern size_t huge_allocated;
-
-/* Protects chunk-related data structures. */
-extern malloc_mutex_t huge_mtx;
-
-void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec);
-void *huge_palloc(size_t size, size_t alignment, bool zero,
- dss_prec_t dss_prec);
-bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
- size_t extra);
-void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec);
+void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache);
+void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache);
+bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min,
+ size_t usize_max, bool zero);
+void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+ size_t usize, size_t alignment, bool zero, tcache_t *tcache);
#ifdef JEMALLOC_JET
typedef void (huge_dalloc_junk_t)(void *, size_t);
extern huge_dalloc_junk_t *huge_dalloc_junk;
#endif
-void huge_dalloc(void *ptr, bool unmap);
+void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+arena_t *huge_aalloc(const void *ptr);
size_t huge_salloc(const void *ptr);
-dss_prec_t huge_dss_prec_get(arena_t *arena);
-prof_ctx_t *huge_prof_ctx_get(const void *ptr);
-void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-bool huge_boot(void);
-void huge_prefork(void);
-void huge_postfork_parent(void);
-void huge_postfork_child(void);
+prof_tctx_t *huge_prof_tctx_get(const void *ptr);
+void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void huge_prof_tctx_reset(const void *ptr);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 574bbb141..8536a3eda 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -1,70 +1,13 @@
#ifndef JEMALLOC_INTERNAL_H
#define JEMALLOC_INTERNAL_H
-#include <math.h>
-#ifdef _WIN32
-# include <windows.h>
-# define ENOENT ERROR_PATH_NOT_FOUND
-# define EINVAL ERROR_BAD_ARGUMENTS
-# define EAGAIN ERROR_OUTOFMEMORY
-# define EPERM ERROR_WRITE_FAULT
-# define EFAULT ERROR_INVALID_ADDRESS
-# define ENOMEM ERROR_NOT_ENOUGH_MEMORY
-# undef ERANGE
-# define ERANGE ERROR_INVALID_DATA
-#else
-# include <sys/param.h>
-# include <sys/mman.h>
-# include <sys/syscall.h>
-# if !defined(SYS_write) && defined(__NR_write)
-# define SYS_write __NR_write
-# endif
-# include <sys/uio.h>
-# include <pthread.h>
-# include <errno.h>
-#endif
-#include <sys/types.h>
-
-#include <limits.h>
-#ifndef SIZE_T_MAX
-# define SIZE_T_MAX SIZE_MAX
-#endif
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#ifndef offsetof
-# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
-#endif
-#include <inttypes.h>
-#include <string.h>
-#include <strings.h>
-#include <ctype.h>
-#ifdef _MSC_VER
-# include <io.h>
-typedef intptr_t ssize_t;
-# define PATH_MAX 1024
-# define STDERR_FILENO 2
-# define __func__ __FUNCTION__
-/* Disable warnings about deprecated system functions */
-# pragma warning(disable: 4996)
-#else
-# include <unistd.h>
-#endif
-#include <fcntl.h>
#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
#ifdef JEMALLOC_UTRACE
#include <sys/ktrace.h>
#endif
-#ifdef JEMALLOC_VALGRIND
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-#endif
-
#define JEMALLOC_NO_DEMANGLE
#ifdef JEMALLOC_JET
# define JEMALLOC_N(n) jet_##n
@@ -85,7 +28,7 @@ static const bool config_debug =
false
#endif
;
-static const bool config_dss =
+static const bool have_dss =
#ifdef JEMALLOC_DSS
true
#else
@@ -127,8 +70,8 @@ static const bool config_prof_libunwind =
false
#endif
;
-static const bool config_mremap =
-#ifdef JEMALLOC_MREMAP
+static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
true
#else
false
@@ -190,6 +133,17 @@ static const bool config_ivsalloc =
false
#endif
;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ true
+#else
+ false
+#endif
+ ;
+
+#ifdef JEMALLOC_C11ATOMICS
+#include <stdatomic.h>
+#endif
#ifdef JEMALLOC_ATOMIC9
#include <machine/atomic.h>
@@ -229,15 +183,43 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/jemalloc_internal_macros.h"
+/* Size class index type. */
+typedef unsigned szind_t;
+
+/*
+ * Flags bits:
+ *
+ * a: arena
+ * t: tcache
+ * 0: unused
+ * z: zero
+ * n: alignment
+ *
+ * aaaaaaaa aaaatttt tttttttt 0znnnnnn
+ */
+#define MALLOCX_ARENA_MASK ((int)~0xfffff)
+#define MALLOCX_ARENA_MAX 0xffe
+#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU)
+#define MALLOCX_TCACHE_MAX 0xffd
#define MALLOCX_LG_ALIGN_MASK ((int)0x3f)
-#define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
+/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
+#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \
+ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
+#define MALLOCX_ALIGN_GET(flags) \
+ (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
+#define MALLOCX_ZERO_GET(flags) \
+ ((bool)(flags & MALLOCX_ZERO))
+
+#define MALLOCX_TCACHE_GET(flags) \
+ (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2)
+#define MALLOCX_ARENA_GET(flags) \
+ (((unsigned)(((unsigned)flags) >> 20)) - 1)
/* Smallest size class to support. */
-#define LG_TINY_MIN 3
#define TINY_MIN (1U << LG_TINY_MIN)
/*
- * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
#ifndef LG_QUANTUM
@@ -250,7 +232,7 @@ static const bool config_ivsalloc =
# ifdef __alpha__
# define LG_QUANTUM 4
# endif
-# ifdef __sparc64__
+# if (defined(__sparc64__) || defined(__sparcv9))
# define LG_QUANTUM 4
# endif
# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
@@ -268,6 +250,9 @@ static const bool config_ivsalloc =
# ifdef __mips__
# define LG_QUANTUM 3
# endif
+# ifdef __or1k__
+# define LG_QUANTUM 3
+# endif
# ifdef __powerpc__
# define LG_QUANTUM 4
# endif
@@ -280,8 +265,12 @@ static const bool config_ivsalloc =
# ifdef __tile__
# define LG_QUANTUM 4
# endif
+# ifdef __le32__
+# define LG_QUANTUM 4
+# endif
# ifndef LG_QUANTUM
-# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS"
+# error "Unknown minimum alignment for architecture; specify via "
+ "--with-lg-quantum"
# endif
#endif
@@ -321,12 +310,11 @@ static const bool config_ivsalloc =
#define CACHELINE_CEILING(s) \
(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
-/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */
+/* Page size. LG_PAGE is determined by the configure script. */
#ifdef PAGE_MASK
# undef PAGE_MASK
#endif
-#define LG_PAGE STATIC_PAGE_SHIFT
-#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT))
+#define PAGE ((size_t)(1U << LG_PAGE))
#define PAGE_MASK ((size_t)(PAGE - 1))
/* Return the smallest pagesize multiple that is >= s. */
@@ -345,7 +333,7 @@ static const bool config_ivsalloc =
#define ALIGNMENT_CEILING(s, alignment) \
(((s) + (alignment - 1)) & (-(alignment)))
-/* Declare a variable length array */
+/* Declare a variable-length array. */
#if __STDC_VERSION__ < 199901L
# ifdef _MSC_VER
# include <malloc.h>
@@ -358,86 +346,12 @@ static const bool config_ivsalloc =
# endif
# endif
# define VARIABLE_ARRAY(type, name, count) \
- type *name = alloca(sizeof(type) * count)
-#else
-# define VARIABLE_ARRAY(type, name, count) type name[count]
-#endif
-
-#ifdef JEMALLOC_VALGRIND
-/*
- * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions
- * so that when Valgrind reports errors, there are no extra stack frames
- * in the backtraces.
- *
- * The size that is reported to valgrind must be consistent through a chain of
- * malloc..realloc..realloc calls. Request size isn't recorded anywhere in
- * jemalloc, so it is critical that all callers of these macros provide usize
- * rather than request size. As a result, buffer overflow detection is
- * technically weakened for the standard API, though it is generally accepted
- * practice to consider any extra bytes reported by malloc_usable_size() as
- * usable space.
- */
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
- if (config_valgrind && opt_valgrind && cond) \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
-} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do { \
- if (config_valgrind && opt_valgrind) { \
- size_t rzsize = p2rz(ptr); \
- \
- if (ptr == old_ptr) { \
- VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
- usize, rzsize); \
- if (zero && old_usize < usize) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- old_usize), usize - old_usize); \
- } \
- } else { \
- if (old_ptr != NULL) { \
- VALGRIND_FREELIKE_BLOCK(old_ptr, \
- old_rzsize); \
- } \
- if (ptr != NULL) { \
- size_t copy_size = (old_usize < usize) \
- ? old_usize : usize; \
- size_t tail_size = usize - copy_size; \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
- rzsize, false); \
- if (copy_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED(ptr, \
- copy_size); \
- } \
- if (zero && tail_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- copy_size), tail_size); \
- } \
- } \
- } \
- } \
-} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
- if (config_valgrind && opt_valgrind) \
- VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \
-} while (0)
+ type *name = alloca(sizeof(type) * (count))
#else
-#define RUNNING_ON_VALGRIND ((unsigned)0)
-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
- do {} while (0)
-#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \
- do {} while (0)
-#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0)
-#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
+# define VARIABLE_ARRAY(type, name, count) type name[(count)]
#endif
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -452,9 +366,10 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
@@ -464,6 +379,7 @@ static const bool config_ivsalloc =
/******************************************************************************/
#define JEMALLOC_H_STRUCTS
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -472,68 +388,83 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
+#define JEMALLOC_ARENA_STRUCTS_A
+#include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_STRUCTS_A
#include "jemalloc/internal/extent.h"
+#define JEMALLOC_ARENA_STRUCTS_B
#include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_STRUCTS_B
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
#include "jemalloc/internal/prof.h"
-typedef struct {
- uint64_t allocated;
- uint64_t deallocated;
-} thread_allocated_t;
-/*
- * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro
- * argument.
- */
-#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0})
+#include "jemalloc/internal/tsd.h"
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
#define JEMALLOC_H_EXTERNS
extern bool opt_abort;
-extern bool opt_junk;
+extern const char *opt_junk;
+extern bool opt_junk_alloc;
+extern bool opt_junk_free;
extern size_t opt_quarantine;
extern bool opt_redzone;
extern bool opt_utrace;
-extern bool opt_valgrind;
extern bool opt_xmalloc;
extern bool opt_zero;
extern size_t opt_narenas;
+extern bool in_valgrind;
+
/* Number of CPUs. */
extern unsigned ncpus;
-/* Protects arenas initialization (arenas, arenas_total). */
-extern malloc_mutex_t arenas_lock;
/*
- * Arenas that are used to service external requests. Not all elements of the
- * arenas array are necessarily used; arenas are created lazily as needed.
- *
- * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
- * arenas. arenas[narenas_auto..narenas_total) are only used if the application
- * takes some action to create them and allocate from them.
+ * index2size_tab encodes the same information as could be computed (at
+ * unacceptable cost in some code paths) by index2size_compute().
*/
-extern arena_t **arenas;
-extern unsigned narenas_total;
-extern unsigned narenas_auto; /* Read-only after initialization. */
-
+extern size_t const index2size_tab[NSIZES];
+/*
+ * size2index_tab is a compact lookup table that rounds request sizes up to
+ * size classes. In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via size2index().
+ */
+extern uint8_t const size2index_tab[];
+
+arena_t *a0get(void);
+void *a0malloc(size_t size);
+void a0dalloc(void *ptr);
+void *bootstrap_malloc(size_t size);
+void *bootstrap_calloc(size_t num, size_t size);
+void bootstrap_free(void *ptr);
arena_t *arenas_extend(unsigned ind);
-void arenas_cleanup(void *arg);
-arena_t *choose_arena_hard(void);
+arena_t *arena_init(unsigned ind);
+unsigned narenas_total_get(void);
+arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing);
+arena_t *arena_choose_hard(tsd_t *tsd);
+void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+unsigned arena_nbound(unsigned ind);
+void thread_allocated_cleanup(tsd_t *tsd);
+void thread_deallocated_cleanup(tsd_t *tsd);
+void arena_cleanup(tsd_t *tsd);
+void arenas_cache_cleanup(tsd_t *tsd);
+void narenas_cache_cleanup(tsd_t *tsd);
+void arenas_cache_bypass_cleanup(tsd_t *tsd);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -542,24 +473,26 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
#include "jemalloc/internal/prof.h"
+#include "jemalloc/internal/tsd.h"
#undef JEMALLOC_H_EXTERNS
/******************************************************************************/
#define JEMALLOC_H_INLINES
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -572,26 +505,158 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *)
-
+szind_t size2index_compute(size_t size);
+szind_t size2index_lookup(size_t size);
+szind_t size2index(size_t size);
+size_t index2size_compute(szind_t index);
+size_t index2size_lookup(szind_t index);
+size_t index2size(szind_t index);
+size_t s2u_compute(size_t size);
+size_t s2u_lookup(size_t size);
size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment);
-unsigned narenas_total_get(void);
-arena_t *choose_arena(arena_t *arena);
+arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-/*
- * Map of pthread_self() --> arenas[???], used for selecting an arena to use
- * for allocations.
- */
-malloc_tsd_externs(arenas, arena_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL,
- arenas_cleanup)
+JEMALLOC_INLINE szind_t
+size2index_compute(size_t size)
+{
+
+#if (NTBINS != 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ size_t lg_ceil = lg_floor(pow2_ceil(size));
+ return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
+ }
+#endif
+ {
+ size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
+ (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
+ : lg_floor((size<<1)-1);
+ size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+ x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
+ size_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+
+ size_t delta_inverse_mask = ZI(-1) << lg_delta;
+ size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+ ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+ size_t index = NTBINS + grp + mod;
+ return (index);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+size2index_lookup(size_t size)
+{
+
+ assert(size <= LOOKUP_MAXCLASS);
+ {
+ size_t ret = ((size_t)(size2index_tab[(size-1) >>
+ LG_TINY_MIN]));
+ assert(ret == size2index_compute(size));
+ return (ret);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+size2index(size_t size)
+{
+
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS))
+ return (size2index_lookup(size));
+ return (size2index_compute(size));
+}
+
+JEMALLOC_INLINE size_t
+index2size_compute(szind_t index)
+{
+
+#if (NTBINS > 0)
+ if (index < NTBINS)
+ return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
+#endif
+ {
+ size_t reduced_index = index - NTBINS;
+ size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP;
+ size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
+ 1);
+
+ size_t grp_size_mask = ~((!!grp)-1);
+ size_t grp_size = ((ZU(1) << (LG_QUANTUM +
+ (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+ size_t shift = (grp == 0) ? 1 : grp;
+ size_t lg_delta = shift + (LG_QUANTUM-1);
+ size_t mod_size = (mod+1) << lg_delta;
+
+ size_t usize = grp_size + mod_size;
+ return (usize);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+index2size_lookup(szind_t index)
+{
+ size_t ret = (size_t)index2size_tab[index];
+ assert(ret == index2size_compute(index));
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+index2size(szind_t index)
+{
+
+ assert(index < NSIZES);
+ return (index2size_lookup(index));
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+s2u_compute(size_t size)
+{
+
+#if (NTBINS > 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ size_t lg_ceil = lg_floor(pow2_ceil(size));
+ return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
+ (ZU(1) << lg_ceil));
+ }
+#endif
+ {
+ size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
+ (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
+ : lg_floor((size<<1)-1);
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+ size_t delta = ZU(1) << lg_delta;
+ size_t delta_mask = delta - 1;
+ size_t usize = (size + delta_mask) & ~delta_mask;
+ return (usize);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+s2u_lookup(size_t size)
+{
+ size_t ret = index2size_lookup(size2index_lookup(size));
+
+ assert(ret == s2u_compute(size));
+ return (ret);
+}
/*
* Compute usable size that would result from allocating an object with the
@@ -601,11 +666,10 @@ JEMALLOC_ALWAYS_INLINE size_t
s2u(size_t size)
{
- if (size <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
- if (size <= arena_maxclass)
- return (PAGE_CEILING(size));
- return (CHUNK_CEILING(size));
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS))
+ return (s2u_lookup(size));
+ return (s2u_compute(size));
}
/*
@@ -619,108 +683,128 @@ sa2u(size_t size, size_t alignment)
assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
- /*
- * Round size up to the nearest multiple of alignment.
- *
- * This done, we can take advantage of the fact that for each small
- * size class, every object is aligned at the smallest power of two
- * that is non-zero in the base two representation of the size. For
- * example:
- *
- * Size | Base 2 | Minimum alignment
- * -----+----------+------------------
- * 96 | 1100000 | 32
- * 144 | 10100000 | 32
- * 192 | 11000000 | 64
- */
- usize = ALIGNMENT_CEILING(size, alignment);
- /*
- * (usize < size) protects against the combination of maximal
- * alignment and size greater than maximal alignment.
- */
- if (usize < size) {
- /* size_t overflow. */
- return (0);
+ /* Try for a small size class. */
+ if (size <= SMALL_MAXCLASS && alignment < PAGE) {
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each
+ * small size class, every object is aligned at the smallest
+ * power of two that is non-zero in the base two representation
+ * of the size. For example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ */
+ usize = s2u(ALIGNMENT_CEILING(size, alignment));
+ if (usize < LARGE_MINCLASS)
+ return (usize);
}
- if (usize <= arena_maxclass && alignment <= PAGE) {
- if (usize <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
- return (PAGE_CEILING(usize));
- } else {
- size_t run_size;
-
+ /* Try for a large size class. */
+ if (likely(size <= large_maxclass) && likely(alignment < chunksize)) {
/*
* We can't achieve subpage alignment, so round up alignment
- * permanently; it makes later calculations simpler.
+ * to the minimum that can actually be supported.
*/
alignment = PAGE_CEILING(alignment);
- usize = PAGE_CEILING(size);
- /*
- * (usize < size) protects against very large sizes within
- * PAGE of SIZE_T_MAX.
- *
- * (usize + alignment < usize) protects against the
- * combination of maximal alignment and usize large enough
- * to cause overflow. This is similar to the first overflow
- * check above, but it needs to be repeated due to the new
- * usize value, which may now be *equal* to maximal
- * alignment, whereas before we only detected overflow if the
- * original size was *greater* than maximal alignment.
- */
- if (usize < size || usize + alignment < usize) {
- /* size_t overflow. */
- return (0);
- }
+
+ /* Make sure result is a large size class. */
+ usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size);
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
- * If the run wouldn't fit within a chunk, round up to a huge
- * allocation size.
*/
- run_size = usize + alignment - PAGE;
- if (run_size <= arena_maxclass)
- return (PAGE_CEILING(usize));
- return (CHUNK_CEILING(usize));
+ if (usize + large_pad + alignment - PAGE <= arena_maxrun)
+ return (usize);
}
-}
-JEMALLOC_INLINE unsigned
-narenas_total_get(void)
-{
- unsigned narenas;
+ /* Huge size class. Beware of size_t overflow. */
+
+ /*
+ * We can't achieve subchunk alignment, so round up alignment to the
+ * minimum that can actually be supported.
+ */
+ alignment = CHUNK_CEILING(alignment);
+ if (alignment == 0) {
+ /* size_t overflow. */
+ return (0);
+ }
- malloc_mutex_lock(&arenas_lock);
- narenas = narenas_total;
- malloc_mutex_unlock(&arenas_lock);
+ /* Make sure result is a huge size class. */
+ if (size <= chunksize)
+ usize = chunksize;
+ else {
+ usize = s2u(size);
+ if (usize < size) {
+ /* size_t overflow. */
+ return (0);
+ }
+ }
- return (narenas);
+ /*
+ * Calculate the multi-chunk mapping that huge_palloc() would need in
+ * order to guarantee the alignment.
+ */
+ if (usize + alignment - PAGE < usize) {
+ /* size_t overflow. */
+ return (0);
+ }
+ return (usize);
}
/* Choose an arena based on a per-thread value. */
JEMALLOC_INLINE arena_t *
-choose_arena(arena_t *arena)
+arena_choose(tsd_t *tsd, arena_t *arena)
{
arena_t *ret;
if (arena != NULL)
return (arena);
- if ((ret = *arenas_tsd_get()) == NULL) {
- ret = choose_arena_hard();
- assert(ret != NULL);
- }
+ if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
+ ret = arena_choose_hard(tsd);
return (ret);
}
+
+JEMALLOC_INLINE arena_t *
+arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing)
+{
+ arena_t *arena;
+ arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+
+ /* init_if_missing requires refresh_if_missing. */
+ assert(!init_if_missing || refresh_if_missing);
+
+ if (unlikely(arenas_cache == NULL)) {
+ /* arenas_cache hasn't been initialized yet. */
+ return (arena_get_hard(tsd, ind, init_if_missing));
+ }
+ if (unlikely(ind >= tsd_narenas_cache_get(tsd))) {
+ /*
+ * ind is invalid, cache is old (too small), or arena to be
+ * initialized.
+ */
+ return (refresh_if_missing ? arena_get_hard(tsd, ind,
+ init_if_missing) : NULL);
+ }
+ arena = arenas_cache[ind];
+ if (likely(arena != NULL) || !refresh_if_missing)
+ return (arena);
+ return (arena_get_hard(tsd, ind, init_if_missing));
+}
#endif
#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/rtree.h"
/*
- * Include arena.h twice in order to resolve circular dependencies with
- * tcache.h.
+ * Include portions of arena.h interleaved with tcache.h in order to resolve
+ * circular dependencies.
*/
#define JEMALLOC_ARENA_INLINE_A
#include "jemalloc/internal/arena.h"
@@ -733,133 +817,155 @@ choose_arena(arena_t *arena)
#include "jemalloc/internal/quarantine.h"
#ifndef JEMALLOC_ENABLE_INLINE
-void *imalloct(size_t size, bool try_tcache, arena_t *arena);
-void *imalloc(size_t size);
-void *icalloct(size_t size, bool try_tcache, arena_t *arena);
-void *icalloc(size_t size);
-void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena);
-void *ipalloc(size_t usize, size_t alignment, bool zero);
+arena_t *iaalloc(const void *ptr);
size_t isalloc(const void *ptr, bool demote);
+void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache,
+ bool is_metadata, arena_t *arena);
+void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
+void *imalloc(tsd_t *tsd, size_t size);
+void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
+void *icalloc(tsd_t *tsd, size_t size);
+void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, bool is_metadata, arena_t *arena);
+void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena);
+void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
size_t ivsalloc(const void *ptr, bool demote);
size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
-void idalloct(void *ptr, bool try_tcache);
-void idalloc(void *ptr);
-void iqalloct(void *ptr, bool try_tcache);
-void iqalloc(void *ptr);
-void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
+void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata);
+void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void idalloc(tsd_t *tsd, void *ptr);
+void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t extra, size_t alignment, bool zero, tcache_t *tcache,
arena_t *arena);
-void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena);
-void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
+void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
+void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t alignment, bool zero);
+bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_ALWAYS_INLINE arena_t *
+iaalloc(const void *ptr)
+{
+
+ assert(ptr != NULL);
+
+ return (arena_aalloc(ptr));
+}
+
+/*
+ * Typical usage:
+ * void *ptr = [...]
+ * size_t sz = isalloc(ptr, config_prof);
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+isalloc(const void *ptr, bool demote)
+{
+
+ assert(ptr != NULL);
+ /* Demotion only makes sense if config_prof is true. */
+ assert(config_prof || !demote);
+
+ return (arena_salloc(ptr, demote));
+}
+
JEMALLOC_ALWAYS_INLINE void *
-imalloct(size_t size, bool try_tcache, arena_t *arena)
+iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata,
+ arena_t *arena)
{
+ void *ret;
assert(size != 0);
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, false, try_tcache));
- else
- return (huge_malloc(size, false, huge_dss_prec_get(arena)));
+ ret = arena_malloc(tsd, arena, size, zero, tcache);
+ if (config_stats && is_metadata && likely(ret != NULL)) {
+ arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+ config_prof));
+ }
+ return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-imalloc(size_t size)
+imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
{
- return (imalloct(size, true, NULL));
+ return (iallocztm(tsd, size, false, tcache, false, arena));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloct(size_t size, bool try_tcache, arena_t *arena)
+imalloc(tsd_t *tsd, size_t size)
{
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, true, try_tcache));
- else
- return (huge_malloc(size, true, huge_dss_prec_get(arena)));
+ return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloc(size_t size)
+icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
{
- return (icalloct(size, true, NULL));
+ return (iallocztm(tsd, size, true, tcache, false, arena));
}
JEMALLOC_ALWAYS_INLINE void *
-ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena)
+icalloc(tsd_t *tsd, size_t size)
+{
+
+ return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL));
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, bool is_metadata, arena_t *arena)
{
void *ret;
assert(usize != 0);
assert(usize == sa2u(usize, alignment));
- if (usize <= arena_maxclass && alignment <= PAGE)
- ret = arena_malloc(arena, usize, zero, try_tcache);
- else {
- if (usize <= arena_maxclass) {
- ret = arena_palloc(choose_arena(arena), usize,
- alignment, zero);
- } else if (alignment <= chunksize)
- ret = huge_malloc(usize, zero, huge_dss_prec_get(arena));
- else
- ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena));
- }
-
+ ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache);
assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+ if (config_stats && is_metadata && likely(ret != NULL)) {
+ arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+ config_prof));
+ }
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-ipalloc(size_t usize, size_t alignment, bool zero)
+ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena)
{
- return (ipalloct(usize, alignment, zero, true, NULL));
+ return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
}
-/*
- * Typical usage:
- * void *ptr = [...]
- * size_t sz = isalloc(ptr, config_prof);
- */
-JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
+JEMALLOC_ALWAYS_INLINE void *
+ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
{
- size_t ret;
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
- /* Demotion only makes sense if config_prof is true. */
- assert(config_prof || demote == false);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- ret = arena_salloc(ptr, demote);
- else
- ret = huge_salloc(ptr);
- return (ret);
+ return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd,
+ NULL), false, NULL));
}
JEMALLOC_ALWAYS_INLINE size_t
ivsalloc(const void *ptr, bool demote)
{
+ extent_node_t *node;
/* Return 0 if ptr is not within a chunk managed by jemalloc. */
- if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
+ node = chunk_lookup(ptr, false);
+ if (node == NULL)
return (0);
+ /* Only arena chunks should be looked up via interior pointers. */
+ assert(extent_node_addr_get(node) == ptr ||
+ extent_node_achunk_get(node));
return (isalloc(ptr, demote));
}
@@ -870,7 +976,7 @@ u2rz(size_t usize)
size_t ret;
if (usize <= SMALL_MAXCLASS) {
- size_t binind = SMALL_SIZE2BIN(usize);
+ szind_t binind = size2index(usize);
ret = arena_bin_info[binind].redzone_size;
} else
ret = 0;
@@ -887,47 +993,62 @@ p2rz(const void *ptr)
}
JEMALLOC_ALWAYS_INLINE void
-idalloct(void *ptr, bool try_tcache)
+idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
{
- arena_chunk_t *chunk;
assert(ptr != NULL);
+ if (config_stats && is_metadata) {
+ arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr,
+ config_prof));
+ }
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(chunk->arena, chunk, ptr, try_tcache);
- else
- huge_dalloc(ptr, true);
+ arena_dalloc(tsd, ptr, tcache);
}
JEMALLOC_ALWAYS_INLINE void
-idalloc(void *ptr)
+idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
- idalloct(ptr, true);
+ idalloctm(tsd, ptr, tcache, false);
}
JEMALLOC_ALWAYS_INLINE void
-iqalloct(void *ptr, bool try_tcache)
+idalloc(tsd_t *tsd, void *ptr)
{
- if (config_fill && opt_quarantine)
- quarantine(ptr);
+ idalloctm(tsd, ptr, tcache_get(tsd, false), false);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+{
+
+ if (config_fill && unlikely(opt_quarantine))
+ quarantine(tsd, ptr);
else
- idalloct(ptr, try_tcache);
+ idalloctm(tsd, ptr, tcache, false);
}
JEMALLOC_ALWAYS_INLINE void
-iqalloc(void *ptr)
+isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
{
- iqalloct(ptr, true);
+ arena_sdalloc(tsd, ptr, size, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+{
+
+ if (config_fill && unlikely(opt_quarantine))
+ quarantine(tsd, ptr);
+ else
+ isdalloct(tsd, ptr, size, tcache);
}
JEMALLOC_ALWAYS_INLINE void *
-iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena)
+iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena)
{
void *p;
size_t usize, copysize;
@@ -935,7 +1056,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
usize = sa2u(size + extra, alignment);
if (usize == 0)
return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
+ p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
if (p == NULL) {
if (extra == 0)
return (NULL);
@@ -943,7 +1064,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
usize = sa2u(size, alignment);
if (usize == 0)
return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
+ p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
if (p == NULL)
return (NULL);
}
@@ -953,72 +1074,57 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(p, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
+ isqalloc(tsd, ptr, oldsize, tcache);
return (p);
}
JEMALLOC_ALWAYS_INLINE void *
-iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
- bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena)
+iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache, arena_t *arena)
{
- size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
- oldsize = isalloc(ptr, config_prof);
-
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/*
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
- return (iralloct_realign(ptr, oldsize, size, extra, alignment,
- zero, try_tcache_alloc, try_tcache_dalloc, arena));
+ return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment,
+ zero, tcache, arena));
}
- if (size + extra <= arena_maxclass) {
- return (arena_ralloc(arena, ptr, oldsize, size, extra,
- alignment, zero, try_tcache_alloc,
- try_tcache_dalloc));
- } else {
- return (huge_ralloc(ptr, oldsize, size, extra,
- alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena)));
- }
+ return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero,
+ tcache));
}
JEMALLOC_ALWAYS_INLINE void *
-iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
+iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero)
{
- return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL));
+ return (iralloct(tsd, ptr, oldsize, size, alignment, zero,
+ tcache_get(tsd, true), NULL));
}
JEMALLOC_ALWAYS_INLINE bool
-ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
+ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment,
+ bool zero)
{
- size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
- oldsize = isalloc(ptr, config_prof);
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/* Existing object alignment is inadequate. */
return (true);
}
- if (size <= arena_maxclass)
- return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
- else
- return (huge_ralloc_no_move(ptr, oldsize, size, extra));
+ return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
}
-
-malloc_tsd_externs(thread_allocated, thread_allocated_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t,
- THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup)
#endif
#include "jemalloc/internal/prof.h"
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
new file mode 100644
index 000000000..a601d6ebb
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -0,0 +1,64 @@
+#ifndef JEMALLOC_INTERNAL_DECLS_H
+#define JEMALLOC_INTERNAL_DECLS_H
+
+#include <math.h>
+#ifdef _WIN32
+# include <windows.h>
+# include "msvc_compat/windows_extra.h"
+
+#else
+# include <sys/param.h>
+# include <sys/mman.h>
+# if !defined(__pnacl__) && !defined(__native_client__)
+# include <sys/syscall.h>
+# if !defined(SYS_write) && defined(__NR_write)
+# define SYS_write __NR_write
+# endif
+# include <sys/uio.h>
+# endif
+# include <pthread.h>
+# include <errno.h>
+#endif
+#include <sys/types.h>
+
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
+#endif
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#ifdef _MSC_VER
+# include <io.h>
+typedef intptr_t ssize_t;
+# define PATH_MAX 1024
+# define STDERR_FILENO 2
+# define __func__ __FUNCTION__
+# ifdef JEMALLOC_HAS_RESTRICT
+# define restrict __restrict
+# endif
+/* Disable warnings about deprecated system functions. */
+# pragma warning(disable: 4996)
+#if _MSC_VER < 1800
+static int
+isblank(int c)
+{
+
+ return (c == '\t' || c == ' ');
+}
+#endif
+#else
+# include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#endif /* JEMALLOC_INTERNAL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c166fbd9e..b0f8caaf8 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -22,6 +22,9 @@
*/
#undef CPU_SPINWAIT
+/* Defined if C11 atomics are available. */
+#undef JEMALLOC_C11ATOMICS
+
/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
#undef JEMALLOC_ATOMIC9
@@ -35,7 +38,7 @@
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
* __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
@@ -43,17 +46,37 @@
* Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
* __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#undef JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if madvise(2) is available.
+ */
+#undef JEMALLOC_HAVE_MADVISE
+
+/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
/*
+ * Defined if secure_getenv(3) is available.
+ */
+#undef JEMALLOC_HAVE_SECURE_GETENV
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#undef JEMALLOC_HAVE_ISSETUGID
+
+/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
@@ -76,9 +99,6 @@
*/
#undef JEMALLOC_MUTEX_INIT_CB
-/* Defined if sbrk() is supported. */
-#undef JEMALLOC_HAVE_SBRK
-
/* Non-empty if the tls_model attribute is supported. */
#undef JEMALLOC_TLS_MODEL
@@ -137,8 +157,26 @@
/* Support lazy locking (avoid locking unless a second thread is launched). */
#undef JEMALLOC_LAZY_LOCK
-/* One page is 2^STATIC_PAGE_SHIFT bytes. */
-#undef STATIC_PAGE_SHIFT
+/* Minimum size class to support is 2^LG_TINY_MIN bytes. */
+#undef LG_TINY_MIN
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#undef LG_QUANTUM
+
+/* One page is 2^LG_PAGE bytes. */
+#undef LG_PAGE
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#undef JEMALLOC_MAPS_COALESCE
/*
* If defined, use munmap() to unmap freed chunks, rather than storing them for
@@ -147,23 +185,29 @@
*/
#undef JEMALLOC_MUNMAP
-/*
- * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is
- * disabled by default because it is Linux-specific and it will cause virtual
- * memory map holes, much like munmap(2) does.
- */
-#undef JEMALLOC_MREMAP
-
/* TLS is used to map arenas and magazine caches to threads. */
#undef JEMALLOC_TLS
/*
+ * ffs()/ffsl() functions to use for bitmapping. Don't use these directly;
+ * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h.
+ */
+#undef JEMALLOC_INTERNAL_FFSL
+#undef JEMALLOC_INTERNAL_FFS
+
+/*
* JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
* within jemalloc-owned chunks before dereferencing them.
*/
#undef JEMALLOC_IVSALLOC
/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#undef JEMALLOC_CACHE_OBLIVIOUS
+
+/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
@@ -182,9 +226,7 @@
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
-/*
- * Define if operating system has alloca.h header.
- */
+/* Define if operating system has alloca.h header. */
#undef JEMALLOC_HAS_ALLOCA_H
/* C99 restrict keyword supported. */
@@ -202,4 +244,19 @@
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#undef LG_SIZEOF_INTMAX_T
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+#undef JEMALLOC_GLIBC_MALLOC_HOOK
+
+/* glibc memalign hook. */
+#undef JEMALLOC_GLIBC_MEMALIGN_HOOK
+
+/* Adaptive mutex support in pthreads. */
+#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+#undef JEMALLOC_EXPORT
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
index 4e2392302..a08ba772e 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -39,9 +39,15 @@
#endif
#define ZU(z) ((size_t)z)
+#define ZI(z) ((ssize_t)z)
#define QU(q) ((uint64_t)q)
#define QI(q) ((int64_t)q)
+#define KZU(z) ZU(z##ULL)
+#define KZI(z) ZI(z##LL)
+#define KQU(q) QU(q##ULL)
+#define KQI(q) QI(q##LL)
+
#ifndef __DECONST
# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h
index de44e1435..f051f2917 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex.h
@@ -10,7 +10,7 @@ typedef struct malloc_mutex_s malloc_mutex_t;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
#else
-# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \
+# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \
defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
@@ -26,7 +26,11 @@ typedef struct malloc_mutex_s malloc_mutex_t;
struct malloc_mutex_s {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ SRWLOCK lock;
+# else
CRITICAL_SECTION lock;
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@@ -70,7 +74,11 @@ malloc_mutex_lock(malloc_mutex_t *mutex)
if (isthreaded) {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ AcquireSRWLockExclusive(&mutex->lock);
+# else
EnterCriticalSection(&mutex->lock);
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockLock(&mutex->lock);
#else
@@ -85,7 +93,11 @@ malloc_mutex_unlock(malloc_mutex_t *mutex)
if (isthreaded) {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ ReleaseSRWLockExclusive(&mutex->lock);
+# else
LeaveCriticalSection(&mutex->lock);
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockUnlock(&mutex->lock);
#else
diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h
new file mode 100644
index 000000000..da7eb9686
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/pages.h
@@ -0,0 +1,26 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void *pages_map(void *addr, size_t size);
+void pages_unmap(void *addr, size_t size);
+void *pages_trim(void *addr, size_t alloc_size, size_t leadsize,
+ size_t size);
+bool pages_commit(void *addr, size_t size);
+bool pages_decommit(void *addr, size_t size);
+bool pages_purge(void *addr, size_t size);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
index 93516d242..a90021aa6 100644
--- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
+++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
@@ -1,44 +1,76 @@
-a0calloc
-a0free
+a0dalloc
+a0get
a0malloc
+arena_aalloc
arena_alloc_junk_small
arena_bin_index
arena_bin_info
+arena_bitselm_get
arena_boot
+arena_choose
+arena_choose_hard
+arena_chunk_alloc_huge
+arena_chunk_cache_maybe_insert
+arena_chunk_cache_maybe_remove
+arena_chunk_dalloc_huge
+arena_chunk_ralloc_huge_expand
+arena_chunk_ralloc_huge_shrink
+arena_chunk_ralloc_huge_similar
+arena_cleanup
arena_dalloc
arena_dalloc_bin
-arena_dalloc_bin_locked
+arena_dalloc_bin_junked_locked
arena_dalloc_junk_large
arena_dalloc_junk_small
arena_dalloc_large
-arena_dalloc_large_locked
+arena_dalloc_large_junked_locked
arena_dalloc_small
arena_dss_prec_get
arena_dss_prec_set
+arena_get
+arena_get_hard
+arena_init
+arena_lg_dirty_mult_default_get
+arena_lg_dirty_mult_default_set
+arena_lg_dirty_mult_get
+arena_lg_dirty_mult_set
arena_malloc
arena_malloc_large
arena_malloc_small
arena_mapbits_allocated_get
arena_mapbits_binind_get
+arena_mapbits_decommitted_get
arena_mapbits_dirty_get
arena_mapbits_get
+arena_mapbits_internal_set
arena_mapbits_large_binind_set
arena_mapbits_large_get
arena_mapbits_large_set
arena_mapbits_large_size_get
+arena_mapbitsp_get
+arena_mapbitsp_read
+arena_mapbitsp_write
+arena_mapbits_size_decode
+arena_mapbits_size_encode
arena_mapbits_small_runind_get
arena_mapbits_small_set
arena_mapbits_unallocated_set
arena_mapbits_unallocated_size_get
arena_mapbits_unallocated_size_set
arena_mapbits_unzeroed_get
-arena_mapbits_unzeroed_set
-arena_mapbitsp_get
-arena_mapbitsp_read
-arena_mapbitsp_write
-arena_mapp_get
-arena_maxclass
+arena_maxrun
+arena_maybe_purge
+arena_metadata_allocated_add
+arena_metadata_allocated_get
+arena_metadata_allocated_sub
+arena_migrate
+arena_miscelm_get
+arena_miscelm_to_pageind
+arena_miscelm_to_rpages
+arena_nbound
arena_new
+arena_node_alloc
+arena_node_dalloc
arena_palloc
arena_postfork_child
arena_postfork_parent
@@ -46,50 +78,47 @@ arena_prefork
arena_prof_accum
arena_prof_accum_impl
arena_prof_accum_locked
-arena_prof_ctx_get
-arena_prof_ctx_set
arena_prof_promoted
+arena_prof_tctx_get
+arena_prof_tctx_reset
+arena_prof_tctx_set
arena_ptr_small_binind_get
arena_purge_all
arena_quarantine_junk_small
arena_ralloc
arena_ralloc_junk_large
arena_ralloc_no_move
+arena_rd_to_miscelm
arena_redzone_corruption
arena_run_regind
+arena_run_to_miscelm
arena_salloc
+arenas_cache_bypass_cleanup
+arenas_cache_cleanup
+arena_sdalloc
arena_stats_merge
arena_tcache_fill_small
-arenas
-arenas_booted
-arenas_cleanup
-arenas_extend
-arenas_initialized
-arenas_lock
-arenas_tls
-arenas_tsd
-arenas_tsd_boot
-arenas_tsd_cleanup_wrapper
-arenas_tsd_get
-arenas_tsd_get_wrapper
-arenas_tsd_init_head
-arenas_tsd_set
+atomic_add_p
atomic_add_u
atomic_add_uint32
atomic_add_uint64
atomic_add_z
+atomic_cas_p
+atomic_cas_u
+atomic_cas_uint32
+atomic_cas_uint64
+atomic_cas_z
+atomic_sub_p
atomic_sub_u
atomic_sub_uint32
atomic_sub_uint64
atomic_sub_z
base_alloc
base_boot
-base_calloc
-base_node_alloc
-base_node_dealloc
base_postfork_child
base_postfork_parent
base_prefork
+base_stats_get
bitmap_full
bitmap_get
bitmap_info_init
@@ -99,49 +128,54 @@ bitmap_set
bitmap_sfu
bitmap_size
bitmap_unset
+bootstrap_calloc
+bootstrap_free
+bootstrap_malloc
bt_init
buferror
-choose_arena
-choose_arena_hard
-chunk_alloc
+chunk_alloc_base
+chunk_alloc_cache
chunk_alloc_dss
chunk_alloc_mmap
+chunk_alloc_wrapper
chunk_boot
-chunk_dealloc
-chunk_dealloc_mmap
+chunk_dalloc_arena
+chunk_dalloc_cache
+chunk_dalloc_mmap
+chunk_dalloc_wrapper
+chunk_deregister
chunk_dss_boot
chunk_dss_postfork_child
chunk_dss_postfork_parent
chunk_dss_prec_get
chunk_dss_prec_set
chunk_dss_prefork
+chunk_hooks_default
+chunk_hooks_get
+chunk_hooks_set
chunk_in_dss
+chunk_lookup
chunk_npages
chunk_postfork_child
chunk_postfork_parent
chunk_prefork
-chunk_unmap
-chunks_mtx
-chunks_rtree
+chunk_purge_arena
+chunk_purge_wrapper
+chunk_register
chunksize
chunksize_mask
-ckh_bucket_search
+chunks_rtree
ckh_count
ckh_delete
-ckh_evict_reloc_insert
ckh_insert
-ckh_isearch
ckh_iter
ckh_new
ckh_pointer_hash
ckh_pointer_keycomp
-ckh_rebuild
ckh_remove
ckh_search
ckh_string_hash
ckh_string_keycomp
-ckh_try_bucket_insert
-ckh_try_insert
ctl_boot
ctl_bymib
ctl_byname
@@ -150,6 +184,23 @@ ctl_postfork_child
ctl_postfork_parent
ctl_prefork
dss_prec_names
+extent_node_achunk_get
+extent_node_achunk_set
+extent_node_addr_get
+extent_node_addr_set
+extent_node_arena_get
+extent_node_arena_set
+extent_node_dirty_insert
+extent_node_dirty_linkage_init
+extent_node_dirty_remove
+extent_node_init
+extent_node_prof_tctx_get
+extent_node_prof_tctx_set
+extent_node_size_get
+extent_node_size_set
+extent_node_zeroed_get
+extent_node_zeroed_set
+extent_tree_ad_empty
extent_tree_ad_first
extent_tree_ad_insert
extent_tree_ad_iter
@@ -166,6 +217,7 @@ extent_tree_ad_reverse_iter
extent_tree_ad_reverse_iter_recurse
extent_tree_ad_reverse_iter_start
extent_tree_ad_search
+extent_tree_szad_empty
extent_tree_szad_first
extent_tree_szad_insert
extent_tree_szad_iter
@@ -193,45 +245,49 @@ hash_rotl_64
hash_x64_128
hash_x86_128
hash_x86_32
-huge_allocated
-huge_boot
+huge_aalloc
huge_dalloc
huge_dalloc_junk
-huge_dss_prec_get
huge_malloc
-huge_mtx
-huge_ndalloc
-huge_nmalloc
huge_palloc
-huge_postfork_child
-huge_postfork_parent
-huge_prefork
-huge_prof_ctx_get
-huge_prof_ctx_set
+huge_prof_tctx_get
+huge_prof_tctx_reset
+huge_prof_tctx_set
huge_ralloc
huge_ralloc_no_move
huge_salloc
-iallocm
+iaalloc
+iallocztm
icalloc
icalloct
idalloc
idalloct
+idalloctm
imalloc
imalloct
+index2size
+index2size_compute
+index2size_lookup
+index2size_tab
+in_valgrind
ipalloc
ipalloct
+ipallocztm
iqalloc
-iqalloct
iralloc
iralloct
iralloct_realign
isalloc
+isdalloct
+isqalloc
isthreaded
ivsalloc
ixalloc
jemalloc_postfork_child
jemalloc_postfork_parent
jemalloc_prefork
+large_maxclass
+lg_floor
malloc_cprintf
malloc_mutex_init
malloc_mutex_lock
@@ -242,7 +298,8 @@ malloc_mutex_unlock
malloc_printf
malloc_snprintf
malloc_strtoumax
-malloc_tsd_boot
+malloc_tsd_boot0
+malloc_tsd_boot1
malloc_tsd_cleanup_register
malloc_tsd_dalloc
malloc_tsd_malloc
@@ -251,16 +308,18 @@ malloc_vcprintf
malloc_vsnprintf
malloc_write
map_bias
+map_misc_offset
mb_write
mutex_boot
-narenas_auto
-narenas_total
+narenas_cache_cleanup
narenas_total_get
ncpus
nhbins
opt_abort
opt_dss
opt_junk
+opt_junk_alloc
+opt_junk_free
opt_lg_chunk
opt_lg_dirty_mult
opt_lg_prof_interval
@@ -274,84 +333,99 @@ opt_prof_final
opt_prof_gdump
opt_prof_leak
opt_prof_prefix
+opt_prof_thread_active_init
opt_quarantine
opt_redzone
opt_stats_print
opt_tcache
opt_utrace
-opt_valgrind
opt_xmalloc
opt_zero
p2rz
+pages_commit
+pages_decommit
+pages_map
pages_purge
+pages_trim
+pages_unmap
pow2_ceil
+prof_active_get
+prof_active_get_unlocked
+prof_active_set
+prof_alloc_prep
+prof_alloc_rollback
prof_backtrace
prof_boot0
prof_boot1
prof_boot2
-prof_bt_count
-prof_ctx_get
-prof_ctx_set
+prof_dump_header
prof_dump_open
prof_free
+prof_free_sampled_object
prof_gdump
+prof_gdump_get
+prof_gdump_get_unlocked
+prof_gdump_set
+prof_gdump_val
prof_idump
prof_interval
prof_lookup
prof_malloc
+prof_malloc_sample_object
prof_mdump
prof_postfork_child
prof_postfork_parent
prof_prefork
-prof_promote
prof_realloc
+prof_reset
prof_sample_accum_update
prof_sample_threshold_update
-prof_tdata_booted
+prof_tctx_get
+prof_tctx_reset
+prof_tctx_set
prof_tdata_cleanup
prof_tdata_get
prof_tdata_init
-prof_tdata_initialized
-prof_tdata_tls
-prof_tdata_tsd
-prof_tdata_tsd_boot
-prof_tdata_tsd_cleanup_wrapper
-prof_tdata_tsd_get
-prof_tdata_tsd_get_wrapper
-prof_tdata_tsd_init_head
-prof_tdata_tsd_set
+prof_tdata_reinit
+prof_thread_active_get
+prof_thread_active_init_get
+prof_thread_active_init_set
+prof_thread_active_set
+prof_thread_name_get
+prof_thread_name_set
quarantine
quarantine_alloc_hook
-quarantine_boot
-quarantine_booted
+quarantine_alloc_hook_work
quarantine_cleanup
-quarantine_init
-quarantine_tls
-quarantine_tsd
-quarantine_tsd_boot
-quarantine_tsd_cleanup_wrapper
-quarantine_tsd_get
-quarantine_tsd_get_wrapper
-quarantine_tsd_init_head
-quarantine_tsd_set
register_zone
+rtree_child_read
+rtree_child_read_hard
+rtree_child_tryread
rtree_delete
rtree_get
-rtree_get_locked
rtree_new
-rtree_postfork_child
-rtree_postfork_parent
-rtree_prefork
+rtree_node_valid
rtree_set
+rtree_start_level
+rtree_subkey
+rtree_subtree_read
+rtree_subtree_read_hard
+rtree_subtree_tryread
+rtree_val_read
+rtree_val_write
s2u
+s2u_compute
+s2u_lookup
sa2u
set_errno
-small_size2bin
+size2index
+size2index_compute
+size2index_lookup
+size2index_tab
stats_cactive
stats_cactive_add
stats_cactive_get
stats_cactive_sub
-stats_chunks
stats_print
tcache_alloc_easy
tcache_alloc_large
@@ -359,55 +433,67 @@ tcache_alloc_small
tcache_alloc_small_hard
tcache_arena_associate
tcache_arena_dissociate
+tcache_arena_reassociate
tcache_bin_flush_large
tcache_bin_flush_small
tcache_bin_info
-tcache_boot0
-tcache_boot1
-tcache_booted
+tcache_boot
+tcache_cleanup
tcache_create
tcache_dalloc_large
tcache_dalloc_small
-tcache_destroy
-tcache_enabled_booted
+tcache_enabled_cleanup
tcache_enabled_get
-tcache_enabled_initialized
tcache_enabled_set
-tcache_enabled_tls
-tcache_enabled_tsd
-tcache_enabled_tsd_boot
-tcache_enabled_tsd_cleanup_wrapper
-tcache_enabled_tsd_get
-tcache_enabled_tsd_get_wrapper
-tcache_enabled_tsd_init_head
-tcache_enabled_tsd_set
tcache_event
tcache_event_hard
tcache_flush
tcache_get
-tcache_initialized
+tcache_get_hard
tcache_maxclass
+tcaches
tcache_salloc
+tcaches_create
+tcaches_destroy
+tcaches_flush
+tcaches_get
tcache_stats_merge
-tcache_thread_cleanup
-tcache_tls
-tcache_tsd
-tcache_tsd_boot
-tcache_tsd_cleanup_wrapper
-tcache_tsd_get
-tcache_tsd_get_wrapper
-tcache_tsd_init_head
-tcache_tsd_set
-thread_allocated_booted
-thread_allocated_initialized
-thread_allocated_tls
-thread_allocated_tsd
-thread_allocated_tsd_boot
-thread_allocated_tsd_cleanup_wrapper
-thread_allocated_tsd_get
-thread_allocated_tsd_get_wrapper
-thread_allocated_tsd_init_head
-thread_allocated_tsd_set
+thread_allocated_cleanup
+thread_deallocated_cleanup
+tsd_arena_get
+tsd_arena_set
+tsd_boot
+tsd_boot0
+tsd_boot1
+tsd_booted
+tsd_cleanup
+tsd_cleanup_wrapper
+tsd_fetch
+tsd_get
+tsd_wrapper_get
+tsd_wrapper_set
+tsd_initialized
tsd_init_check_recursion
tsd_init_finish
+tsd_init_head
+tsd_nominal
+tsd_quarantine_get
+tsd_quarantine_set
+tsd_set
+tsd_tcache_enabled_get
+tsd_tcache_enabled_set
+tsd_tcache_get
+tsd_tcache_set
+tsd_tls
+tsd_tsd
+tsd_prof_tdata_get
+tsd_prof_tdata_set
+tsd_thread_allocated_get
+tsd_thread_allocated_set
+tsd_thread_deallocated_get
+tsd_thread_deallocated_set
u2rz
+valgrind_freelike_block
+valgrind_make_mem_defined
+valgrind_make_mem_noaccess
+valgrind_make_mem_undefined
diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h
index 7b2b06512..216d0ef47 100644
--- a/deps/jemalloc/include/jemalloc/internal/prng.h
+++ b/deps/jemalloc/include/jemalloc/internal/prng.h
@@ -15,7 +15,7 @@
* See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
*
* This choice of m has the disadvantage that the quality of the bits is
- * proportional to bit position. For example. the lowest bit has a cycle of 2,
+ * proportional to bit position. For example, the lowest bit has a cycle of 2,
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*
@@ -26,22 +26,22 @@
* const uint32_t a, c : See above discussion.
*/
#define prng32(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 32); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 32); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (32 - lg_range); \
+ r >>= (32 - (lg_range)); \
} while (false)
/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
#define prng64(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 64); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 64); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (64 - lg_range); \
+ r >>= (64 - (lg_range)); \
} while (false)
#endif /* JEMALLOC_H_TYPES */
diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h
index 6f162d21e..e5198c3e8 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof.h
@@ -3,8 +3,8 @@
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
@@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t;
*/
#define PROF_BT_MAX 128
-/* Maximum number of backtraces to store in each per thread LRU cache. */
-#define PROF_TCMAX 1024
-
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
@@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_PRINTF_BUFSIZE 128
/*
- * Number of mutexes shared among all ctx's. No space is allocated for these
+ * Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
+ * Number of mutexes shared among all tdata's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS 256
+
+/*
* prof_tdata pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
@@ -63,141 +66,186 @@ struct prof_bt_s {
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
prof_bt_t *bt;
- unsigned nignore;
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
+ /* Profiling counters. */
+ uint64_t curobjs;
+ uint64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
-struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's cnts_ql. */
- ql_elm(prof_thr_cnt_t) cnts_link;
+typedef enum {
+ prof_tctx_state_initializing,
+ prof_tctx_state_nominal,
+ prof_tctx_state_dumping,
+ prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
- /* Linkage into thread's LRU. */
- ql_elm(prof_thr_cnt_t) lru_link;
+struct prof_tctx_s {
+ /* Thread data for thread that performed the allocation. */
+ prof_tdata_t *tdata;
/*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not cached in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's cnts_ql.
+ * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+ * defunct during teardown.
*/
- prof_ctx_t *ctx;
+ uint64_t thr_uid;
+ uint64_t thr_discrim;
+
+ /* Profiling counters, protected by tdata->lock. */
+ prof_cnt_t cnts;
+
+ /* Associated global context. */
+ prof_gctx_t *gctx;
/*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
- * The reader must assure 1) that the epoch is even while it reads the
- * counters, and 2) that the epoch doesn't change between the time it
- * starts and finishes reading the counters.
+ * UID that distinguishes multiple tctx's created by the same thread,
+ * but coexisting in gctx->tctxs. There are two ways that such
+ * coexistence can occur:
+ * - A dumper thread can cause a tctx to be retained in the purgatory
+ * state.
+ * - Although a single "producer" thread must create all tctx's which
+ * share the same thr_uid, multiple "consumers" can each concurrently
+ * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
+ * gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+ * threshold can be hit again before the first consumer finishes
+ * executing prof_tctx_destroy().
*/
- unsigned epoch;
+ uint64_t tctx_uid;
- /* Profiling counters. */
- prof_cnt_t cnts;
-};
+ /* Linkage into gctx's tctxs. */
+ rb_node(prof_tctx_t) tctx_link;
-struct prof_ctx_s {
- /* Associated backtrace. */
- prof_bt_t *bt;
+ /*
+ * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+ * sample vs destroy race.
+ */
+ bool prepared;
+
+ /* Current dump-related state, protected by gctx->lock. */
+ prof_tctx_state_t state;
+
+ /*
+ * Copy of cnts snapshotted during early dump phase, protected by
+ * dump_mtx.
+ */
+ prof_cnt_t dump_cnts;
+};
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
- /* Protects nlimbo, cnt_merged, and cnts_ql. */
+struct prof_gctx_s {
+ /* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
/*
- * Number of threads that currently cause this ctx to be in a state of
+ * Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
- * - Initializing per thread counters associated with this ctx.
- * - Preparing to destroy this ctx.
- * - Dumping a heap profile that includes this ctx.
+ * - Initializing this gctx.
+ * - Initializing per thread counters associated with this gctx.
+ * - Preparing to destroy this gctx.
+ * - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
- * ctx.
+ * gctx.
*/
unsigned nlimbo;
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
/*
- * List of profile counters, one for each thread that has allocated in
+ * Tree of profile counters, one for each thread that has allocated in
* this context.
*/
- ql_head(prof_thr_cnt_t) cnts_ql;
+ prof_tctx_tree_t tctxs;
+
+ /* Linkage for tree of contexts to be dumped. */
+ rb_node(prof_gctx_t) dump_link;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Associated backtrace. */
+ prof_bt_t bt;
- /* Linkage for list of contexts to be dumped. */
- ql_elm(prof_ctx_t) dump_link;
+ /* Backtrace vector, variable size, referred to by bt. */
+ void *vec[1];
};
-typedef ql_head(prof_ctx_t) prof_ctx_list_t;
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
struct prof_tdata_s {
+ malloc_mutex_t *lock;
+
+ /* Monotonically increasing unique thread identifier. */
+ uint64_t thr_uid;
+
/*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
- * cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no
- * others will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
+ * Monotonically increasing discriminator among tdata structures
+ * associated with the same thr_uid.
*/
- ckh_t bt2cnt;
+ uint64_t thr_discrim;
- /* LRU for contents of bt2cnt. */
- ql_head(prof_thr_cnt_t) lru_ql;
+ /* Included in heap profile dumps if non-NULL. */
+ char *thread_name;
- /* Backtrace vector, used for calls to prof_backtrace(). */
- void **vec;
+ bool attached;
+ bool expired;
+
+ rb_node(prof_tdata_t) tdata_link;
+
+ /*
+ * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
+ * necessary when incrementing this field, because only one thread ever
+ * does so.
+ */
+ uint64_t tctx_uid_next;
+
+ /*
+ * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
+ * backtraces for which it has non-zero allocation/deallocation counters
+ * associated with thread-specific prof_tctx_t objects. Other threads
+ * may write to prof_tctx_t contents when freeing associated objects.
+ */
+ ckh_t bt2tctx;
/* Sampling state. */
uint64_t prng_state;
- uint64_t threshold;
- uint64_t accum;
+ uint64_t bytes_until_sample;
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
+
+ /*
+ * Set to true during an early dump phase for tdata's which are
+ * currently being dumped. New threads' tdata's have this initialized
+ * to false so that they aren't accidentally included in later dump
+ * phases.
+ */
+ bool dumping;
+
+ /*
+ * True if profiling is active for this tdata's thread
+ * (thread.prof.active mallctl).
+ */
+ bool active;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Backtrace vector, used for calls to prof_backtrace(). */
+ void *vec[PROF_BT_MAX];
};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
-/*
- * Even if opt_prof is true, sampling can be temporarily disabled by setting
- * opt_prof_active to false. No locking is used when updating opt_prof_active,
- * so there are no guarantees regarding how long it will take for all threads
- * to notice state changes.
- */
extern bool opt_prof_active;
+extern bool opt_prof_thread_active_init;
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
@@ -211,6 +259,12 @@ extern char opt_prof_prefix[
#endif
1];
+/* Accessed via prof_active_[gs]et{_unlocked,}(). */
+extern bool prof_active;
+
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool prof_gdump_val;
+
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
@@ -221,391 +275,269 @@ extern char opt_prof_prefix[
extern uint64_t prof_interval;
/*
- * If true, promote small sampled objects to large objects, since small run
- * headers do not have embedded profile context pointers.
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
*/
-extern bool prof_promote;
+extern size_t lg_prof_sample;
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt, unsigned nignore);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
+void prof_backtrace(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
+size_t prof_tdata_count(void);
size_t prof_bt_count(void);
+const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
+typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+extern prof_dump_header_t *prof_dump_header;
#endif
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
-prof_tdata_t *prof_tdata_init(void);
-void prof_tdata_cleanup(void *arg);
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tdata_cleanup(tsd_t *tsd);
+const char *prof_thread_name_get(void);
+bool prof_active_get(void);
+bool prof_active_set(bool active);
+int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool prof_thread_active_get(void);
+bool prof_thread_active_set(bool active);
+bool prof_thread_active_init_get(void);
+bool prof_thread_active_init_set(bool active_init);
+bool prof_gdump_get(void);
+bool prof_gdump_set(bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
void prof_prefork(void);
void prof_postfork_parent(void);
void prof_postfork_child(void);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
-#define PROF_ALLOC_PREP(nignore, size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- prof_tdata = prof_tdata_get(true); \
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
- if (prof_tdata != NULL) \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- else \
- ret = NULL; \
- break; \
- } \
- \
- if (opt_prof_active == false) { \
- /* Sampling is currently inactive, so avoid sampling. */\
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else if (opt_lg_prof_sample == 0) { \
- /* Don't bother with sampling logic, since sampling */\
- /* interval is 1. */\
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else { \
- if (prof_tdata->threshold == 0) { \
- /* Initialize. Seed the prng differently for */\
- /* each thread. */\
- prof_tdata->prng_state = \
- (uint64_t)(uintptr_t)&size; \
- prof_sample_threshold_update(prof_tdata); \
- } \
- \
- /* Determine whether to capture a backtrace based on */\
- /* whether size is enough for prof_accum to reach */\
- /* prof_tdata->threshold. However, delay updating */\
- /* these variables until prof_{m,re}alloc(), because */\
- /* we don't know for sure that the allocation will */\
- /* succeed. */\
- /* */\
- /* Use subtraction rather than addition to avoid */\
- /* potential integer overflow. */\
- if (size >= prof_tdata->threshold - \
- prof_tdata->accum) { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } \
-} while (0)
-
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
-
-prof_tdata_t *prof_tdata_get(bool create);
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-bool prof_sample_accum_update(size_t size);
-void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
+bool prof_active_get_unlocked(void);
+bool prof_gdump_get_unlocked(void);
+prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
+bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
+ prof_tdata_t **tdata_out);
+prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
+ bool update);
+prof_tctx_t *prof_tctx_get(const void *ptr);
+void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *tctx);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
+ prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
+ size_t old_usize, prof_tctx_t *old_tctx);
+void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-malloc_tsd_externs(prof_tdata, prof_tdata_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
- prof_tdata_cleanup)
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void)
+{
+
+ /*
+ * Even if opt_prof is true, sampling can be temporarily disabled by
+ * setting prof_active to false. No locking is used when reading
+ * prof_active in the fast path, so there are no guarantees regarding
+ * how long it will take for all threads to notice state changes.
+ */
+ return (prof_active);
+}
-JEMALLOC_INLINE prof_tdata_t *
-prof_tdata_get(bool create)
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void)
{
- prof_tdata_t *prof_tdata;
+
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return (prof_gdump_val);
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create)
+{
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = *prof_tdata_tsd_get();
- if (create && prof_tdata == NULL)
- prof_tdata = prof_tdata_init();
+ tdata = tsd_prof_tdata_get(tsd);
+ if (create) {
+ if (unlikely(tdata == NULL)) {
+ if (tsd_nominal(tsd)) {
+ tdata = prof_tdata_init(tsd);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ } else if (unlikely(tdata->expired)) {
+ tdata = prof_tdata_reinit(tsd, tdata);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ assert(tdata == NULL || tdata->attached);
+ }
- return (prof_tdata);
+ return (tdata);
}
-JEMALLOC_INLINE void
-prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(const void *ptr)
{
- /*
- * The body of this function is compiled out unless heap profiling is
- * enabled, so that it is possible to compile jemalloc with floating
- * point support completely disabled. Avoiding floating point code is
- * important on memory-constrained systems, but it also enables a
- * workaround for versions of glibc that don't properly save/restore
- * floating point registers during dynamic lazy symbol loading (which
- * internally calls into whatever malloc implementation happens to be
- * integrated into the application). Note that some compilers (e.g.
- * gcc 4.8) may use floating point registers for fast memory moves, so
- * jemalloc must be compiled with such optimizations disabled (e.g.
- * -mno-sse) in order for the workaround to be complete.
- */
-#ifdef JEMALLOC_PROF
- uint64_t r;
- double u;
cassert(config_prof);
+ assert(ptr != NULL);
- /*
- * Compute sample threshold as a geometrically distributed random
- * variable with mean (2^opt_lg_prof_sample).
- *
- * __ __
- * | log(u) | 1
- * prof_tdata->threshold = | -------- |, where p = -------------------
- * | log(1-p) | opt_lg_prof_sample
- * 2
- *
- * For more information on the math, see:
- *
- * Non-Uniform Random Variate Generation
- * Luc Devroye
- * Springer-Verlag, New York, 1986
- * pp 500
- * (http://luc.devroye.org/rnbookindex.html)
- */
- prng64(r, 53, prof_tdata->prng_state,
- UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
- u = (double)r * (1.0/9007199254740992.0L);
- prof_tdata->threshold = (uint64_t)(log(u) /
- log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
- + (uint64_t)1U;
-#endif
+ return (arena_prof_tctx_get(ptr));
}
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
+ arena_prof_tctx_set(ptr, usize, tctx);
}
-JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *old_tctx)
{
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- arena_prof_ctx_set(ptr, usize, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
+ arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
}
-JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size)
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+ prof_tdata_t **tdata_out)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
cassert(config_prof);
- /* Sampling logic is unnecessary if the interval is 1. */
- assert(opt_lg_prof_sample != 0);
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = prof_tdata_get(tsd, true);
+ if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = NULL;
+
+ if (tdata_out != NULL)
+ *tdata_out = tdata;
+
+ if (tdata == NULL)
return (true);
- /* Take care to avoid integer overflow. */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- prof_tdata->accum -= (prof_tdata->threshold - size);
- /* Compute new sample threshold. */
- prof_sample_threshold_update(prof_tdata);
- while (prof_tdata->accum >= prof_tdata->threshold) {
- prof_tdata->accum -= prof_tdata->threshold;
- prof_sample_threshold_update(prof_tdata);
- }
- return (false);
- } else {
- prof_tdata->accum += size;
+ if (tdata->bytes_until_sample >= usize) {
+ if (update)
+ tdata->bytes_until_sample -= usize;
return (true);
+ } else {
+ /* Compute new sample threshold. */
+ if (update)
+ prof_sample_threshold_update(tdata);
+ return (!tdata->active);
}
}
-JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
+{
+ prof_tctx_t *ret;
+ prof_tdata_t *tdata;
+ prof_bt_t bt;
+
+ assert(usize == s2u(usize));
+
+ if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+ &tdata)))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ else {
+ bt_init(&bt, tdata->vec);
+ prof_backtrace(&bt);
+ ret = prof_lookup(tsd, &bt);
+ }
+
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
cassert(config_prof);
assert(ptr != NULL);
assert(usize == isalloc(ptr, true));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the usize passed to
- * PROF_ALLOC_PREP() and prof_malloc().
- */
- assert((uintptr_t)cnt == (uintptr_t)1U);
- }
- }
-
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}
-JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
+ bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+ prof_tctx_t *old_tctx)
{
- prof_thr_cnt_t *told_cnt;
+ bool sampled, old_sampled;
cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
- if (ptr != NULL) {
+ if (prof_active && !updated && ptr != NULL) {
assert(usize == isalloc(ptr, true));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. The usize passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual usize was insufficient to cross
- * the sample threshold.
- */
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- }
- }
-
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
+ if (prof_sample_accum_update(tsd, usize, true, NULL)) {
/*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
+ * Don't sample. The usize passed to prof_alloc_prep()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
*/
- malloc_mutex_lock(old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_usize;
- malloc_mutex_unlock(old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
}
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
- cnt->epoch++;
- } else if (ptr != NULL)
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_usize;
}
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
+
+ sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+ old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+
+ if (unlikely(sampled))
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+
+ if (unlikely(old_sampled))
+ prof_free_sampled_object(tsd, old_usize, old_tctx);
}
-JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
+ prof_tctx_t *tctx = prof_tctx_get(ptr);
cassert(config_prof);
+ assert(usize == isalloc(ptr, true));
- if ((uintptr_t)ctx > (uintptr_t)1) {
- prof_thr_cnt_t *tcnt;
- assert(size == isalloc(ptr, true));
- tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(ctx->lock);
- }
- }
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_free_sampled_object(tsd, usize, tctx);
}
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h
index f70c5f6f3..1834bb855 100644
--- a/deps/jemalloc/include/jemalloc/internal/ql.h
+++ b/deps/jemalloc/include/jemalloc/internal/ql.h
@@ -1,6 +1,4 @@
-/*
- * List definitions.
- */
+/* List definitions. */
#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h
index 602944b9b..0fbaec25e 100644
--- a/deps/jemalloc/include/jemalloc/internal/qr.h
+++ b/deps/jemalloc/include/jemalloc/internal/qr.h
@@ -40,8 +40,10 @@ struct { \
(a_qr_b)->a_field.qre_prev = t; \
} while (0)
-/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
- * have two copies of the code. */
+/*
+ * qr_meld() and qr_split() are functionally equivalent, so there's no need to
+ * have two copies of the code.
+ */
#define qr_split(a_qr_a, a_qr_b, a_field) \
qr_meld((a_qr_a), (a_qr_b), a_field)
diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h
index 16f677f73..ae607399f 100644
--- a/deps/jemalloc/include/jemalloc/internal/quarantine.h
+++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h
@@ -29,36 +29,29 @@ struct quarantine_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-quarantine_t *quarantine_init(size_t lg_maxobjs);
-void quarantine(void *ptr);
-void quarantine_cleanup(void *arg);
-bool quarantine_boot(void);
+void quarantine_alloc_hook_work(tsd_t *tsd);
+void quarantine(tsd_t *tsd, void *ptr);
+void quarantine_cleanup(tsd_t *tsd);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *)
-
void quarantine_alloc_hook(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_))
-malloc_tsd_externs(quarantine, quarantine_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL,
- quarantine_cleanup)
-
JEMALLOC_ALWAYS_INLINE void
quarantine_alloc_hook(void)
{
- quarantine_t *quarantine;
+ tsd_t *tsd;
assert(config_fill && opt_quarantine);
- quarantine = *quarantine_tsd_get();
- if (quarantine == NULL)
- quarantine_init(LG_MAXOBJS_INIT);
+ tsd = tsd_fetch();
+ if (tsd_quarantine_get(tsd) == NULL)
+ quarantine_alloc_hook_work(tsd);
}
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h
index 423802eb2..2ca8e5933 100644
--- a/deps/jemalloc/include/jemalloc/internal/rb.h
+++ b/deps/jemalloc/include/jemalloc/internal/rb.h
@@ -158,6 +158,8 @@ struct { \
#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree); \
a_attr a_type * \
@@ -198,7 +200,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* int (a_cmp *)(a_type *a_node, a_type *a_other);
* ^^^^^^
* or a_key
- * Interpretation of comparision function return values:
+ * Interpretation of comparison function return values:
* -1 : a_node < a_other
* 0 : a_node == a_other
* 1 : a_node > a_other
@@ -224,6 +226,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* Args:
* tree: Pointer to an uninitialized red-black tree object.
*
+ * static bool
+ * ex_empty(ex_t *tree);
+ * Description: Determine whether tree is empty.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * Ret: True if tree is empty, false otherwise.
+ *
* static ex_node_t *
* ex_first(ex_t *tree);
* static ex_node_t *
@@ -309,6 +318,10 @@ a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
} \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree) { \
+ return (rbtree->rbt_root == &rbtree->rbt_nil); \
+} \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree) { \
a_type *ret; \
@@ -580,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
if (left != &rbtree->rbt_nil) { \
/* node has no successor, but it has a left child. */\
/* Splice node out, without losing the left child. */\
- assert(rbtn_red_get(a_type, a_field, node) == false); \
+ assert(!rbtn_red_get(a_type, a_field, node)); \
assert(rbtn_red_get(a_type, a_field, left)); \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
@@ -616,8 +629,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
if (pathp->cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp->node, \
pathp[1].node); \
- assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
- == false); \
+ assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
@@ -681,7 +693,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
- /* subree root, which may actually be the tree */\
+ /* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
@@ -849,7 +861,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
} \
/* Set root. */ \
rbtree->rbt_root = path->node; \
- assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
+ assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \
} \
a_attr a_type * \
a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h
index bc74769f5..28ae9d1dd 100644
--- a/deps/jemalloc/include/jemalloc/internal/rtree.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree.h
@@ -1,170 +1,292 @@
/*
* This radix tree implementation is tailored to the singular purpose of
- * tracking which chunks are currently owned by jemalloc. This functionality
- * is mandatory for OS X, where jemalloc must be able to respond to object
- * ownership queries.
+ * associating metadata with chunks that are currently owned by jemalloc.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
+typedef struct rtree_node_elm_s rtree_node_elm_t;
+typedef struct rtree_level_s rtree_level_t;
typedef struct rtree_s rtree_t;
/*
- * Size of each radix tree node (must be a power of 2). This impacts tree
- * depth.
+ * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
+ * machine address width.
*/
-#define RTREE_NODESIZE (1U << 16)
+#define LG_RTREE_BITS_PER_LEVEL 4
+#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define RTREE_HEIGHT_MAX \
+ ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
-typedef void *(rtree_alloc_t)(size_t);
-typedef void (rtree_dalloc_t)(void *);
+/* Used for two-stage lock-free node initialization. */
+#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
+
+/*
+ * The node allocation callback function's argument is the number of contiguous
+ * rtree_node_elm_t structures to allocate, and the resulting memory must be
+ * zeroed.
+ */
+typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
+typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
+struct rtree_node_elm_s {
+ union {
+ void *pun;
+ rtree_node_elm_t *child;
+ extent_node_t *val;
+ };
+};
+
+struct rtree_level_s {
+ /*
+ * A non-NULL subtree points to a subtree rooted along the hypothetical
+ * path to the leaf node corresponding to key 0. Depending on what keys
+ * have been used to store to the tree, an arbitrary combination of
+ * subtree pointers may remain NULL.
+ *
+ * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4.
+ * This results in a 3-level tree, and the leftmost leaf can be directly
+ * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
+ * 0x00000000) can be accessed via subtrees[1], and the remainder of the
+ * tree can be accessed via subtrees[0].
+ *
+ * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...]
+ *
+ * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
+ *
+ * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
+ *
+ * This has practical implications on x64, which currently uses only the
+ * lower 47 bits of virtual address space in userland, thus leaving
+ * subtrees[0] unused and avoiding a level of tree traversal.
+ */
+ union {
+ void *subtree_pun;
+ rtree_node_elm_t *subtree;
+ };
+ /* Number of key bits distinguished by this level. */
+ unsigned bits;
+ /*
+ * Cumulative number of key bits distinguished by traversing to
+ * corresponding tree level.
+ */
+ unsigned cumbits;
+};
+
struct rtree_s {
- rtree_alloc_t *alloc;
- rtree_dalloc_t *dalloc;
- malloc_mutex_t mutex;
- void **root;
- unsigned height;
- unsigned level2bits[1]; /* Dynamically sized. */
+ rtree_node_alloc_t *alloc;
+ rtree_node_dalloc_t *dalloc;
+ unsigned height;
+ /*
+ * Precomputed table used to convert from the number of leading 0 key
+ * bits to which subtree level to start at.
+ */
+ unsigned start_level[RTREE_HEIGHT_MAX];
+ rtree_level_t levels[RTREE_HEIGHT_MAX];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc);
+bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+ rtree_node_dalloc_t *dalloc);
void rtree_delete(rtree_t *rtree);
-void rtree_prefork(rtree_t *rtree);
-void rtree_postfork_parent(rtree_t *rtree);
-void rtree_postfork_child(rtree_t *rtree);
+rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree,
+ unsigned level);
+rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
+ rtree_node_elm_t *elm, unsigned level);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-#ifdef JEMALLOC_DEBUG
-uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
-#endif
-uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
-bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
+unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
+uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
+
+bool rtree_node_valid(rtree_node_elm_t *node);
+rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+ unsigned level);
+extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
+ bool dependent);
+void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
+ const extent_node_t *val);
+rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
+rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
+
+extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
+bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-#define RTREE_GET_GENERATE(f) \
-/* The least significant bits of the key are ignored. */ \
-JEMALLOC_INLINE uint8_t \
-f(rtree_t *rtree, uintptr_t key) \
-{ \
- uint8_t ret; \
- uintptr_t subkey; \
- unsigned i, lshift, height, bits; \
- void **node, **child; \
- \
- RTREE_LOCK(&rtree->mutex); \
- for (i = lshift = 0, height = rtree->height, node = rtree->root;\
- i < height - 1; \
- i++, lshift += bits, node = child) { \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
- 3)) - bits); \
- child = (void**)node[subkey]; \
- if (child == NULL) { \
- RTREE_UNLOCK(&rtree->mutex); \
- return (0); \
- } \
- } \
- \
- /* \
- * node is a leaf, so it contains values rather than node \
- * pointers. \
- */ \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
- bits); \
- { \
- uint8_t *leaf = (uint8_t *)node; \
- ret = leaf[subkey]; \
- } \
- RTREE_UNLOCK(&rtree->mutex); \
- \
- RTREE_GET_VALIDATE \
- return (ret); \
+JEMALLOC_INLINE unsigned
+rtree_start_level(rtree_t *rtree, uintptr_t key)
+{
+ unsigned start_level;
+
+ if (unlikely(key == 0))
+ return (rtree->height - 1);
+
+ start_level = rtree->start_level[lg_floor(key) >>
+ LG_RTREE_BITS_PER_LEVEL];
+ assert(start_level < rtree->height);
+ return (start_level);
}
-#ifdef JEMALLOC_DEBUG
-# define RTREE_LOCK(l) malloc_mutex_lock(l)
-# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
-# define RTREE_GET_VALIDATE
-RTREE_GET_GENERATE(rtree_get_locked)
-# undef RTREE_LOCK
-# undef RTREE_UNLOCK
-# undef RTREE_GET_VALIDATE
-#endif
+JEMALLOC_INLINE uintptr_t
+rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
+{
-#define RTREE_LOCK(l)
-#define RTREE_UNLOCK(l)
-#ifdef JEMALLOC_DEBUG
- /*
- * Suppose that it were possible for a jemalloc-allocated chunk to be
- * munmap()ped, followed by a different allocator in another thread re-using
- * overlapping virtual memory, all without invalidating the cached rtree
- * value. The result would be a false positive (the rtree would claim that
- * jemalloc owns memory that it had actually discarded). This scenario
- * seems impossible, but the following assertion is a prudent sanity check.
- */
-# define RTREE_GET_VALIDATE \
- assert(rtree_get_locked(rtree, key) == ret);
-#else
-# define RTREE_GET_VALIDATE
-#endif
-RTREE_GET_GENERATE(rtree_get)
-#undef RTREE_LOCK
-#undef RTREE_UNLOCK
-#undef RTREE_GET_VALIDATE
+ return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+ rtree->levels[level].cumbits)) & ((ZU(1) <<
+ rtree->levels[level].bits) - 1));
+}
JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
+rtree_node_valid(rtree_node_elm_t *node)
+{
+
+ return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm)
+{
+ rtree_node_elm_t *child;
+
+	/* Double-checked read (first read may be stale). */
+ child = elm->child;
+ if (!rtree_node_valid(child))
+ child = atomic_read_p(&elm->pun);
+ return (child);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+{
+ rtree_node_elm_t *child;
+
+ child = rtree_child_tryread(elm);
+ if (unlikely(!rtree_node_valid(child)))
+ child = rtree_child_read_hard(rtree, elm, level);
+ return (child);
+}
+
+JEMALLOC_INLINE extent_node_t *
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
+{
+
+ if (dependent) {
+ /*
+ * Reading a val on behalf of a pointer to a valid allocation is
+ * guaranteed to be a clean read even without synchronization,
+ * because the rtree update became visible in memory before the
+ * pointer came into existence.
+ */
+ return (elm->val);
+ } else {
+ /*
+ * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
+ * dependent on a previous rtree write, which means a stale read
+ * could result if synchronization were omitted here.
+ */
+ return (atomic_read_p(&elm->pun));
+ }
+}
+
+JEMALLOC_INLINE void
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
+{
+
+ atomic_write_p(&elm->pun, val);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+	/* Double-checked read (first read may be stale). */
+ subtree = rtree->levels[level].subtree;
+ if (!rtree_node_valid(subtree))
+ subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+ return (subtree);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+ subtree = rtree_subtree_tryread(rtree, level);
+ if (unlikely(!rtree_node_valid(subtree)))
+ subtree = rtree_subtree_read_hard(rtree, level);
+ return (subtree);
+}
+
+JEMALLOC_INLINE extent_node_t *
+rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
{
uintptr_t subkey;
- unsigned i, lshift, height, bits;
- void **node, **child;
-
- malloc_mutex_lock(&rtree->mutex);
- for (i = lshift = 0, height = rtree->height, node = rtree->root;
- i < height - 1;
- i++, lshift += bits, node = child) {
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
- bits);
- child = (void**)node[subkey];
- if (child == NULL) {
- size_t size = ((i + 1 < height - 1) ? sizeof(void *)
- : (sizeof(uint8_t))) << rtree->level2bits[i+1];
- child = (void**)rtree->alloc(size);
- if (child == NULL) {
- malloc_mutex_unlock(&rtree->mutex);
- return (true);
- }
- memset(child, 0, size);
- node[subkey] = child;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
+
+ start_level = rtree_start_level(rtree, key);
+
+ for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
+ /**/; i++, node = child) {
+ if (!dependent && unlikely(!rtree_node_valid(node)))
+ return (NULL);
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ return (rtree_val_read(rtree, &node[subkey],
+ dependent));
}
+ assert(i < rtree->height - 1);
+ child = rtree_child_tryread(&node[subkey]);
}
+ not_reached();
+}
- /* node is a leaf, so it contains values rather than node pointers. */
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
- {
- uint8_t *leaf = (uint8_t *)node;
- leaf[subkey] = val;
- }
- malloc_mutex_unlock(&rtree->mutex);
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
+{
+ uintptr_t subkey;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
- return (false);
+ start_level = rtree_start_level(rtree, key);
+
+ node = rtree_subtree_read(rtree, start_level);
+ if (node == NULL)
+ return (true);
+ for (i = start_level; /**/; i++, node = child) {
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ rtree_val_write(rtree, &node[subkey], val);
+ return (false);
+ }
+ assert(i + 1 < rtree->height);
+ child = rtree_child_read(rtree, &node[subkey], i);
+ if (child == NULL)
+ return (true);
+ }
+ not_reached();
}
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
index 29c80c1fb..fc82036d3 100755
--- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh
+++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
@@ -1,17 +1,26 @@
#!/bin/sh
+#
+# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g>
# The following limits are chosen such that they cover all supported platforms.
-# Range of quanta.
-lg_qmin=3
-lg_qmax=4
+# Pointer sizes.
+lg_zarr="2 3"
+
+# Quanta.
+lg_qarr=$1
# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
-lg_tmin=3
+lg_tmin=$2
+
+# Maximum lookup size.
+lg_kmax=12
+
+# Page sizes.
+lg_parr=`echo $3 | tr ',' ' '`
-# Range of page sizes.
-lg_pmin=12
-lg_pmax=16
+# Size class group size (number of size classes for each size doubling).
+lg_g=$4
pow2() {
e=$1
@@ -22,68 +31,224 @@ pow2() {
done
}
+lg() {
+ x=$1
+ lg_result=0
+ while [ ${x} -gt 1 ] ; do
+ lg_result=$((${lg_result} + 1))
+ x=$((${x} / 2))
+ done
+}
+
+size_class() {
+ index=$1
+ lg_grp=$2
+ lg_delta=$3
+ ndelta=$4
+ lg_p=$5
+ lg_kmax=$6
+
+ lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
+ if [ ${pow2_result} -lt ${ndelta} ] ; then
+ rem="yes"
+ else
+ rem="no"
+ fi
+
+ lg_size=${lg_grp}
+ if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then
+ lg_size=$((${lg_grp} + 1))
+ else
+ lg_size=${lg_grp}
+ rem="yes"
+ fi
+
+ if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then
+ bin="yes"
+ else
+ bin="no"
+ fi
+ if [ ${lg_size} -lt ${lg_kmax} \
+ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
+ lg_delta_lookup=${lg_delta}
+ else
+ lg_delta_lookup="no"
+ fi
+ printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup}
+ # Defined upon return:
+ # - lg_delta_lookup (${lg_delta} or "no")
+ # - bin ("yes" or "no")
+}
+
+sep_line() {
+ echo " \\"
+}
+
+size_classes() {
+ lg_z=$1
+ lg_q=$2
+ lg_t=$3
+ lg_p=$4
+ lg_g=$5
+
+ pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
+ pow2 ${lg_g}; g=${pow2_result}
+
+ echo "#define SIZE_CLASSES \\"
+ echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\"
+
+ ntbins=0
+ nlbins=0
+ lg_tiny_maxclass='"NA"'
+ nbins=0
+
+ # Tiny size classes.
+ ndelta=0
+ index=0
+ lg_grp=${lg_t}
+ lg_delta=${lg_grp}
+ while [ ${lg_grp} -lt ${lg_q} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ fi
+ ntbins=$((${ntbins} + 1))
+ lg_tiny_maxclass=${lg_grp} # Final written value is correct.
+ index=$((${index} + 1))
+ lg_delta=${lg_grp}
+ lg_grp=$((${lg_grp} + 1))
+ done
+
+ # First non-tiny group.
+ if [ ${ntbins} -gt 0 ] ; then
+ sep_line
+ # The first size class has an unusual encoding, because the size has to be
+ # split between grp and delta*ndelta.
+ lg_grp=$((${lg_grp} - 1))
+ ndelta=1
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ fi
+ while [ ${ndelta} -lt ${g} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ done
+
+ # All remaining groups.
+ lg_grp=$((${lg_grp} + ${lg_g}))
+ while [ ${lg_grp} -lt ${ptr_bits} ] ; do
+ sep_line
+ ndelta=1
+ if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then
+ ndelta_limit=$((${g} - 1))
+ else
+ ndelta_limit=${g}
+ fi
+ while [ ${ndelta} -le ${ndelta_limit} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ # Final written value is correct:
+ lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ # Final written value is correct:
+ small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ if [ ${lg_g} -gt 0 ] ; then
+ lg_large_minclass=$((${lg_grp} + 1))
+ else
+ lg_large_minclass=$((${lg_grp} + 2))
+ fi
+ fi
+ # Final written value is correct:
+ huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ done
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ done
+ echo
+ nsizes=${index}
+
+ # Defined upon completion:
+ # - ntbins
+ # - nlbins
+ # - nbins
+ # - nsizes
+ # - lg_tiny_maxclass
+ # - lookup_maxclass
+ # - small_maxclass
+ # - lg_large_minclass
+ # - huge_maxclass
+}
+
cat <<EOF
/* This file was automatically generated by size_classes.sh. */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+/*
+ * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to
+ * be defined prior to inclusion, and it in turn defines:
+ *
+ * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
+ * SIZE_CLASSES: Complete table of
+ * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)
+ * tuples.
+ * index: Size class index.
+ * lg_grp: Lg group base size (no deltas added).
+ * lg_delta: Lg delta to previous size class.
+ * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta
+ * bin: 'yes' if a small bin size class, 'no' otherwise.
+ * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
+ * otherwise.
+ * NTBINS: Number of tiny bins.
+ * NLBINS: Number of bins supported by the lookup table.
+ * NBINS: Number of small size class bins.
+ * NSIZES: Number of size classes.
+ * LG_TINY_MAXCLASS: Lg of maximum tiny size class.
+ * LOOKUP_MAXCLASS: Maximum size class included in lookup table.
+ * SMALL_MAXCLASS: Maximum small size class.
+ * LG_LARGE_MINCLASS: Lg of minimum large size class.
+ * HUGE_MAXCLASS: Maximum (huge) size class.
+ */
+
+#define LG_SIZE_CLASS_GROUP ${lg_g}
+
EOF
-lg_q=${lg_qmin}
-while [ ${lg_q} -le ${lg_qmax} ] ; do
- lg_t=${lg_tmin}
- while [ ${lg_t} -le ${lg_q} ] ; do
- lg_p=${lg_pmin}
- while [ ${lg_p} -le ${lg_pmax} ] ; do
- echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
- echo "#define SIZE_CLASSES_DEFINED"
- pow2 ${lg_q}; q=${pow2_result}
- pow2 ${lg_t}; t=${pow2_result}
- pow2 ${lg_p}; p=${pow2_result}
- bin=0
- psz=0
- sz=${t}
- delta=$((${sz} - ${psz}))
- echo "/* SIZE_CLASS(bin, delta, sz) */"
- echo "#define SIZE_CLASSES \\"
-
- # Tiny size classes.
- while [ ${sz} -lt ${q} ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${sz}))
- delta=$((${sz} - ${psz}))
- done
- # Quantum-multiple size classes. For each doubling of sz, as many as 4
- # size classes exist. Their spacing is the greater of:
- # - q
- # - sz/4, where sz is a power of 2
- while [ ${sz} -lt ${p} ] ; do
- if [ ${sz} -ge $((${q} * 4)) ] ; then
- i=$((${sz} / 4))
- else
- i=${q}
- fi
- next_2pow=$((${sz} * 2))
- while [ ${sz} -lt $next_2pow ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${i}))
- delta=$((${sz} - ${psz}))
- done
+for lg_z in ${lg_zarr} ; do
+ for lg_q in ${lg_qarr} ; do
+ lg_t=${lg_tmin}
+ while [ ${lg_t} -le ${lg_q} ] ; do
+ # Iterate through page sizes and compute how many bins there are.
+ for lg_p in ${lg_parr} ; do
+ echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
+ size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
+ echo "#define SIZE_CLASSES_DEFINED"
+ echo "#define NTBINS ${ntbins}"
+ echo "#define NLBINS ${nlbins}"
+ echo "#define NBINS ${nbins}"
+ echo "#define NSIZES ${nsizes}"
+ echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}"
+ echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}"
+ echo "#define SMALL_MAXCLASS ${small_maxclass}"
+ echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}"
+ echo "#define HUGE_MAXCLASS ${huge_maxclass}"
+ echo "#endif"
+ echo
done
- echo
- echo "#define NBINS ${bin}"
- echo "#define SMALL_MAXCLASS ${psz}"
- echo "#endif"
- echo
- lg_p=$((${lg_p} + 1))
+ lg_t=$((${lg_t} + 1))
done
- lg_t=$((${lg_t} + 1))
done
- lg_q=$((${lg_q} + 1))
done
cat <<EOF
@@ -92,11 +257,10 @@ cat <<EOF
#endif
#undef SIZE_CLASSES_DEFINED
/*
- * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
+ * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
* cannot support more than 256 small size classes. Further constrain NBINS to
- * 255 to support prof_promote, since all small size classes, plus a "not
- * small" size class must be stored in 8 bits of arena_chunk_map_t's bits
- * field.
+ * 255 since all small size classes, plus a "not small" size class must be
+ * stored in 8 bits of arena_chunk_map_bits_t's bits field.
*/
#if (NBINS > 255)
# error "Too many small size classes"
diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h
index 27f68e368..c91dba99d 100644
--- a/deps/jemalloc/include/jemalloc/internal/stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats.h
@@ -4,6 +4,7 @@
typedef struct tcache_bin_stats_s tcache_bin_stats_t;
typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t;
+typedef struct malloc_huge_stats_s malloc_huge_stats_t;
typedef struct arena_stats_s arena_stats_t;
typedef struct chunk_stats_s chunk_stats_t;
@@ -21,12 +22,6 @@ struct tcache_bin_stats_s {
struct malloc_bin_stats_s {
/*
- * Current number of bytes allocated, including objects currently
- * cached by tcache.
- */
- size_t allocated;
-
- /*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
* many times, resulting many increments to nrequests, but only one
@@ -42,6 +37,12 @@ struct malloc_bin_stats_s {
*/
uint64_t nrequests;
+ /*
+ * Current number of regions of this size class, including regions
+ * currently cached by tcache.
+ */
+ size_t curregs;
+
/* Number of tcache fills from this bin. */
uint64_t nfills;
@@ -78,10 +79,25 @@ struct malloc_large_stats_s {
*/
uint64_t nrequests;
- /* Current number of runs of this size class. */
+ /*
+ * Current number of runs of this size class, including runs currently
+ * cached by tcache.
+ */
size_t curruns;
};
+struct malloc_huge_stats_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the arena.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /* Current number of (multi-)chunk allocations of this size class. */
+ size_t curhchunks;
+};
+
struct arena_stats_s {
/* Number of bytes currently mapped. */
size_t mapped;
@@ -95,34 +111,28 @@ struct arena_stats_s {
uint64_t nmadvise;
uint64_t purged;
+ /*
+ * Number of bytes currently mapped purely for metadata purposes, and
+ * number of bytes currently allocated for internal metadata.
+ */
+ size_t metadata_mapped;
+ size_t metadata_allocated; /* Protected via atomic_*_z(). */
+
/* Per-size-category statistics. */
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
uint64_t nrequests_large;
- /*
- * One element for each possible size class, including sizes that
- * overlap with bin size classes. This is necessary because ipalloc()
- * sometimes has to use such large objects in order to assure proper
- * alignment.
- */
- malloc_large_stats_t *lstats;
-};
-
-struct chunk_stats_s {
- /* Number of chunks that were allocated. */
- uint64_t nchunks;
+ size_t allocated_huge;
+ uint64_t nmalloc_huge;
+ uint64_t ndalloc_huge;
- /* High-water mark for number of chunks allocated. */
- size_t highchunks;
+ /* One element for each large size class. */
+ malloc_large_stats_t *lstats;
- /*
- * Current number of chunks allocated. This value isn't maintained for
- * any other purpose, so keep track of it in order to be able to set
- * highchunks.
- */
- size_t curchunks;
+ /* One element for each huge size class. */
+ malloc_huge_stats_t *hstats;
};
#endif /* JEMALLOC_H_STRUCTS */
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h
index c3d4b58d4..5079cd266 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache.h
@@ -4,6 +4,7 @@
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
+typedef struct tcaches_s tcaches_t;
/*
* tcache pointers close to NULL are used to encode state information that is
@@ -16,6 +17,11 @@ typedef struct tcache_s tcache_t;
#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
/*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define TCACHE_NSLOTS_SMALL_MIN 20
+
+/*
* Absolute maximum number of cache slots for each small bin in the thread
* cache. This is an additional constraint beyond that imposed as: twice the
* number of regions per run for this size class.
@@ -69,10 +75,9 @@ struct tcache_bin_s {
struct tcache_s {
ql_elm(tcache_t) link; /* Used for aggregating stats. */
- uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
- arena_t *arena; /* This thread's arena. */
+ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
unsigned ev_cnt; /* Event count since incremental GC. */
- unsigned next_gc_bin; /* Next bin to GC. */
+ szind_t next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
@@ -82,6 +87,14 @@ struct tcache_s {
*/
};
+/* Linkage for list of available (previously used) explicit tcache IDs. */
+struct tcaches_s {
+ union {
+ tcache_t *tcache;
+ tcaches_t *next;
+ };
+};
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
@@ -95,84 +108,90 @@ extern tcache_bin_info_t *tcache_bin_info;
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
* large-object bins.
*/
-extern size_t nhbins;
+extern size_t nhbins;
/* Maximum cached size class. */
-extern size_t tcache_maxclass;
+extern size_t tcache_maxclass;
+
+/*
+ * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
+ * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
+ * completely disjoint from this data structure. tcaches starts off as a sparse
+ * array, so it has no physical memory footprint until individual pages are
+ * touched. This allows the entire array to be allocated the first time an
+ * explicit tcache is created without a disproportionate impact on memory usage.
+ */
+extern tcaches_t *tcaches;
size_t tcache_salloc(const void *ptr);
-void tcache_event_hard(tcache_t *tcache);
-void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
- size_t binind);
-void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
-void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
+void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
+void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ tcache_bin_t *tbin, szind_t binind);
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+ szind_t binind, unsigned rem);
+void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+ unsigned rem, tcache_t *tcache);
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void tcache_arena_dissociate(tcache_t *tcache);
-tcache_t *tcache_create(arena_t *arena);
-void tcache_destroy(tcache_t *tcache);
-void tcache_thread_cleanup(void *arg);
+void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
+ arena_t *newarena);
+void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
+tcache_t *tcache_get_hard(tsd_t *tsd);
+tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
+void tcache_cleanup(tsd_t *tsd);
+void tcache_enabled_cleanup(tsd_t *tsd);
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
-bool tcache_boot0(void);
-bool tcache_boot1(void);
+bool tcaches_create(tsd_t *tsd, unsigned *r_ind);
+void tcaches_flush(tsd_t *tsd, unsigned ind);
+void tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
-
-void tcache_event(tcache_t *tcache);
+void tcache_event(tsd_t *tsd, tcache_t *tcache);
void tcache_flush(void);
bool tcache_enabled_get(void);
-tcache_t *tcache_get(bool create);
+tcache_t *tcache_get(tsd_t *tsd, bool create);
void tcache_enabled_set(bool enabled);
void *tcache_alloc_easy(tcache_bin_t *tbin);
-void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
-void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
-void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
-void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
+void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ size_t size, bool zero);
+void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ size_t size, bool zero);
+void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr,
+ szind_t binind);
+void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr,
+ size_t size);
+tcache_t *tcaches_get(tsd_t *tsd, unsigned ind);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
-/* Map of thread-specific caches. */
-malloc_tsd_externs(tcache, tcache_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
- tcache_thread_cleanup)
-/* Per thread flag that allows thread caches to be disabled. */
-malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
- tcache_enabled_default, malloc_tsd_no_cleanup)
-
JEMALLOC_INLINE void
tcache_flush(void)
{
- tcache_t *tcache;
+ tsd_t *tsd;
cassert(config_tcache);
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
- return;
- tcache_destroy(tcache);
- tcache = NULL;
- tcache_tsd_set(&tcache);
+ tsd = tsd_fetch();
+ tcache_cleanup(tsd);
}
JEMALLOC_INLINE bool
tcache_enabled_get(void)
{
+ tsd_t *tsd;
tcache_enabled_t tcache_enabled;
cassert(config_tcache);
- tcache_enabled = *tcache_enabled_tsd_get();
+ tsd = tsd_fetch();
+ tcache_enabled = tsd_tcache_enabled_get(tsd);
if (tcache_enabled == tcache_enabled_default) {
tcache_enabled = (tcache_enabled_t)opt_tcache;
- tcache_enabled_tsd_set(&tcache_enabled);
+ tsd_tcache_enabled_set(tsd, tcache_enabled);
}
return ((bool)tcache_enabled);
@@ -181,85 +200,41 @@ tcache_enabled_get(void)
JEMALLOC_INLINE void
tcache_enabled_set(bool enabled)
{
+ tsd_t *tsd;
tcache_enabled_t tcache_enabled;
- tcache_t *tcache;
cassert(config_tcache);
+ tsd = tsd_fetch();
+
tcache_enabled = (tcache_enabled_t)enabled;
- tcache_enabled_tsd_set(&tcache_enabled);
- tcache = *tcache_tsd_get();
- if (enabled) {
- if (tcache == TCACHE_STATE_DISABLED) {
- tcache = NULL;
- tcache_tsd_set(&tcache);
- }
- } else /* disabled */ {
- if (tcache > TCACHE_STATE_MAX) {
- tcache_destroy(tcache);
- tcache = NULL;
- }
- if (tcache == NULL) {
- tcache = TCACHE_STATE_DISABLED;
- tcache_tsd_set(&tcache);
- }
- }
+ tsd_tcache_enabled_set(tsd, tcache_enabled);
+
+ if (!enabled)
+ tcache_cleanup(tsd);
}
JEMALLOC_ALWAYS_INLINE tcache_t *
-tcache_get(bool create)
+tcache_get(tsd_t *tsd, bool create)
{
tcache_t *tcache;
- if (config_tcache == false)
- return (NULL);
- if (config_lazy_lock && isthreaded == false)
+ if (!config_tcache)
return (NULL);
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
- if (tcache == TCACHE_STATE_DISABLED)
- return (NULL);
- if (tcache == NULL) {
- if (create == false) {
- /*
- * Creating a tcache here would cause
- * allocation as a side effect of free().
- * Ordinarily that would be okay since
- * tcache_create() failure is a soft failure
- * that doesn't propagate. However, if TLS
- * data are freed via free() as in glibc,
- * subtle corruption could result from setting
- * a TLS variable after its backing memory is
- * freed.
- */
- return (NULL);
- }
- if (tcache_enabled_get() == false) {
- tcache_enabled_set(false); /* Memoize. */
- return (NULL);
- }
- return (tcache_create(choose_arena(NULL)));
- }
- if (tcache == TCACHE_STATE_PURGATORY) {
- /*
- * Make a note that an allocator function was called
- * after tcache_thread_cleanup() was called.
- */
- tcache = TCACHE_STATE_REINCARNATED;
- tcache_tsd_set(&tcache);
- return (NULL);
- }
- if (tcache == TCACHE_STATE_REINCARNATED)
- return (NULL);
- not_reached();
+ tcache = tsd_tcache_get(tsd);
+ if (!create)
+ return (tcache);
+ if (unlikely(tcache == NULL) && tsd_nominal(tsd)) {
+ tcache = tcache_get_hard(tsd);
+ tsd_tcache_set(tsd, tcache);
}
return (tcache);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_event(tcache_t *tcache)
+tcache_event(tsd_t *tsd, tcache_t *tcache)
{
if (TCACHE_GC_INCR == 0)
@@ -267,8 +242,8 @@ tcache_event(tcache_t *tcache)
tcache->ev_cnt++;
assert(tcache->ev_cnt <= TCACHE_GC_INCR);
- if (tcache->ev_cnt == TCACHE_GC_INCR)
- tcache_event_hard(tcache);
+ if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR))
+ tcache_event_hard(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void *
@@ -276,85 +251,87 @@ tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
- if (tbin->ncached == 0) {
+ if (unlikely(tbin->ncached == 0)) {
tbin->low_water = -1;
return (NULL);
}
tbin->ncached--;
- if ((int)tbin->ncached < tbin->low_water)
+ if (unlikely((int)tbin->ncached < tbin->low_water))
tbin->low_water = tbin->ncached;
ret = tbin->avail[tbin->ncached];
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
+tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+ bool zero)
{
void *ret;
- size_t binind;
+ szind_t binind;
+ size_t usize;
tcache_bin_t *tbin;
- binind = SMALL_SIZE2BIN(size);
+ binind = size2index(size);
assert(binind < NBINS);
tbin = &tcache->tbins[binind];
- size = arena_bin_info[binind].reg_size;
+ usize = index2size(binind);
ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
- ret = tcache_alloc_small_hard(tcache, tbin, binind);
+ if (unlikely(ret == NULL)) {
+ ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);
if (ret == NULL)
return (NULL);
}
- assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
+ assert(tcache_salloc(ret) == usize);
- if (zero == false) {
+ if (likely(!zero)) {
if (config_fill) {
- if (opt_junk) {
+ if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
- } else if (opt_zero)
- memset(ret, 0, size);
+ } else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
} else {
- if (config_fill && opt_junk) {
+ if (config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
+ memset(ret, 0, usize);
}
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
- tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
- tcache_event(tcache);
+ tcache->prof_accumbytes += usize;
+ tcache_event(tsd, tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
+tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+ bool zero)
{
void *ret;
- size_t binind;
+ szind_t binind;
+ size_t usize;
tcache_bin_t *tbin;
- size = PAGE_CEILING(size);
- assert(size <= tcache_maxclass);
- binind = NBINS + (size >> LG_PAGE) - 1;
+ binind = size2index(size);
+ usize = index2size(binind);
+ assert(usize <= tcache_maxclass);
assert(binind < nhbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
+ if (unlikely(ret == NULL)) {
/*
* Only allocate one large object at a time, because it's quite
* expensive to create one and not use it.
*/
- ret = arena_malloc_large(tcache->arena, size, zero);
+ ret = arena_malloc_large(arena, usize, zero);
if (ret == NULL)
return (NULL);
} else {
- if (config_prof && prof_promote && size == PAGE) {
+ if (config_prof && usize == LARGE_MINCLASS) {
arena_chunk_t *chunk =
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
@@ -362,57 +339,54 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
arena_mapbits_large_binind_set(chunk, pageind,
BININD_INVALID);
}
- if (zero == false) {
+ if (likely(!zero)) {
if (config_fill) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
+ if (unlikely(opt_junk_alloc))
+ memset(ret, 0xa5, usize);
+ else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- } else {
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
- }
+ } else
+ memset(ret, 0, usize);
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
- tcache->prof_accumbytes += size;
+ tcache->prof_accumbytes += usize;
}
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
{
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
- if (config_fill && opt_junk)
+ if (config_fill && unlikely(opt_junk_free))
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
+ if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
+ tcache_bin_flush_small(tsd, tcache, tbin, binind,
+ (tbin_info->ncached_max >> 1));
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
{
- size_t binind;
+ szind_t binind;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
@@ -420,22 +394,31 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(ptr) <= tcache_maxclass);
- binind = NBINS + (size >> LG_PAGE) - 1;
+ binind = size2index(size);
- if (config_fill && opt_junk)
- memset(ptr, 0x5a, size);
+ if (config_fill && unlikely(opt_junk_free))
+ arena_dalloc_junk_large(ptr, size);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
+ if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
+ tcache_bin_flush_large(tsd, tbin, binind,
+ (tbin_info->ncached_max >> 1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcaches_get(tsd_t *tsd, unsigned ind)
+{
+ tcaches_t *elm = &tcaches[ind];
+ if (unlikely(elm->tcache == NULL))
+ elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
+ return (elm->tcache);
}
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
index 9fb4a23ec..eed7aa013 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -2,7 +2,7 @@
#ifdef JEMALLOC_H_TYPES
/* Maximum number of malloc_tsd users with cleanup functions. */
-#define MALLOC_TSD_CLEANUPS_MAX 8
+#define MALLOC_TSD_CLEANUPS_MAX 2
typedef bool (*malloc_tsd_cleanup_t)(void);
@@ -12,9 +12,18 @@ typedef struct tsd_init_block_s tsd_init_block_t;
typedef struct tsd_init_head_s tsd_init_head_t;
#endif
+typedef struct tsd_s tsd_t;
+
+typedef enum {
+ tsd_state_uninitialized,
+ tsd_state_nominal,
+ tsd_state_purgatory,
+ tsd_state_reincarnated
+} tsd_state_t;
+
/*
* TLS/TSD-agnostic macro-based implementation of thread-specific data. There
- * are four macros that support (at least) three use cases: file-private,
+ * are five macros that support (at least) three use cases: file-private,
* library-private, and library-private inlined. Following is an example
* library-private tsd variable:
*
@@ -24,34 +33,36 @@ typedef struct tsd_init_head_s tsd_init_head_t;
* int y;
* } example_t;
* #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
- * malloc_tsd_protos(, example, example_t *)
- * malloc_tsd_externs(example, example_t *)
+ * malloc_tsd_types(example_, example_t)
+ * malloc_tsd_protos(, example_, example_t)
+ * malloc_tsd_externs(example_, example_t)
* In example.c:
- * malloc_tsd_data(, example, example_t *, EX_INITIALIZER)
- * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER,
+ * malloc_tsd_data(, example_, example_t, EX_INITIALIZER)
+ * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER,
* example_tsd_cleanup)
*
* The result is a set of generated functions, e.g.:
*
* bool example_tsd_boot(void) {...}
- * example_t **example_tsd_get() {...}
- * void example_tsd_set(example_t **val) {...}
+ * example_t *example_tsd_get() {...}
+ * void example_tsd_set(example_t *val) {...}
*
* Note that all of the functions deal in terms of (a_type *) rather than
- * (a_type) so that it is possible to support non-pointer types (unlike
+ * (a_type) so that it is possible to support non-pointer types (unlike
* pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is
- * cast to (void *). This means that the cleanup function needs to cast *and*
- * dereference the function argument, e.g.:
+ * cast to (void *). This means that the cleanup function needs to cast the
+ * function argument to (a_type *), then dereference the resulting pointer to
+ * access fields, e.g.
*
* void
* example_tsd_cleanup(void *arg)
* {
- * example_t *example = *(example_t **)arg;
+ * example_t *example = (example_t *)arg;
*
+ * example->x = 42;
* [...]
- * if ([want the cleanup function to be called again]) {
- * example_tsd_set(&example);
- * }
+ * if ([want the cleanup function to be called again])
+ * example_tsd_set(example);
* }
*
* If example_tsd_set() is called within example_tsd_cleanup(), it will be
@@ -60,63 +71,96 @@ typedef struct tsd_init_head_s tsd_init_head_t;
* non-NULL.
*/
+/* malloc_tsd_types(). */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(JEMALLOC_TLS))
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(_WIN32))
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#else
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#endif
+
/* malloc_tsd_protos(). */
#define malloc_tsd_protos(a_attr, a_name, a_type) \
a_attr bool \
-a_name##_tsd_boot(void); \
+a_name##tsd_boot0(void); \
+a_attr void \
+a_name##tsd_boot1(void); \
+a_attr bool \
+a_name##tsd_boot(void); \
a_attr a_type * \
-a_name##_tsd_get(void); \
+a_name##tsd_get(void); \
a_attr void \
-a_name##_tsd_set(a_type *val);
+a_name##tsd_set(a_type *val);
/* malloc_tsd_externs(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern __thread bool a_name##_initialized; \
-extern bool a_name##_booted;
+extern __thread a_type a_name##tsd_tls; \
+extern __thread bool a_name##tsd_initialized; \
+extern bool a_name##tsd_booted;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern pthread_key_t a_name##_tsd; \
-extern bool a_name##_booted;
+extern __thread a_type a_name##tsd_tls; \
+extern pthread_key_t a_name##tsd_tsd; \
+extern bool a_name##tsd_booted;
#elif (defined(_WIN32))
#define malloc_tsd_externs(a_name, a_type) \
-extern DWORD a_name##_tsd; \
-extern bool a_name##_booted;
+extern DWORD a_name##tsd_tsd; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
+extern bool a_name##tsd_booted;
#else
#define malloc_tsd_externs(a_name, a_type) \
-extern pthread_key_t a_name##_tsd; \
-extern tsd_init_head_t a_name##_tsd_init_head; \
-extern bool a_name##_booted;
+extern pthread_key_t a_name##tsd_tsd; \
+extern tsd_init_head_t a_name##tsd_init_head; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
+extern bool a_name##tsd_booted;
#endif
/* malloc_tsd_data(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
+ a_name##tsd_tls = a_initializer; \
a_attr __thread bool JEMALLOC_TLS_MODEL \
- a_name##_initialized = false; \
-a_attr bool a_name##_booted = false;
+ a_name##tsd_initialized = false; \
+a_attr bool a_name##tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+ a_name##tsd_tls = a_initializer; \
+a_attr pthread_key_t a_name##tsd_tsd; \
+a_attr bool a_name##tsd_booted = false;
#elif (defined(_WIN32))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr DWORD a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+a_attr DWORD a_name##tsd_tsd; \
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
+a_attr bool a_name##tsd_booted = false;
#else
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr tsd_init_head_t a_name##_tsd_init_head = { \
+a_attr pthread_key_t a_name##tsd_tsd; \
+a_attr tsd_init_head_t a_name##tsd_init_head = { \
ql_head_initializer(blocks), \
MALLOC_MUTEX_INITIALIZER \
}; \
-a_attr bool a_name##_booted = false;
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
+a_attr bool a_name##tsd_booted = false;
#endif
/* malloc_tsd_funcs(). */
@@ -125,75 +169,100 @@ a_attr bool a_name##_booted = false;
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
+a_name##tsd_cleanup_wrapper(void) \
{ \
\
- if (a_name##_initialized) { \
- a_name##_initialized = false; \
- a_cleanup(&a_name##_tls); \
+ if (a_name##tsd_initialized) { \
+ a_name##tsd_initialized = false; \
+ a_cleanup(&a_name##tsd_tls); \
} \
- return (a_name##_initialized); \
+ return (a_name##tsd_initialized); \
} \
a_attr bool \
-a_name##_tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
+ &a_name##tsd_cleanup_wrapper); \
} \
- a_name##_booted = true; \
+ a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
\
- assert(a_name##_booted); \
- return (&a_name##_tls); \
+ assert(a_name##tsd_booted); \
+ return (&a_name##tsd_tls); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
\
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
+ assert(a_name##tsd_booted); \
+ a_name##tsd_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
- a_name##_initialized = true; \
+ a_name##tsd_initialized = true; \
}
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \
+ if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \
+ 0) \
return (true); \
} \
- a_name##_booted = true; \
+ a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
\
- assert(a_name##_booted); \
- return (&a_name##_tls); \
+ assert(a_name##tsd_booted); \
+ return (&a_name##tsd_tls); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
\
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
+ assert(a_name##tsd_booted); \
+ a_name##tsd_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)(&a_name##_tls))) { \
+ if (pthread_setspecific(a_name##tsd_tsd, \
+ (void *)(&a_name##tsd_tls))) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
if (opt_abort) \
@@ -204,27 +273,21 @@ a_name##_tsd_set(a_type *val) \
#elif (defined(_WIN32))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
+a_name##tsd_cleanup_wrapper(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ DWORD error = GetLastError(); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ TlsGetValue(a_name##tsd_tsd); \
+ SetLastError(error); \
\
- wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \
if (wrapper == NULL) \
return (false); \
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
- a_type val = wrapper->val; \
- a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- a_cleanup(&val); \
+ a_cleanup(&wrapper->val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
return (true); \
@@ -233,63 +296,95 @@ a_name##_tsd_cleanup_wrapper(void) \
malloc_tsd_dalloc(wrapper); \
return (false); \
} \
-a_attr bool \
-a_name##_tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- a_name##_tsd = TlsAlloc(); \
- if (a_name##_tsd == TLS_OUT_OF_INDEXES) \
- return (true); \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
+ if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
} \
- a_name##_booted = true; \
- return (false); \
} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
+a_attr a_name##tsd_wrapper_t * \
+a_name##tsd_wrapper_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- TlsGetValue(a_name##_tsd); \
+ DWORD error = GetLastError(); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ TlsGetValue(a_name##tsd_tsd); \
+ SetLastError(error); \
\
- if (wrapper == NULL) { \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
+ if (unlikely(wrapper == NULL)) { \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
- static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
+ wrapper->val = a_initializer; \
} \
+ a_name##tsd_wrapper_set(wrapper); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ a_name##tsd_tsd = TlsAlloc(); \
+ if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \
+ return (true); \
+ if (a_cleanup != malloc_tsd_no_cleanup) { \
+ malloc_tsd_cleanup_register( \
+ &a_name##tsd_cleanup_wrapper); \
+ } \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -297,16 +392,11 @@ a_name##_tsd_set(a_type *val) \
#else
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr void \
-a_name##_tsd_cleanup_wrapper(void *arg) \
+a_name##tsd_cleanup_wrapper(void *arg) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \
\
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
@@ -314,7 +404,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \
a_cleanup(&wrapper->val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
- if (pthread_setspecific(a_name##_tsd, \
+ if (pthread_setspecific(a_name##tsd_tsd, \
(void *)wrapper)) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
@@ -326,67 +416,97 @@ a_name##_tsd_cleanup_wrapper(void *arg) \
} \
malloc_tsd_dalloc(wrapper); \
} \
-a_attr bool \
-a_name##_tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- if (pthread_key_create(&a_name##_tsd, \
- a_name##_tsd_cleanup_wrapper) != 0) \
- return (true); \
- a_name##_booted = true; \
- return (false); \
+ if (pthread_setspecific(a_name##tsd_tsd, \
+ (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
+a_attr a_name##tsd_wrapper_t * \
+a_name##tsd_wrapper_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- pthread_getspecific(a_name##_tsd); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ pthread_getspecific(a_name##tsd_tsd); \
\
- if (wrapper == NULL) { \
+ if (unlikely(wrapper == NULL)) { \
tsd_init_block_t block; \
wrapper = tsd_init_check_recursion( \
- &a_name##_tsd_init_head, &block); \
+ &a_name##tsd_init_head, &block); \
if (wrapper) \
return (wrapper); \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
block.data = wrapper; \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
- static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
+ wrapper->val = a_initializer; \
} \
- tsd_init_finish(&a_name##_tsd_init_head, &block); \
+ a_name##tsd_wrapper_set(wrapper); \
+ tsd_init_finish(&a_name##tsd_init_head, &block); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ if (pthread_key_create(&a_name##tsd_tsd, \
+ a_name##tsd_cleanup_wrapper) != 0) \
+ return (true); \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -410,25 +530,136 @@ struct tsd_init_head_s {
};
#endif
+#define MALLOC_TSD \
+/* O(name, type) */ \
+ O(tcache, tcache_t *) \
+ O(thread_allocated, uint64_t) \
+ O(thread_deallocated, uint64_t) \
+ O(prof_tdata, prof_tdata_t *) \
+ O(arena, arena_t *) \
+ O(arenas_cache, arena_t **) \
+ O(narenas_cache, unsigned) \
+ O(arenas_cache_bypass, bool) \
+ O(tcache_enabled, tcache_enabled_t) \
+ O(quarantine, quarantine_t *) \
+
+#define TSD_INITIALIZER { \
+ tsd_state_uninitialized, \
+ NULL, \
+ 0, \
+ 0, \
+ NULL, \
+ NULL, \
+ NULL, \
+ 0, \
+ false, \
+ tcache_enabled_default, \
+ NULL \
+}
+
+struct tsd_s {
+ tsd_state_t state;
+#define O(n, t) \
+ t n;
+MALLOC_TSD
+#undef O
+};
+
+static const tsd_t tsd_initializer = TSD_INITIALIZER;
+
+malloc_tsd_types(, tsd_t)
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
-void malloc_tsd_no_cleanup(void *);
+void malloc_tsd_no_cleanup(void *arg);
void malloc_tsd_cleanup_register(bool (*f)(void));
-void malloc_tsd_boot(void);
+bool malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
!defined(_WIN32))
void *tsd_init_check_recursion(tsd_init_head_t *head,
tsd_init_block_t *block);
void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
#endif
+void tsd_cleanup(void *arg);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
+
+tsd_t *tsd_fetch(void);
+bool tsd_nominal(tsd_t *tsd);
+#define O(n, t) \
+t *tsd_##n##p_get(tsd_t *tsd); \
+t tsd_##n##_get(tsd_t *tsd); \
+void tsd_##n##_set(tsd_t *tsd, t n);
+MALLOC_TSD
+#undef O
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
+malloc_tsd_externs(, tsd_t)
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void)
+{
+ tsd_t *tsd = tsd_get();
+
+ if (unlikely(tsd->state != tsd_state_nominal)) {
+ if (tsd->state == tsd_state_uninitialized) {
+ tsd->state = tsd_state_nominal;
+ /* Trigger cleanup handler registration. */
+ tsd_set(tsd);
+ } else if (tsd->state == tsd_state_purgatory) {
+ tsd->state = tsd_state_reincarnated;
+ tsd_set(tsd);
+ } else
+ assert(tsd->state == tsd_state_reincarnated);
+ }
+
+ return (tsd);
+}
+
+JEMALLOC_INLINE bool
+tsd_nominal(tsd_t *tsd)
+{
+
+ return (tsd->state == tsd_state_nominal);
+}
+
+#define O(n, t) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get(tsd_t *tsd) \
+{ \
+ \
+ return (&tsd->n); \
+} \
+ \
+JEMALLOC_ALWAYS_INLINE t \
+tsd_##n##_get(tsd_t *tsd) \
+{ \
+ \
+ return (*tsd_##n##p_get(tsd)); \
+} \
+ \
+JEMALLOC_ALWAYS_INLINE void \
+tsd_##n##_set(tsd_t *tsd, t n) \
+{ \
+ \
+ assert(tsd->state == tsd_state_nominal); \
+ tsd->n = n; \
+}
+MALLOC_TSD
+#undef O
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h
index 6b938f746..b2ea740fd 100644
--- a/deps/jemalloc/include/jemalloc/internal/util.h
+++ b/deps/jemalloc/include/jemalloc/internal/util.h
@@ -1,6 +1,36 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+#ifdef _WIN32
+# ifdef _WIN64
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX "ll"
+# else
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX ""
+# endif
+# define FMTd32 "d"
+# define FMTu32 "u"
+# define FMTx32 "x"
+# define FMTd64 FMT64_PREFIX "d"
+# define FMTu64 FMT64_PREFIX "u"
+# define FMTx64 FMT64_PREFIX "x"
+# define FMTdPTR FMTPTR_PREFIX "d"
+# define FMTuPTR FMTPTR_PREFIX "u"
+# define FMTxPTR FMTPTR_PREFIX "x"
+#else
+# include <inttypes.h>
+# define FMTd32 PRId32
+# define FMTu32 PRIu32
+# define FMTx32 PRIx32
+# define FMTd64 PRId64
+# define FMTu64 PRIu64
+# define FMTx64 PRIx64
+# define FMTdPTR PRIdPTR
+# define FMTuPTR PRIuPTR
+# define FMTxPTR PRIxPTR
+#endif
+
/* Size of stack-allocated buffer passed to buferror(). */
#define BUFERROR_BUF 64
@@ -22,9 +52,33 @@
* uninitialized.
*/
#ifdef JEMALLOC_CC_SILENCE
-# define JEMALLOC_CC_SILENCE_INIT(v) = v
+# define JEMALLOC_CC_SILENCE_INIT(v) = v
#else
-# define JEMALLOC_CC_SILENCE_INIT(v)
+# define JEMALLOC_CC_SILENCE_INIT(v)
+#endif
+
+#define JEMALLOC_GNUC_PREREQ(major, minor) \
+ (!defined(__clang__) && \
+ (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
+#ifndef __has_builtin
+# define __has_builtin(builtin) (0)
+#endif
+#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \
+ (defined(__clang__) && __has_builtin(builtin))
+
+#ifdef __GNUC__
+# define likely(x) __builtin_expect(!!(x), 1)
+# define unlikely(x) __builtin_expect(!!(x), 0)
+# if JEMALLOC_GNUC_PREREQ(4, 6) || \
+ JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
+# define unreachable() __builtin_unreachable()
+# else
+# define unreachable()
+# endif
+#else
+# define likely(x) !!(x)
+# define unlikely(x) !!(x)
+# define unreachable()
#endif
/*
@@ -33,7 +87,7 @@
*/
#ifndef assert
#define assert(e) do { \
- if (config_debug && !(e)) { \
+ if (unlikely(config_debug && !(e))) { \
malloc_printf( \
"<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
__FILE__, __LINE__, #e); \
@@ -50,6 +104,7 @@
__FILE__, __LINE__); \
abort(); \
} \
+ unreachable(); \
} while (0)
#endif
@@ -65,14 +120,14 @@
#ifndef assert_not_implemented
#define assert_not_implemented(e) do { \
- if (config_debug && !(e)) \
+ if (unlikely(config_debug && !(e))) \
not_implemented(); \
} while (0)
#endif
/* Use to assert a particular configuration, e.g., cassert(config_debug). */
#define cassert(c) do { \
- if ((c) == false) \
+ if (unlikely(!(c))) \
not_reached(); \
} while (0)
@@ -96,25 +151,47 @@ void malloc_write(const char *s);
int malloc_vsnprintf(char *str, size_t size, const char *format,
va_list ap);
int malloc_snprintf(char *str, size_t size, const char *format, ...)
- JEMALLOC_ATTR(format(printf, 3, 4));
+ JEMALLOC_FORMAT_PRINTF(3, 4);
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap);
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
- const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
-void malloc_printf(const char *format, ...)
- JEMALLOC_ATTR(format(printf, 1, 2));
+ const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
+int jemalloc_ffsl(long bitmap);
+int jemalloc_ffs(int bitmap);
size_t pow2_ceil(size_t x);
+size_t lg_floor(size_t x);
void set_errno(int errnum);
int get_errno(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
+
+/* Sanity check. */
+#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS)
+# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure
+#endif
+
+JEMALLOC_ALWAYS_INLINE int
+jemalloc_ffsl(long bitmap)
+{
+
+ return (JEMALLOC_INTERNAL_FFSL(bitmap));
+}
+
+JEMALLOC_ALWAYS_INLINE int
+jemalloc_ffs(int bitmap)
+{
+
+ return (JEMALLOC_INTERNAL_FFS(bitmap));
+}
+
/* Compute the smallest power of 2 that is >= x. */
JEMALLOC_INLINE size_t
pow2_ceil(size_t x)
@@ -133,7 +210,82 @@ pow2_ceil(size_t x)
return (x);
}
-/* Sets error code */
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+ size_t ret;
+
+ assert(x != 0);
+
+ asm ("bsr %1, %0"
+ : "=r"(ret) // Outputs.
+ : "r"(x) // Inputs.
+ );
+ return (ret);
+}
+#elif (defined(_MSC_VER))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+ unsigned long ret;
+
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == 3)
+ _BitScanReverse64(&ret, x);
+#elif (LG_SIZEOF_PTR == 2)
+ _BitScanReverse(&ret, x);
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+ return (ret);
+}
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
+ return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x));
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+ return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x));
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#else
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+
+ assert(x != 0);
+
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+ x |= (x >> 32);
+ if (x == KZU(0xffffffffffffffff))
+ return (63);
+ x++;
+ return (jemalloc_ffsl(x) - 2);
+#elif (LG_SIZEOF_PTR == 2)
+ if (x == KZU(0xffffffff))
+ return (31);
+ x++;
+ return (jemalloc_ffs(x) - 2);
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#endif
+
+/* Set error code. */
JEMALLOC_INLINE void
set_errno(int errnum)
{
@@ -145,7 +297,7 @@ set_errno(int errnum)
#endif
}
-/* Get last error code */
+/* Get last error code. */
JEMALLOC_INLINE int
get_errno(void)
{
diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h
new file mode 100644
index 000000000..a3380df92
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/valgrind.h
@@ -0,0 +1,112 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#ifdef JEMALLOC_VALGRIND
+#include <valgrind/valgrind.h>
+
+/*
+ * The size that is reported to Valgrind must be consistent through a chain of
+ * malloc..realloc..realloc calls. Request size isn't recorded anywhere in
+ * jemalloc, so it is critical that all callers of these macros provide usize
+ * rather than request size. As a result, buffer overflow detection is
+ * technically weakened for the standard API, though it is generally accepted
+ * practice to consider any extra bytes reported by malloc_usable_size() as
+ * usable space.
+ */
+#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_noaccess(ptr, usize); \
+} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_undefined(ptr, usize); \
+} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_defined(ptr, usize); \
+} while (0)
+/*
+ * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro
+ * calls must be embedded in macros rather than in functions so that when
+ * Valgrind reports errors, there are no extra stack frames in the backtraces.
+ */
+#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
+ if (unlikely(in_valgrind && cond)) \
+ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
+} while (0)
+#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
+ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
+ zero) do { \
+ if (unlikely(in_valgrind)) { \
+ size_t rzsize = p2rz(ptr); \
+ \
+ if (!maybe_moved || ptr == old_ptr) { \
+ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
+ usize, rzsize); \
+ if (zero && old_usize < usize) { \
+ valgrind_make_mem_defined( \
+ (void *)((uintptr_t)ptr + \
+ old_usize), usize - old_usize); \
+ } \
+ } else { \
+ if (!old_ptr_maybe_null || old_ptr != NULL) { \
+ valgrind_freelike_block(old_ptr, \
+ old_rzsize); \
+ } \
+ if (!ptr_maybe_null || ptr != NULL) { \
+ size_t copy_size = (old_usize < usize) \
+ ? old_usize : usize; \
+ size_t tail_size = usize - copy_size; \
+ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
+ rzsize, false); \
+ if (copy_size > 0) { \
+ valgrind_make_mem_defined(ptr, \
+ copy_size); \
+ } \
+ if (zero && tail_size > 0) { \
+ valgrind_make_mem_defined( \
+ (void *)((uintptr_t)ptr + \
+ copy_size), tail_size); \
+ } \
+ } \
+ } \
+ } \
+} while (0)
+#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_freelike_block(ptr, rzsize); \
+} while (0)
+#else
+#define RUNNING_ON_VALGRIND ((unsigned)0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
+#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
+ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
+ zero) do {} while (0)
+#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_VALGRIND
+void valgrind_make_mem_noaccess(void *ptr, size_t usize);
+void valgrind_make_mem_undefined(void *ptr, size_t usize);
+void valgrind_make_mem_defined(void *ptr, size_t usize);
+void valgrind_freelike_block(void *ptr, size_t usize);
+#endif
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc/include/jemalloc/jemalloc.sh b/deps/jemalloc/include/jemalloc/jemalloc.sh
index e4738ebae..c085814f2 100755
--- a/deps/jemalloc/include/jemalloc/jemalloc.sh
+++ b/deps/jemalloc/include/jemalloc/jemalloc.sh
@@ -12,7 +12,7 @@ extern "C" {
EOF
for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \
- jemalloc_protos.h jemalloc_mangle.h ; do
+ jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do
cat "${objroot}include/jemalloc/${hdr}" \
| grep -v 'Generated from .* by configure\.' \
| sed -e 's/^#define /#define /g' \
@@ -22,7 +22,7 @@ done
cat <<EOF
#ifdef __cplusplus
-};
+}
#endif
#endif /* JEMALLOC_H_ */
EOF
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
index eb38d7105..ab13c3758 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -1,8 +1,14 @@
/* Defined if __attribute__((...)) syntax is supported. */
#undef JEMALLOC_HAVE_ATTR
-/* Support the experimental API. */
-#undef JEMALLOC_EXPERIMENTAL
+/* Defined if alloc_size attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
+
+/* Defined if format(gnu_printf, ...) attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
+
+/* Defined if format(printf, ...) attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF
/*
* Define overrides for non-standard allocator-related functions if they are
@@ -20,5 +26,12 @@
*/
#undef JEMALLOC_USABLE_SIZE_CONST
+/*
+ * If defined, specify throw() for the public function prototypes when compiling
+ * with C++. The only justification for this is to match the prototypes that
+ * glibc defines.
+ */
+#undef JEMALLOC_USE_CXX_THROW
+
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
index 13dbdd912..a7028db34 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
@@ -1,3 +1,6 @@
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <limits.h>
#include <strings.h>
@@ -16,46 +19,88 @@
((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
# endif
# define MALLOCX_ZERO ((int)0x40)
-/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */
-# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8))
+/*
+ * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1
+ * encodes MALLOCX_TCACHE_NONE.
+ */
+# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8))
+# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1)
+/*
+ * Bias arena index bits so that 0 encodes "use an automatically chosen arena".
+ */
+# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20))
-#ifdef JEMALLOC_EXPERIMENTAL
-# define ALLOCM_LG_ALIGN(la) (la)
-# if LG_SIZEOF_PTR == 2
-# define ALLOCM_ALIGN(a) (ffs(a)-1)
-# else
-# define ALLOCM_ALIGN(a) \
- ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
-# endif
-# define ALLOCM_ZERO ((int)0x40)
-# define ALLOCM_NO_MOVE ((int)0x80)
-/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */
-# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8))
-# define ALLOCM_SUCCESS 0
-# define ALLOCM_ERR_OOM 1
-# define ALLOCM_ERR_NOT_MOVED 2
+#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
+# define JEMALLOC_CXX_THROW throw()
+#else
+# define JEMALLOC_CXX_THROW
#endif
#ifdef JEMALLOC_HAVE_ATTR
# define JEMALLOC_ATTR(s) __attribute__((s))
-# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
-# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
+# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
+# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s))
+# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2))
+# else
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
+# endif
+# ifndef JEMALLOC_EXPORT
+# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
+# endif
+# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
+# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
+# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)
+# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i))
+# else
+# define JEMALLOC_FORMAT_PRINTF(s, i)
+# endif
# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
+# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow)
+# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
+# define JEMALLOC_RESTRICT_RETURN
+# define JEMALLOC_ALLOCATOR
#elif _MSC_VER
# define JEMALLOC_ATTR(s)
-# ifdef DLLEXPORT
-# define JEMALLOC_EXPORT __declspec(dllexport)
+# define JEMALLOC_ALIGNED(s) __declspec(align(s))
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
+# ifndef JEMALLOC_EXPORT
+# ifdef DLLEXPORT
+# define JEMALLOC_EXPORT __declspec(dllexport)
+# else
+# define JEMALLOC_EXPORT __declspec(dllimport)
+# endif
+# endif
+# define JEMALLOC_FORMAT_PRINTF(s, i)
+# define JEMALLOC_NOINLINE __declspec(noinline)
+# ifdef __cplusplus
+# define JEMALLOC_NOTHROW __declspec(nothrow)
# else
-# define JEMALLOC_EXPORT __declspec(dllimport)
+# define JEMALLOC_NOTHROW
# endif
-# define JEMALLOC_ALIGNED(s) __declspec(align(s))
# define JEMALLOC_SECTION(s) __declspec(allocate(s))
-# define JEMALLOC_NOINLINE __declspec(noinline)
+# define JEMALLOC_RESTRICT_RETURN __declspec(restrict)
+# if _MSC_VER >= 1900 && !defined(__EDG__)
+# define JEMALLOC_ALLOCATOR __declspec(allocator)
+# else
+# define JEMALLOC_ALLOCATOR
+# endif
#else
# define JEMALLOC_ATTR(s)
-# define JEMALLOC_EXPORT
# define JEMALLOC_ALIGNED(s)
-# define JEMALLOC_SECTION(s)
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
+# define JEMALLOC_EXPORT
+# define JEMALLOC_FORMAT_PRINTF(s, i)
# define JEMALLOC_NOINLINE
+# define JEMALLOC_NOTHROW
+# define JEMALLOC_SECTION(s)
+# define JEMALLOC_RESTRICT_RETURN
+# define JEMALLOC_ALLOCATOR
#endif
+
+/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint()
+ * function. */
+#define JEMALLOC_FRAG_HINT
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in b/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
index 25446de3d..a78414b19 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
@@ -7,52 +7,60 @@ extern JEMALLOC_EXPORT const char *@je_@malloc_conf;
extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque,
const char *s);
-JEMALLOC_EXPORT void *@je_@malloc(size_t size) JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT void *@je_@calloc(size_t num, size_t size)
- JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT int @je_@posix_memalign(void **memptr, size_t alignment,
- size_t size) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT void *@je_@aligned_alloc(size_t alignment, size_t size)
- JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT void *@je_@realloc(void *ptr, size_t size);
-JEMALLOC_EXPORT void @je_@free(void *ptr);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@malloc(size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@calloc(size_t num, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@posix_memalign(void **memptr,
+ size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@aligned_alloc(size_t alignment,
+ size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc)
+ JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@realloc(void *ptr, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr)
+ JEMALLOC_CXX_THROW;
-JEMALLOC_EXPORT void *@je_@mallocx(size_t size, int flags);
-JEMALLOC_EXPORT void *@je_@rallocx(void *ptr, size_t size, int flags);
-JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra,
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags)
+ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@rallocx(void *ptr, size_t size,
+ int flags) JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@xallocx(void *ptr, size_t size,
+ size_t extra, int flags);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@sallocx(const void *ptr,
+ int flags) JEMALLOC_ATTR(pure);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@dallocx(void *ptr, int flags);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@sdallocx(void *ptr, size_t size,
int flags);
-JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags);
-JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags);
-JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@nallocx(size_t size, int flags)
+ JEMALLOC_ATTR(pure);
-JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen);
-JEMALLOC_EXPORT int @je_@mallctlnametomib(const char *name, size_t *mibp,
- size_t *miblenp);
-JEMALLOC_EXPORT int @je_@mallctlbymib(const size_t *mib, size_t miblen,
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctl(const char *name,
void *oldp, size_t *oldlenp, void *newp, size_t newlen);
-JEMALLOC_EXPORT void @je_@malloc_stats_print(void (*write_cb)(void *,
- const char *), void *@je_@cbopaque, const char *opts);
-JEMALLOC_EXPORT size_t @je_@malloc_usable_size(
- JEMALLOC_USABLE_SIZE_CONST void *ptr);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctlnametomib(const char *name,
+ size_t *mibp, size_t *miblenp);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctlbymib(const size_t *mib,
+ size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@malloc_stats_print(
+ void (*write_cb)(void *, const char *), void *@je_@cbopaque,
+ const char *opts);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size(
+ JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW;
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
-JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size)
- JEMALLOC_ATTR(malloc);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc);
#endif
#ifdef JEMALLOC_OVERRIDE_VALLOC
-JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc);
-#endif
-
-#ifdef JEMALLOC_EXPERIMENTAL
-JEMALLOC_EXPORT int @je_@allocm(void **ptr, size_t *rsize, size_t size,
- int flags) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@rallocm(void **ptr, size_t *rsize, size_t size,
- size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@sallocm(const void *ptr, size_t *rsize, int flags)
- JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@dallocm(void *ptr, int flags)
- JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@nallocm(size_t *rsize, size_t size, int flags);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW
+ JEMALLOC_ATTR(malloc);
#endif
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in
new file mode 100644
index 000000000..fa7b350ad
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in
@@ -0,0 +1,57 @@
+/*
+ * void *
+ * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero,
+ * bool *commit, unsigned arena_ind);
+ */
+typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned);
+
+/*
+ * bool
+ * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind);
+ */
+typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned);
+
+/*
+ * bool
+ * chunk_commit(void *chunk, size_t size, size_t offset, size_t length,
+ * unsigned arena_ind);
+ */
+typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned);
+
+/*
+ * bool
+ * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length,
+ * unsigned arena_ind);
+ */
+typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned);
+
+/*
+ * bool
+ * chunk_purge(void *chunk, size_t size, size_t offset, size_t length,
+ * unsigned arena_ind);
+ */
+typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned);
+
+/*
+ * bool
+ * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b,
+ * bool committed, unsigned arena_ind);
+ */
+typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned);
+
+/*
+ * bool
+ * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b,
+ * bool committed, unsigned arena_ind);
+ */
+typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned);
+
+typedef struct {
+ chunk_alloc_t *alloc;
+ chunk_dalloc_t *dalloc;
+ chunk_commit_t *commit;
+ chunk_decommit_t *decommit;
+ chunk_purge_t *purge;
+ chunk_split_t *split;
+ chunk_merge_t *merge;
+} chunk_hooks_t;
diff --git a/deps/jemalloc/include/msvc_compat/stdbool.h b/deps/jemalloc/include/msvc_compat/C99/stdbool.h
index da9ee8b80..d92160ebc 100644
--- a/deps/jemalloc/include/msvc_compat/stdbool.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdbool.h
@@ -5,7 +5,11 @@
/* MSVC doesn't define _Bool or bool in C, but does have BOOL */
/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */
+/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as
+ * a built-in type. */
+#ifndef __clang__
typedef BOOL _Bool;
+#endif
#define bool _Bool
#define true 1
diff --git a/deps/jemalloc/include/msvc_compat/stdint.h b/deps/jemalloc/include/msvc_compat/C99/stdint.h
index d02608a59..d02608a59 100644
--- a/deps/jemalloc/include/msvc_compat/stdint.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdint.h
diff --git a/deps/jemalloc/include/msvc_compat/inttypes.h b/deps/jemalloc/include/msvc_compat/inttypes.h
deleted file mode 100644
index a4e6b75cb..000000000
--- a/deps/jemalloc/include/msvc_compat/inttypes.h
+++ /dev/null
@@ -1,313 +0,0 @@
-// ISO C9x compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-// Copyright (c) 2006 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// 1. Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. The name of the author may be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_INTTYPES_H_ // [
-#define _MSC_INTTYPES_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#include "stdint.h"
-
-// 7.8 Format conversion of integer types
-
-typedef struct {
- intmax_t quot;
- intmax_t rem;
-} imaxdiv_t;
-
-// 7.8.1 Macros for format specifiers
-
-#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198
-
-#ifdef _WIN64
-# define __PRI64_PREFIX "l"
-# define __PRIPTR_PREFIX "l"
-#else
-# define __PRI64_PREFIX "ll"
-# define __PRIPTR_PREFIX
-#endif
-
-// The fprintf macros for signed integers are:
-#define PRId8 "d"
-#define PRIi8 "i"
-#define PRIdLEAST8 "d"
-#define PRIiLEAST8 "i"
-#define PRIdFAST8 "d"
-#define PRIiFAST8 "i"
-
-#define PRId16 "hd"
-#define PRIi16 "hi"
-#define PRIdLEAST16 "hd"
-#define PRIiLEAST16 "hi"
-#define PRIdFAST16 "hd"
-#define PRIiFAST16 "hi"
-
-#define PRId32 "d"
-#define PRIi32 "i"
-#define PRIdLEAST32 "d"
-#define PRIiLEAST32 "i"
-#define PRIdFAST32 "d"
-#define PRIiFAST32 "i"
-
-#define PRId64 __PRI64_PREFIX "d"
-#define PRIi64 __PRI64_PREFIX "i"
-#define PRIdLEAST64 __PRI64_PREFIX "d"
-#define PRIiLEAST64 __PRI64_PREFIX "i"
-#define PRIdFAST64 __PRI64_PREFIX "d"
-#define PRIiFAST64 __PRI64_PREFIX "i"
-
-#define PRIdMAX __PRI64_PREFIX "d"
-#define PRIiMAX __PRI64_PREFIX "i"
-
-#define PRIdPTR __PRIPTR_PREFIX "d"
-#define PRIiPTR __PRIPTR_PREFIX "i"
-
-// The fprintf macros for unsigned integers are:
-#define PRIo8 "o"
-#define PRIu8 "u"
-#define PRIx8 "x"
-#define PRIX8 "X"
-#define PRIoLEAST8 "o"
-#define PRIuLEAST8 "u"
-#define PRIxLEAST8 "x"
-#define PRIXLEAST8 "X"
-#define PRIoFAST8 "o"
-#define PRIuFAST8 "u"
-#define PRIxFAST8 "x"
-#define PRIXFAST8 "X"
-
-#define PRIo16 "ho"
-#define PRIu16 "hu"
-#define PRIx16 "hx"
-#define PRIX16 "hX"
-#define PRIoLEAST16 "ho"
-#define PRIuLEAST16 "hu"
-#define PRIxLEAST16 "hx"
-#define PRIXLEAST16 "hX"
-#define PRIoFAST16 "ho"
-#define PRIuFAST16 "hu"
-#define PRIxFAST16 "hx"
-#define PRIXFAST16 "hX"
-
-#define PRIo32 "o"
-#define PRIu32 "u"
-#define PRIx32 "x"
-#define PRIX32 "X"
-#define PRIoLEAST32 "o"
-#define PRIuLEAST32 "u"
-#define PRIxLEAST32 "x"
-#define PRIXLEAST32 "X"
-#define PRIoFAST32 "o"
-#define PRIuFAST32 "u"
-#define PRIxFAST32 "x"
-#define PRIXFAST32 "X"
-
-#define PRIo64 __PRI64_PREFIX "o"
-#define PRIu64 __PRI64_PREFIX "u"
-#define PRIx64 __PRI64_PREFIX "x"
-#define PRIX64 __PRI64_PREFIX "X"
-#define PRIoLEAST64 __PRI64_PREFIX "o"
-#define PRIuLEAST64 __PRI64_PREFIX "u"
-#define PRIxLEAST64 __PRI64_PREFIX "x"
-#define PRIXLEAST64 __PRI64_PREFIX "X"
-#define PRIoFAST64 __PRI64_PREFIX "o"
-#define PRIuFAST64 __PRI64_PREFIX "u"
-#define PRIxFAST64 __PRI64_PREFIX "x"
-#define PRIXFAST64 __PRI64_PREFIX "X"
-
-#define PRIoMAX __PRI64_PREFIX "o"
-#define PRIuMAX __PRI64_PREFIX "u"
-#define PRIxMAX __PRI64_PREFIX "x"
-#define PRIXMAX __PRI64_PREFIX "X"
-
-#define PRIoPTR __PRIPTR_PREFIX "o"
-#define PRIuPTR __PRIPTR_PREFIX "u"
-#define PRIxPTR __PRIPTR_PREFIX "x"
-#define PRIXPTR __PRIPTR_PREFIX "X"
-
-// The fscanf macros for signed integers are:
-#define SCNd8 "d"
-#define SCNi8 "i"
-#define SCNdLEAST8 "d"
-#define SCNiLEAST8 "i"
-#define SCNdFAST8 "d"
-#define SCNiFAST8 "i"
-
-#define SCNd16 "hd"
-#define SCNi16 "hi"
-#define SCNdLEAST16 "hd"
-#define SCNiLEAST16 "hi"
-#define SCNdFAST16 "hd"
-#define SCNiFAST16 "hi"
-
-#define SCNd32 "ld"
-#define SCNi32 "li"
-#define SCNdLEAST32 "ld"
-#define SCNiLEAST32 "li"
-#define SCNdFAST32 "ld"
-#define SCNiFAST32 "li"
-
-#define SCNd64 "I64d"
-#define SCNi64 "I64i"
-#define SCNdLEAST64 "I64d"
-#define SCNiLEAST64 "I64i"
-#define SCNdFAST64 "I64d"
-#define SCNiFAST64 "I64i"
-
-#define SCNdMAX "I64d"
-#define SCNiMAX "I64i"
-
-#ifdef _WIN64 // [
-# define SCNdPTR "I64d"
-# define SCNiPTR "I64i"
-#else // _WIN64 ][
-# define SCNdPTR "ld"
-# define SCNiPTR "li"
-#endif // _WIN64 ]
-
-// The fscanf macros for unsigned integers are:
-#define SCNo8 "o"
-#define SCNu8 "u"
-#define SCNx8 "x"
-#define SCNX8 "X"
-#define SCNoLEAST8 "o"
-#define SCNuLEAST8 "u"
-#define SCNxLEAST8 "x"
-#define SCNXLEAST8 "X"
-#define SCNoFAST8 "o"
-#define SCNuFAST8 "u"
-#define SCNxFAST8 "x"
-#define SCNXFAST8 "X"
-
-#define SCNo16 "ho"
-#define SCNu16 "hu"
-#define SCNx16 "hx"
-#define SCNX16 "hX"
-#define SCNoLEAST16 "ho"
-#define SCNuLEAST16 "hu"
-#define SCNxLEAST16 "hx"
-#define SCNXLEAST16 "hX"
-#define SCNoFAST16 "ho"
-#define SCNuFAST16 "hu"
-#define SCNxFAST16 "hx"
-#define SCNXFAST16 "hX"
-
-#define SCNo32 "lo"
-#define SCNu32 "lu"
-#define SCNx32 "lx"
-#define SCNX32 "lX"
-#define SCNoLEAST32 "lo"
-#define SCNuLEAST32 "lu"
-#define SCNxLEAST32 "lx"
-#define SCNXLEAST32 "lX"
-#define SCNoFAST32 "lo"
-#define SCNuFAST32 "lu"
-#define SCNxFAST32 "lx"
-#define SCNXFAST32 "lX"
-
-#define SCNo64 "I64o"
-#define SCNu64 "I64u"
-#define SCNx64 "I64x"
-#define SCNX64 "I64X"
-#define SCNoLEAST64 "I64o"
-#define SCNuLEAST64 "I64u"
-#define SCNxLEAST64 "I64x"
-#define SCNXLEAST64 "I64X"
-#define SCNoFAST64 "I64o"
-#define SCNuFAST64 "I64u"
-#define SCNxFAST64 "I64x"
-#define SCNXFAST64 "I64X"
-
-#define SCNoMAX "I64o"
-#define SCNuMAX "I64u"
-#define SCNxMAX "I64x"
-#define SCNXMAX "I64X"
-
-#ifdef _WIN64 // [
-# define SCNoPTR "I64o"
-# define SCNuPTR "I64u"
-# define SCNxPTR "I64x"
-# define SCNXPTR "I64X"
-#else // _WIN64 ][
-# define SCNoPTR "lo"
-# define SCNuPTR "lu"
-# define SCNxPTR "lx"
-# define SCNXPTR "lX"
-#endif // _WIN64 ]
-
-#endif // __STDC_FORMAT_MACROS ]
-
-// 7.8.2 Functions for greatest-width integer types
-
-// 7.8.2.1 The imaxabs function
-#define imaxabs _abs64
-
-// 7.8.2.2 The imaxdiv function
-
-// This is modified version of div() function from Microsoft's div.c found
-// in %MSVC.NET%\crt\src\div.c
-#ifdef STATIC_IMAXDIV // [
-static
-#else // STATIC_IMAXDIV ][
-_inline
-#endif // STATIC_IMAXDIV ]
-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
-{
- imaxdiv_t result;
-
- result.quot = numer / denom;
- result.rem = numer % denom;
-
- if (numer < 0 && result.rem > 0) {
- // did division wrong; must fix up
- ++result.quot;
- result.rem -= denom;
- }
-
- return result;
-}
-
-// 7.8.2.3 The strtoimax and strtoumax functions
-#define strtoimax _strtoi64
-#define strtoumax _strtoui64
-
-// 7.8.2.4 The wcstoimax and wcstoumax functions
-#define wcstoimax _wcstoi64
-#define wcstoumax _wcstoui64
-
-
-#endif // _MSC_INTTYPES_H_ ]
diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h
index c84975b6b..f01ffdd18 100644
--- a/deps/jemalloc/include/msvc_compat/strings.h
+++ b/deps/jemalloc/include/msvc_compat/strings.h
@@ -3,8 +3,9 @@
/* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided
* for both */
-#include <intrin.h>
-#pragma intrinsic(_BitScanForward)
+#ifdef _MSC_VER
+# include <intrin.h>
+# pragma intrinsic(_BitScanForward)
static __forceinline int ffsl(long x)
{
unsigned long i;
@@ -20,4 +21,9 @@ static __forceinline int ffs(int x)
return (ffsl(x));
}
+#else
+# define ffsl(x) __builtin_ffsl(x)
+# define ffs(x) __builtin_ffs(x)
#endif
+
+#endif /* strings_h */
diff --git a/deps/jemalloc/include/msvc_compat/windows_extra.h b/deps/jemalloc/include/msvc_compat/windows_extra.h
new file mode 100644
index 000000000..0c5e323ff
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/windows_extra.h
@@ -0,0 +1,26 @@
+#ifndef MSVC_COMPAT_WINDOWS_EXTRA_H
+#define MSVC_COMPAT_WINDOWS_EXTRA_H
+
+#ifndef ENOENT
+# define ENOENT ERROR_PATH_NOT_FOUND
+#endif
+#ifndef EINVAL
+# define EINVAL ERROR_BAD_ARGUMENTS
+#endif
+#ifndef EAGAIN
+# define EAGAIN ERROR_OUTOFMEMORY
+#endif
+#ifndef EPERM
+# define EPERM ERROR_WRITE_FAULT
+#endif
+#ifndef EFAULT
+# define EFAULT ERROR_INVALID_ADDRESS
+#endif
+#ifndef ENOMEM
+# define ENOMEM ERROR_NOT_ENOUGH_MEMORY
+#endif
+#ifndef ERANGE
+# define ERANGE ERROR_INVALID_DATA
+#endif
+
+#endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */
diff --git a/deps/jemalloc/jemalloc.pc.in b/deps/jemalloc/jemalloc.pc.in
new file mode 100644
index 000000000..1a3ad9b34
--- /dev/null
+++ b/deps/jemalloc/jemalloc.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+install_suffix=@install_suffix@
+
+Name: jemalloc
+Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support.
+URL: http://www.canonware.com/jemalloc
+Version: @jemalloc_version@
+Cflags: -I${includedir}
+Libs: -L${libdir} -ljemalloc${install_suffix}
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c
index dad707b63..3081519cc 100644
--- a/deps/jemalloc/src/arena.c
+++ b/deps/jemalloc/src/arena.c
@@ -5,37 +5,17 @@
/* Data. */
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
+static ssize_t lg_dirty_mult_default;
arena_bin_info_t arena_bin_info[NBINS];
-JEMALLOC_ALIGNED(CACHELINE)
-const uint8_t small_size2bin[] = {
-#define S2B_8(i) i,
-#define S2B_16(i) S2B_8(i) S2B_8(i)
-#define S2B_32(i) S2B_16(i) S2B_16(i)
-#define S2B_64(i) S2B_32(i) S2B_32(i)
-#define S2B_128(i) S2B_64(i) S2B_64(i)
-#define S2B_256(i) S2B_128(i) S2B_128(i)
-#define S2B_512(i) S2B_256(i) S2B_256(i)
-#define S2B_1024(i) S2B_512(i) S2B_512(i)
-#define S2B_2048(i) S2B_1024(i) S2B_1024(i)
-#define S2B_4096(i) S2B_2048(i) S2B_2048(i)
-#define S2B_8192(i) S2B_4096(i) S2B_4096(i)
-#define SIZE_CLASS(bin, delta, size) \
- S2B_##delta(bin)
- SIZE_CLASSES
-#undef S2B_8
-#undef S2B_16
-#undef S2B_32
-#undef S2B_64
-#undef S2B_128
-#undef S2B_256
-#undef S2B_512
-#undef S2B_1024
-#undef S2B_2048
-#undef S2B_4096
-#undef S2B_8192
-#undef SIZE_CLASS
-};
+size_t map_bias;
+size_t map_misc_offset;
+size_t arena_maxrun; /* Max run size for arenas. */
+size_t large_maxclass; /* Max large size class. */
+static size_t small_maxrun; /* Max run size used for small size classes. */
+static bool *small_run_tab; /* Valid small run page multiples. */
+unsigned nlclasses; /* Number of large size classes. */
+unsigned nhclasses; /* Number of huge size classes. */
/******************************************************************************/
/*
@@ -45,7 +25,7 @@ const uint8_t small_size2bin[] = {
static void arena_purge(arena_t *arena, bool all);
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
- bool cleaned);
+ bool cleaned, bool decommitted);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, arena_bin_t *bin);
static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
@@ -53,296 +33,326 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
/******************************************************************************/
-static inline int
-arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+#define CHUNK_MAP_KEY ((uintptr_t)0x1U)
+
+JEMALLOC_INLINE_C arena_chunk_map_misc_t *
+arena_miscelm_key_create(size_t size)
{
- uintptr_t a_mapelm = (uintptr_t)a;
- uintptr_t b_mapelm = (uintptr_t)b;
- assert(a != NULL);
- assert(b != NULL);
+ return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) |
+ CHUNK_MAP_KEY));
+}
+
+JEMALLOC_INLINE_C bool
+arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm)
+{
- return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
+ return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0);
}
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
- u.rb_link, arena_run_comp)
+#undef CHUNK_MAP_KEY
-static inline int
-arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+JEMALLOC_INLINE_C size_t
+arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm)
{
- int ret;
- size_t a_size = a->bits & ~PAGE_MASK;
- size_t b_size = b->bits & ~PAGE_MASK;
- ret = (a_size > b_size) - (a_size < b_size);
- if (ret == 0) {
- uintptr_t a_mapelm, b_mapelm;
+ assert(arena_miscelm_is_key(miscelm));
- if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
- a_mapelm = (uintptr_t)a;
- else {
- /*
- * Treat keys as though they are lower than anything
- * else.
- */
- a_mapelm = 0;
- }
- b_mapelm = (uintptr_t)b;
+ return (arena_mapbits_size_decode((uintptr_t)miscelm));
+}
- ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
- }
+JEMALLOC_INLINE_C size_t
+arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm)
+{
+ arena_chunk_t *chunk;
+ size_t pageind, mapbits;
- return (ret);
-}
+ assert(!arena_miscelm_is_key(miscelm));
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
- u.rb_link, arena_avail_comp)
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
+ pageind = arena_miscelm_to_pageind(miscelm);
+ mapbits = arena_mapbits_get(chunk, pageind);
+ return (arena_mapbits_size_decode(mapbits));
+}
-static inline int
-arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
+JEMALLOC_INLINE_C int
+arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b)
{
+ uintptr_t a_miscelm = (uintptr_t)a;
+ uintptr_t b_miscelm = (uintptr_t)b;
assert(a != NULL);
assert(b != NULL);
- /*
- * Short-circuit for self comparison. The following comparison code
- * would come to the same result, but at the cost of executing the slow
- * path.
- */
- if (a == b)
- return (0);
+ return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
+ rb_link, arena_run_comp)
+
+static size_t
+run_quantize(size_t size)
+{
+ size_t qsize;
+
+ assert(size != 0);
+ assert(size == PAGE_CEILING(size));
+
+ /* Don't change sizes that are valid small run sizes. */
+ if (size <= small_maxrun && small_run_tab[size >> LG_PAGE])
+ return (size);
/*
- * Order such that chunks with higher fragmentation are "less than"
- * those with lower fragmentation -- purging order is from "least" to
- * "greatest". Fragmentation is measured as:
- *
- * mean current avail run size
- * --------------------------------
- * mean defragmented avail run size
- *
- * navail
- * -----------
- * nruns_avail nruns_avail-nruns_adjac
- * = ========================= = -----------------------
- * navail nruns_avail
- * -----------------------
- * nruns_avail-nruns_adjac
- *
- * The following code multiplies away the denominator prior to
- * comparison, in order to avoid division.
- *
+ * Round down to the nearest run size that can actually be requested
+ * during normal large allocation. Add large_pad so that cache index
+ * randomization can offset the allocation from the page boundary.
*/
- {
- size_t a_val = (a->nruns_avail - a->nruns_adjac) *
- b->nruns_avail;
- size_t b_val = (b->nruns_avail - b->nruns_adjac) *
- a->nruns_avail;
+ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad;
+ if (qsize <= SMALL_MAXCLASS + large_pad)
+ return (run_quantize(size - large_pad));
+ assert(qsize <= size);
+ return (qsize);
+}
+
+static size_t
+run_quantize_next(size_t size)
+{
+ size_t large_run_size_next;
+
+ assert(size != 0);
+ assert(size == PAGE_CEILING(size));
- if (a_val < b_val)
- return (1);
- if (a_val > b_val)
- return (-1);
- }
/*
- * Break ties by chunk address. For fragmented chunks, report lower
- * addresses as "lower", so that fragmentation reduction happens first
- * at lower addresses. However, use the opposite ordering for
- * unfragmented chunks, in order to increase the chances of
- * re-allocating dirty runs.
+ * Return the next quantized size greater than the input size.
+ * Quantized sizes comprise the union of run sizes that back small
+ * region runs, and run sizes that back large regions with no explicit
+ * alignment constraints.
*/
- {
- uintptr_t a_chunk = (uintptr_t)a;
- uintptr_t b_chunk = (uintptr_t)b;
- int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk));
- if (a->nruns_adjac == 0) {
- assert(b->nruns_adjac == 0);
- ret = -ret;
+
+ if (size > SMALL_MAXCLASS) {
+ large_run_size_next = PAGE_CEILING(index2size(size2index(size -
+ large_pad) + 1) + large_pad);
+ } else
+ large_run_size_next = SIZE_T_MAX;
+ if (size >= small_maxrun)
+ return (large_run_size_next);
+
+ while (true) {
+ size += PAGE;
+ assert(size <= small_maxrun);
+ if (small_run_tab[size >> LG_PAGE]) {
+ if (large_run_size_next < size)
+ return (large_run_size_next);
+ return (size);
}
- return (ret);
}
}
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t,
- dirty_link, arena_chunk_dirty_comp)
-
-static inline bool
-arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind)
+static size_t
+run_quantize_first(size_t size)
{
- bool ret;
+ size_t qsize = run_quantize(size);
- if (pageind-1 < map_bias)
- ret = false;
- else {
- ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0);
- assert(ret == false || arena_mapbits_dirty_get(chunk,
- pageind-1) != arena_mapbits_dirty_get(chunk, pageind));
+ if (qsize < size) {
+ /*
+ * Skip a quantization that may have an adequately large run,
+ * because under-sized runs may be mixed in. This only happens
+ * when an unusual size is requested, i.e. for aligned
+ * allocation, and is just one of several places where linear
+ * search would potentially find sufficiently aligned available
+ * memory somewhere lower.
+ */
+ qsize = run_quantize_next(size);
}
- return (ret);
+ return (qsize);
}
-static inline bool
-arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages)
+JEMALLOC_INLINE_C int
+arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b)
{
- bool ret;
+ int ret;
+ uintptr_t a_miscelm = (uintptr_t)a;
+ size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ?
+ arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a));
+ size_t b_qsize = run_quantize(arena_miscelm_size_get(b));
- if (pageind+npages == chunk_npages)
- ret = false;
- else {
- assert(pageind+npages < chunk_npages);
- ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0);
- assert(ret == false || arena_mapbits_dirty_get(chunk, pageind)
- != arena_mapbits_dirty_get(chunk, pageind+npages));
+ /*
+ * Compare based on quantized size rather than size, in order to sort
+ * equally useful runs only by address.
+ */
+ ret = (a_qsize > b_qsize) - (a_qsize < b_qsize);
+ if (ret == 0) {
+ if (!arena_miscelm_is_key(a)) {
+ uintptr_t b_miscelm = (uintptr_t)b;
+
+ ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm);
+ } else {
+ /*
+ * Treat keys as if they are lower than anything else.
+ */
+ ret = -1;
+ }
}
+
return (ret);
}
-static inline bool
-arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages)
+/* Generate red-black tree functions. */
+rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t,
+ arena_chunk_map_misc_t, rb_link, arena_avail_comp)
+
+static void
+arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+ size_t npages)
{
- return (arena_avail_adjac_pred(chunk, pageind) ||
- arena_avail_adjac_succ(chunk, pageind, npages));
+ assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
+ LG_PAGE));
+ arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk,
+ pageind));
}
static void
-arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
- size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
+arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+ size_t npages)
{
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
+ arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk,
+ pageind));
+}
- /*
- * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
- * removed and reinserted even if the run to be inserted is clean.
- */
- if (chunk->ndirty != 0)
- arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
-
- if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
- chunk->nruns_adjac++;
- if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
- chunk->nruns_adjac++;
- chunk->nruns_avail++;
- assert(chunk->nruns_avail > chunk->nruns_adjac);
+static void
+arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+ size_t npages)
+{
+ arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
- if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
- arena->ndirty += npages;
- chunk->ndirty += npages;
- }
- if (chunk->ndirty != 0)
- arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+ assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
+ LG_PAGE));
+ assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
+ assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
+ CHUNK_MAP_DIRTY);
- arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk,
- pageind));
+ qr_new(&miscelm->rd, rd_link);
+ qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link);
+ arena->ndirty += npages;
}
static void
-arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
- size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
+arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+ size_t npages)
{
+ arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
+ assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
+ assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
+ CHUNK_MAP_DIRTY);
- /*
- * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
- * removed and reinserted even if the run to be removed is clean.
- */
- if (chunk->ndirty != 0)
- arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
+ qr_remove(&miscelm->rd, rd_link);
+ assert(arena->ndirty >= npages);
+ arena->ndirty -= npages;
+}
- if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
- chunk->nruns_adjac--;
- if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
- chunk->nruns_adjac--;
- chunk->nruns_avail--;
- assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail
- == 0 && chunk->nruns_adjac == 0));
+static size_t
+arena_chunk_dirty_npages(const extent_node_t *node)
+{
+
+ return (extent_node_size_get(node) >> LG_PAGE);
+}
- if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
- arena->ndirty -= npages;
- chunk->ndirty -= npages;
+void
+arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache)
+{
+
+ if (cache) {
+ extent_node_dirty_linkage_init(node);
+ extent_node_dirty_insert(node, &arena->runs_dirty,
+ &arena->chunks_cache);
+ arena->ndirty += arena_chunk_dirty_npages(node);
}
- if (chunk->ndirty != 0)
- arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+}
- arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk,
- pageind));
+void
+arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty)
+{
+
+ if (dirty) {
+ extent_node_dirty_remove(node);
+ assert(arena->ndirty >= arena_chunk_dirty_npages(node));
+ arena->ndirty -= arena_chunk_dirty_npages(node);
+ }
}
-static inline void *
+JEMALLOC_INLINE_C void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{
void *ret;
unsigned regind;
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
+ arena_chunk_map_misc_t *miscelm;
+ void *rpages;
assert(run->nfree > 0);
- assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
+ assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info));
- regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
- ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
+ regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info);
+ miscelm = arena_run_to_miscelm(run);
+ rpages = arena_miscelm_to_rpages(miscelm);
+ ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset +
(uintptr_t)(bin_info->reg_interval * regind));
run->nfree--;
- if (regind == run->nextind)
- run->nextind++;
- assert(regind < run->nextind);
return (ret);
}
-static inline void
+JEMALLOC_INLINE_C void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
size_t mapbits = arena_mapbits_get(chunk, pageind);
- size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
+ szind_t binind = arena_ptr_small_binind_get(ptr, mapbits);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind = arena_run_regind(run, bin_info, ptr);
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
assert(run->nfree < bin_info->nregs);
/* Freeing an interior pointer can cause assertion failure. */
- assert(((uintptr_t)ptr - ((uintptr_t)run +
+ assert(((uintptr_t)ptr -
+ ((uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) +
(uintptr_t)bin_info->reg0_offset)) %
(uintptr_t)bin_info->reg_interval == 0);
- assert((uintptr_t)ptr >= (uintptr_t)run +
+ assert((uintptr_t)ptr >=
+ (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) +
(uintptr_t)bin_info->reg0_offset);
/* Freeing an unallocated pointer can cause assertion failure. */
- assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
+ assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind));
- bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
+ bitmap_unset(run->bitmap, &bin_info->bitmap_info, regind);
run->nfree++;
}
-static inline void
+JEMALLOC_INLINE_C void
arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages)
{
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
- LG_PAGE)), (npages << LG_PAGE));
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
+ (run_ind << LG_PAGE)), (npages << LG_PAGE));
memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0,
(npages << LG_PAGE));
}
-static inline void
+JEMALLOC_INLINE_C void
arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind)
{
- VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind <<
- LG_PAGE)), PAGE);
+ JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind
+ << LG_PAGE)), PAGE);
}
-static inline void
+JEMALLOC_INLINE_C void
arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
{
size_t i;
@@ -358,9 +368,9 @@ arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
{
if (config_stats) {
- ssize_t cactive_diff = CHUNK_CEILING((arena->nactive +
- add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive -
- sub_pages) << LG_PAGE);
+ ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages
+ - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
+ LG_PAGE);
if (cactive_diff != 0)
stats_cactive_add(cactive_diff);
}
@@ -368,10 +378,12 @@ arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
static void
arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
- size_t flag_dirty, size_t need_pages)
+ size_t flag_dirty, size_t flag_decommitted, size_t need_pages)
{
size_t total_pages, rem_pages;
+ assert(flag_dirty == 0 || flag_decommitted == 0);
+
total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
LG_PAGE;
assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
@@ -379,58 +391,75 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
assert(need_pages <= total_pages);
rem_pages = total_pages - need_pages;
- arena_avail_remove(arena, chunk, run_ind, total_pages, true, true);
+ arena_avail_remove(arena, chunk, run_ind, total_pages);
+ if (flag_dirty != 0)
+ arena_run_dirty_remove(arena, chunk, run_ind, total_pages);
arena_cactive_update(arena, need_pages, 0);
arena->nactive += need_pages;
/* Keep track of trailing unused pages for later use. */
if (rem_pages > 0) {
+ size_t flags = flag_dirty | flag_decommitted;
+ size_t flag_unzeroed_mask = (flags == 0) ? CHUNK_MAP_UNZEROED :
+ 0;
+
+ arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
+ (rem_pages << LG_PAGE), flags |
+ (arena_mapbits_unzeroed_get(chunk, run_ind+need_pages) &
+ flag_unzeroed_mask));
+ arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1,
+ (rem_pages << LG_PAGE), flags |
+ (arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1) &
+ flag_unzeroed_mask));
if (flag_dirty != 0) {
- arena_mapbits_unallocated_set(chunk,
- run_ind+need_pages, (rem_pages << LG_PAGE),
- flag_dirty);
- arena_mapbits_unallocated_set(chunk,
- run_ind+total_pages-1, (rem_pages << LG_PAGE),
- flag_dirty);
- } else {
- arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
- (rem_pages << LG_PAGE),
- arena_mapbits_unzeroed_get(chunk,
- run_ind+need_pages));
- arena_mapbits_unallocated_set(chunk,
- run_ind+total_pages-1, (rem_pages << LG_PAGE),
- arena_mapbits_unzeroed_get(chunk,
- run_ind+total_pages-1));
+ arena_run_dirty_insert(arena, chunk, run_ind+need_pages,
+ rem_pages);
}
- arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages,
- false, true);
+ arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages);
}
}
-static void
+static bool
arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
bool remove, bool zero)
{
arena_chunk_t *chunk;
- size_t flag_dirty, run_ind, need_pages, i;
+ arena_chunk_map_misc_t *miscelm;
+ size_t flag_dirty, flag_decommitted, run_ind, need_pages;
+ size_t flag_unzeroed_mask;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
+ miscelm = arena_run_to_miscelm(run);
+ run_ind = arena_miscelm_to_pageind(miscelm);
flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
+ flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind);
need_pages = (size >> LG_PAGE);
assert(need_pages > 0);
+ if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize,
+ run_ind << LG_PAGE, size, arena->ind))
+ return (true);
+
if (remove) {
arena_run_split_remove(arena, chunk, run_ind, flag_dirty,
- need_pages);
+ flag_decommitted, need_pages);
}
if (zero) {
- if (flag_dirty == 0) {
+ if (flag_decommitted != 0) {
+ /* The run is untouched, and therefore zeroed. */
+ JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
+ *)((uintptr_t)chunk + (run_ind << LG_PAGE)),
+ (need_pages << LG_PAGE));
+ } else if (flag_dirty != 0) {
+ /* The run is dirty, so all pages must be zeroed. */
+ arena_run_zero(chunk, run_ind, need_pages);
+ } else {
/*
* The run is clean, so some pages may be zeroed (i.e.
* never before touched).
*/
+ size_t i;
for (i = 0; i < need_pages; i++) {
if (arena_mapbits_unzeroed_get(chunk, run_ind+i)
!= 0)
@@ -443,12 +472,9 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
run_ind+i);
}
}
- } else {
- /* The run is dirty, so all pages must be zeroed. */
- arena_run_zero(chunk, run_ind, need_pages);
}
} else {
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
(run_ind << LG_PAGE)), (need_pages << LG_PAGE));
}
@@ -456,68 +482,66 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
* Set the last element first, in case the run only contains one page
* (i.e. both statements set the same element).
*/
- arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, run_ind, size, flag_dirty);
+ flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
+ CHUNK_MAP_UNZEROED : 0;
+ arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ run_ind+need_pages-1)));
+ arena_mapbits_large_set(chunk, run_ind, size, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, run_ind)));
+ return (false);
}
-static void
+static bool
arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
{
- arena_run_split_large_helper(arena, run, size, true, zero);
+ return (arena_run_split_large_helper(arena, run, size, true, zero));
}
-static void
+static bool
arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
{
- arena_run_split_large_helper(arena, run, size, false, zero);
+ return (arena_run_split_large_helper(arena, run, size, false, zero));
}
-static void
+static bool
arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size,
- size_t binind)
+ szind_t binind)
{
arena_chunk_t *chunk;
- size_t flag_dirty, run_ind, need_pages, i;
+ arena_chunk_map_misc_t *miscelm;
+ size_t flag_dirty, flag_decommitted, run_ind, need_pages, i;
assert(binind != BININD_INVALID);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
+ miscelm = arena_run_to_miscelm(run);
+ run_ind = arena_miscelm_to_pageind(miscelm);
flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
+ flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind);
need_pages = (size >> LG_PAGE);
assert(need_pages > 0);
- arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages);
+ if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize,
+ run_ind << LG_PAGE, size, arena->ind))
+ return (true);
- /*
- * Propagate the dirty and unzeroed flags to the allocated small run,
- * so that arena_dalloc_bin_run() has the ability to conditionally trim
- * clean pages.
- */
- arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty);
- /*
- * The first page will always be dirtied during small run
- * initialization, so a validation failure here would not actually
- * cause an observable failure.
- */
- if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
- run_ind) == 0)
- arena_run_page_validate_zeroed(chunk, run_ind);
- for (i = 1; i < need_pages - 1; i++) {
- arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0);
- if (config_debug && flag_dirty == 0 &&
- arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
+ arena_run_split_remove(arena, chunk, run_ind, flag_dirty,
+ flag_decommitted, need_pages);
+
+ for (i = 0; i < need_pages; i++) {
+ size_t flag_unzeroed = arena_mapbits_unzeroed_get(chunk,
+ run_ind+i);
+ arena_mapbits_small_set(chunk, run_ind+i, i, binind,
+ flag_unzeroed);
+ if (config_debug && flag_dirty == 0 && flag_unzeroed == 0)
arena_run_page_validate_zeroed(chunk, run_ind+i);
}
- arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1,
- binind, flag_dirty);
- if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
- run_ind+need_pages-1) == 0)
- arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1);
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
(run_ind << LG_PAGE)), (need_pages << LG_PAGE));
+ return (false);
}
static arena_chunk_t *
@@ -533,76 +557,143 @@ arena_chunk_init_spare(arena_t *arena)
assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
- arena_maxclass);
+ arena_maxrun);
assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
- arena_maxclass);
+ arena_maxrun);
assert(arena_mapbits_dirty_get(chunk, map_bias) ==
arena_mapbits_dirty_get(chunk, chunk_npages-1));
return (chunk);
}
+static bool
+arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero)
+{
+
+ /*
+ * The extent node notion of "committed" doesn't directly apply to
+ * arena chunks. Arbitrarily mark them as committed. The commit state
+ * of runs is tracked individually, and upon chunk deallocation the
+ * entire chunk is in a consistent commit state.
+ */
+ extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true);
+ extent_node_achunk_set(&chunk->node, true);
+ return (chunk_register(chunk, &chunk->node));
+}
+
static arena_chunk_t *
-arena_chunk_init_hard(arena_t *arena)
+arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ bool *zero, bool *commit)
{
arena_chunk_t *chunk;
- bool zero;
- size_t unzeroed, i;
-
- assert(arena->spare == NULL);
- zero = false;
malloc_mutex_unlock(&arena->lock);
- chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false,
- &zero, arena->dss_prec);
+
+ chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL,
+ chunksize, chunksize, zero, commit);
+ if (chunk != NULL && !*commit) {
+ /* Commit header. */
+ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias <<
+ LG_PAGE, arena->ind)) {
+ chunk_dalloc_wrapper(arena, chunk_hooks,
+ (void *)chunk, chunksize, *commit);
+ chunk = NULL;
+ }
+ }
+ if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) {
+ if (!*commit) {
+ /* Undo commit of header. */
+ chunk_hooks->decommit(chunk, chunksize, 0, map_bias <<
+ LG_PAGE, arena->ind);
+ }
+ chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk,
+ chunksize, *commit);
+ chunk = NULL;
+ }
+
malloc_mutex_lock(&arena->lock);
- if (chunk == NULL)
- return (NULL);
- if (config_stats)
+ return (chunk);
+}
+
+static arena_chunk_t *
+arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit)
+{
+ arena_chunk_t *chunk;
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+
+ chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize,
+ chunksize, zero, true);
+ if (chunk != NULL) {
+ if (arena_chunk_register(arena, chunk, *zero)) {
+ chunk_dalloc_cache(arena, &chunk_hooks, chunk,
+ chunksize, true);
+ return (NULL);
+ }
+ *commit = true;
+ }
+ if (chunk == NULL) {
+ chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks,
+ zero, commit);
+ }
+
+ if (config_stats && chunk != NULL) {
arena->stats.mapped += chunksize;
+ arena->stats.metadata_mapped += (map_bias << LG_PAGE);
+ }
- chunk->arena = arena;
+ return (chunk);
+}
- /*
- * Claim that no pages are in use, since the header is merely overhead.
- */
- chunk->ndirty = 0;
+static arena_chunk_t *
+arena_chunk_init_hard(arena_t *arena)
+{
+ arena_chunk_t *chunk;
+ bool zero, commit;
+ size_t flag_unzeroed, flag_decommitted, i;
+
+ assert(arena->spare == NULL);
- chunk->nruns_avail = 0;
- chunk->nruns_adjac = 0;
+ zero = false;
+ commit = false;
+ chunk = arena_chunk_alloc_internal(arena, &zero, &commit);
+ if (chunk == NULL)
+ return (NULL);
/*
* Initialize the map to contain one maximal free untouched run. Mark
- * the pages as zeroed iff chunk_alloc() returned a zeroed chunk.
+ * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted
+ * chunk.
*/
- unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
- arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass,
- unzeroed);
+ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED;
+ flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED;
+ arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun,
+ flag_unzeroed | flag_decommitted);
/*
* There is no need to initialize the internal page map entries unless
* the chunk is not zeroed.
*/
- if (zero == false) {
- VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk,
- map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
- chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
+ if (!zero) {
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
+ (void *)arena_bitselm_get(chunk, map_bias+1),
+ (size_t)((uintptr_t) arena_bitselm_get(chunk,
+ chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk,
map_bias+1)));
for (i = map_bias+1; i < chunk_npages-1; i++)
- arena_mapbits_unzeroed_set(chunk, i, unzeroed);
+ arena_mapbits_internal_set(chunk, i, flag_unzeroed);
} else {
- VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk,
- map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
- chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
- map_bias+1)));
+ JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
+ *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t)
+ arena_bitselm_get(chunk, chunk_npages-1) -
+ (uintptr_t)arena_bitselm_get(chunk, map_bias+1)));
if (config_debug) {
for (i = map_bias+1; i < chunk_npages-1; i++) {
assert(arena_mapbits_unzeroed_get(chunk, i) ==
- unzeroed);
+ flag_unzeroed);
}
}
}
- arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass,
- unzeroed);
+ arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun,
+ flag_unzeroed);
return (chunk);
}
@@ -621,65 +712,383 @@ arena_chunk_alloc(arena_t *arena)
}
/* Insert the run into the runs_avail tree. */
- arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias,
- false, false);
+ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
return (chunk);
}
static void
-arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
+arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
{
+
assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
- arena_maxclass);
+ arena_maxrun);
assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
- arena_maxclass);
+ arena_maxrun);
assert(arena_mapbits_dirty_get(chunk, map_bias) ==
arena_mapbits_dirty_get(chunk, chunk_npages-1));
+ assert(arena_mapbits_decommitted_get(chunk, map_bias) ==
+ arena_mapbits_decommitted_get(chunk, chunk_npages-1));
/*
* Remove run from the runs_avail tree, so that the arena does not use
* it.
*/
- arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias,
- false, false);
+ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
if (arena->spare != NULL) {
arena_chunk_t *spare = arena->spare;
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+ bool committed;
arena->spare = chunk;
- malloc_mutex_unlock(&arena->lock);
- chunk_dealloc((void *)spare, chunksize, true);
- malloc_mutex_lock(&arena->lock);
- if (config_stats)
+ if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
+ arena_run_dirty_remove(arena, spare, map_bias,
+ chunk_npages-map_bias);
+ }
+
+ chunk_deregister(spare, &spare->node);
+
+ committed = (arena_mapbits_decommitted_get(spare, map_bias) ==
+ 0);
+ if (!committed) {
+ /*
+ * Decommit the header. Mark the chunk as decommitted
+ * even if header decommit fails, since treating a
+ * partially committed chunk as committed has a high
+ * potential for causing later access of decommitted
+ * memory.
+ */
+ chunk_hooks = chunk_hooks_get(arena);
+ chunk_hooks.decommit(spare, chunksize, 0, map_bias <<
+ LG_PAGE, arena->ind);
+ }
+
+ chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare,
+ chunksize, committed);
+
+ if (config_stats) {
arena->stats.mapped -= chunksize;
+ arena->stats.metadata_mapped -= (map_bias << LG_PAGE);
+ }
} else
arena->spare = chunk;
}
-static arena_run_t *
-arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
+static void
+arena_huge_malloc_stats_update(arena_t *arena, size_t usize)
{
- arena_run_t *run;
- arena_chunk_map_t *mapelm, key;
+ szind_t index = size2index(usize) - nlclasses - NBINS;
- key.bits = size | CHUNK_MAP_KEY;
- mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
- if (mapelm != NULL) {
- arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = (((uintptr_t)mapelm -
- (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
- + map_bias;
+ cassert(config_stats);
- run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
- LG_PAGE));
- arena_run_split_large(arena, run, size, zero);
- return (run);
+ arena->stats.nmalloc_huge++;
+ arena->stats.allocated_huge += usize;
+ arena->stats.hstats[index].nmalloc++;
+ arena->stats.hstats[index].curhchunks++;
+}
+
+static void
+arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize)
+{
+ szind_t index = size2index(usize) - nlclasses - NBINS;
+
+ cassert(config_stats);
+
+ arena->stats.nmalloc_huge--;
+ arena->stats.allocated_huge -= usize;
+ arena->stats.hstats[index].nmalloc--;
+ arena->stats.hstats[index].curhchunks--;
+}
+
+static void
+arena_huge_dalloc_stats_update(arena_t *arena, size_t usize)
+{
+ szind_t index = size2index(usize) - nlclasses - NBINS;
+
+ cassert(config_stats);
+
+ arena->stats.ndalloc_huge++;
+ arena->stats.allocated_huge -= usize;
+ arena->stats.hstats[index].ndalloc++;
+ arena->stats.hstats[index].curhchunks--;
+}
+
+static void
+arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize)
+{
+ szind_t index = size2index(usize) - nlclasses - NBINS;
+
+ cassert(config_stats);
+
+ arena->stats.ndalloc_huge--;
+ arena->stats.allocated_huge += usize;
+ arena->stats.hstats[index].ndalloc--;
+ arena->stats.hstats[index].curhchunks++;
+}
+
+static void
+arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize)
+{
+
+ arena_huge_dalloc_stats_update(arena, oldsize);
+ arena_huge_malloc_stats_update(arena, usize);
+}
+
+static void
+arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize,
+ size_t usize)
+{
+
+ arena_huge_dalloc_stats_update_undo(arena, oldsize);
+ arena_huge_malloc_stats_update_undo(arena, usize);
+}
+
+extent_node_t *
+arena_node_alloc(arena_t *arena)
+{
+ extent_node_t *node;
+
+ malloc_mutex_lock(&arena->node_cache_mtx);
+ node = ql_last(&arena->node_cache, ql_link);
+ if (node == NULL) {
+ malloc_mutex_unlock(&arena->node_cache_mtx);
+ return (base_alloc(sizeof(extent_node_t)));
}
+ ql_tail_remove(&arena->node_cache, extent_node_t, ql_link);
+ malloc_mutex_unlock(&arena->node_cache_mtx);
+ return (node);
+}
- return (NULL);
+void
+arena_node_dalloc(arena_t *arena, extent_node_t *node)
+{
+
+ malloc_mutex_lock(&arena->node_cache_mtx);
+ ql_elm_new(node, ql_link);
+ ql_tail_insert(&arena->node_cache, node, ql_link);
+ malloc_mutex_unlock(&arena->node_cache_mtx);
+}
+
+static void *
+arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ size_t usize, size_t alignment, bool *zero, size_t csize)
+{
+ void *ret;
+ bool commit = true;
+
+ ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment,
+ zero, &commit);
+ if (ret == NULL) {
+ /* Revert optimistic stats updates. */
+ malloc_mutex_lock(&arena->lock);
+ if (config_stats) {
+ arena_huge_malloc_stats_update_undo(arena, usize);
+ arena->stats.mapped -= usize;
+ }
+ arena->nactive -= (usize >> LG_PAGE);
+ malloc_mutex_unlock(&arena->lock);
+ }
+
+ return (ret);
+}
+
+void *
+arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
+ bool *zero)
+{
+ void *ret;
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+ size_t csize = CHUNK_CEILING(usize);
+
+ malloc_mutex_lock(&arena->lock);
+
+ /* Optimistically update stats. */
+ if (config_stats) {
+ arena_huge_malloc_stats_update(arena, usize);
+ arena->stats.mapped += usize;
+ }
+ arena->nactive += (usize >> LG_PAGE);
+
+ ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment,
+ zero, true);
+ malloc_mutex_unlock(&arena->lock);
+ if (ret == NULL) {
+ ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize,
+ alignment, zero, csize);
+ }
+
+ if (config_stats && ret != NULL)
+ stats_cactive_add(usize);
+ return (ret);
+}
+
+void
+arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize)
+{
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+ size_t csize;
+
+ csize = CHUNK_CEILING(usize);
+ malloc_mutex_lock(&arena->lock);
+ if (config_stats) {
+ arena_huge_dalloc_stats_update(arena, usize);
+ arena->stats.mapped -= usize;
+ stats_cactive_sub(usize);
+ }
+ arena->nactive -= (usize >> LG_PAGE);
+
+ chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true);
+ malloc_mutex_unlock(&arena->lock);
+}
+
+void
+arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize,
+ size_t usize)
+{
+
+ assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize));
+ assert(oldsize != usize);
+
+ malloc_mutex_lock(&arena->lock);
+ if (config_stats)
+ arena_huge_ralloc_stats_update(arena, oldsize, usize);
+ if (oldsize < usize) {
+ size_t udiff = usize - oldsize;
+ arena->nactive += udiff >> LG_PAGE;
+ if (config_stats)
+ stats_cactive_add(udiff);
+ } else {
+ size_t udiff = oldsize - usize;
+ arena->nactive -= udiff >> LG_PAGE;
+ if (config_stats)
+ stats_cactive_sub(udiff);
+ }
+ malloc_mutex_unlock(&arena->lock);
+}
+
+void
+arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize,
+ size_t usize)
+{
+ size_t udiff = oldsize - usize;
+ size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
+
+ malloc_mutex_lock(&arena->lock);
+ if (config_stats) {
+ arena_huge_ralloc_stats_update(arena, oldsize, usize);
+ if (cdiff != 0) {
+ arena->stats.mapped -= cdiff;
+ stats_cactive_sub(udiff);
+ }
+ }
+ arena->nactive -= udiff >> LG_PAGE;
+
+ if (cdiff != 0) {
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+ void *nchunk = (void *)((uintptr_t)chunk +
+ CHUNK_CEILING(usize));
+
+ chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true);
+ }
+ malloc_mutex_unlock(&arena->lock);
+}
+
+static bool
+arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk,
+ size_t udiff, size_t cdiff)
+{
+ bool err;
+ bool commit = true;
+
+ err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize,
+ zero, &commit) == NULL);
+ if (err) {
+ /* Revert optimistic stats updates. */
+ malloc_mutex_lock(&arena->lock);
+ if (config_stats) {
+ arena_huge_ralloc_stats_update_undo(arena, oldsize,
+ usize);
+ arena->stats.mapped -= cdiff;
+ }
+ arena->nactive -= (udiff >> LG_PAGE);
+ malloc_mutex_unlock(&arena->lock);
+ } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk,
+ cdiff, true, arena->ind)) {
+ chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero,
+ true);
+ err = true;
+ }
+ return (err);
+}
+
+bool
+arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize,
+ size_t usize, bool *zero)
+{
+ bool err;
+ chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+ void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize));
+ size_t udiff = usize - oldsize;
+ size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize);
+
+ malloc_mutex_lock(&arena->lock);
+
+ /* Optimistically update stats. */
+ if (config_stats) {
+ arena_huge_ralloc_stats_update(arena, oldsize, usize);
+ arena->stats.mapped += cdiff;
+ }
+ arena->nactive += (udiff >> LG_PAGE);
+
+ err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff,
+ chunksize, zero, true) == NULL);
+ malloc_mutex_unlock(&arena->lock);
+ if (err) {
+ err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks,
+ chunk, oldsize, usize, zero, nchunk, udiff,
+ cdiff);
+ } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk,
+ cdiff, true, arena->ind)) {
+ chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero,
+ true);
+ err = true;
+ }
+
+ if (config_stats && !err)
+ stats_cactive_add(udiff);
+ return (err);
+}
+
+/*
+ * Do first-best-fit run selection, i.e. select the lowest run that best fits.
+ * Run sizes are quantized, so not all candidate runs are necessarily exactly
+ * the same size.
+ */
+static arena_run_t *
+arena_run_first_best_fit(arena_t *arena, size_t size)
+{
+ size_t search_size = run_quantize_first(size);
+ arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size);
+ arena_chunk_map_misc_t *miscelm =
+ arena_avail_tree_nsearch(&arena->runs_avail, key);
+ if (miscelm == NULL)
+ return (NULL);
+ return (&miscelm->run);
+}
+
+static arena_run_t *
+arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
+{
+ arena_run_t *run = arena_run_first_best_fit(arena, s2u(size));
+ if (run != NULL) {
+ if (arena_run_split_large(arena, run, size, zero))
+ run = NULL;
+ }
+ return (run);
}
static arena_run_t *
@@ -688,8 +1097,8 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
arena_chunk_t *chunk;
arena_run_t *run;
- assert(size <= arena_maxclass);
- assert((size & PAGE_MASK) == 0);
+ assert(size <= arena_maxrun);
+ assert(size == PAGE_CEILING(size));
/* Search the arena's chunks for the lowest best fit. */
run = arena_run_alloc_large_helper(arena, size, zero);
@@ -701,8 +1110,9 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
*/
chunk = arena_chunk_alloc(arena);
if (chunk != NULL) {
- run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
- arena_run_split_large(arena, run, size, zero);
+ run = &arena_miscelm_get(chunk, map_bias)->run;
+ if (arena_run_split_large(arena, run, size, zero))
+ run = NULL;
return (run);
}
@@ -715,36 +1125,24 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
}
static arena_run_t *
-arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind)
+arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind)
{
- arena_run_t *run;
- arena_chunk_map_t *mapelm, key;
-
- key.bits = size | CHUNK_MAP_KEY;
- mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
- if (mapelm != NULL) {
- arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = (((uintptr_t)mapelm -
- (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
- + map_bias;
-
- run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
- LG_PAGE));
- arena_run_split_small(arena, run, size, binind);
- return (run);
+ arena_run_t *run = arena_run_first_best_fit(arena, size);
+ if (run != NULL) {
+ if (arena_run_split_small(arena, run, size, binind))
+ run = NULL;
}
-
- return (NULL);
+ return (run);
}
static arena_run_t *
-arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
+arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind)
{
arena_chunk_t *chunk;
arena_run_t *run;
- assert(size <= arena_maxclass);
- assert((size & PAGE_MASK) == 0);
+ assert(size <= arena_maxrun);
+ assert(size == PAGE_CEILING(size));
assert(binind != BININD_INVALID);
/* Search the arena's chunks for the lowest best fit. */
@@ -757,8 +1155,9 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
*/
chunk = arena_chunk_alloc(arena);
if (chunk != NULL) {
- run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
- arena_run_split_small(arena, run, size, binind);
+ run = &arena_miscelm_get(chunk, map_bias)->run;
+ if (arena_run_split_small(arena, run, size, binind))
+ run = NULL;
return (run);
}
@@ -770,313 +1169,373 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
return (arena_run_alloc_small_helper(arena, size, binind));
}
-static inline void
+static bool
+arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult)
+{
+
+ return (lg_dirty_mult >= -1 && lg_dirty_mult < (ssize_t)(sizeof(size_t)
+ << 3));
+}
+
+ssize_t
+arena_lg_dirty_mult_get(arena_t *arena)
+{
+ ssize_t lg_dirty_mult;
+
+ malloc_mutex_lock(&arena->lock);
+ lg_dirty_mult = arena->lg_dirty_mult;
+ malloc_mutex_unlock(&arena->lock);
+
+ return (lg_dirty_mult);
+}
+
+bool
+arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult)
+{
+
+ if (!arena_lg_dirty_mult_valid(lg_dirty_mult))
+ return (true);
+
+ malloc_mutex_lock(&arena->lock);
+ arena->lg_dirty_mult = lg_dirty_mult;
+ arena_maybe_purge(arena);
+ malloc_mutex_unlock(&arena->lock);
+
+ return (false);
+}
+
+void
arena_maybe_purge(arena_t *arena)
{
- size_t npurgeable, threshold;
/* Don't purge if the option is disabled. */
- if (opt_lg_dirty_mult < 0)
+ if (arena->lg_dirty_mult < 0)
return;
- /* Don't purge if all dirty pages are already being purged. */
- if (arena->ndirty <= arena->npurgatory)
+ /* Don't recursively purge. */
+ if (arena->purging)
return;
- npurgeable = arena->ndirty - arena->npurgatory;
- threshold = (arena->nactive >> opt_lg_dirty_mult);
/*
- * Don't purge unless the number of purgeable pages exceeds the
- * threshold.
+ * Iterate, since preventing recursive purging could otherwise leave too
+ * many dirty pages.
*/
- if (npurgeable <= threshold)
- return;
-
- arena_purge(arena, false);
+ while (true) {
+ size_t threshold = (arena->nactive >> arena->lg_dirty_mult);
+ if (threshold < chunk_npages)
+ threshold = chunk_npages;
+ /*
+ * Don't purge unless the number of purgeable pages exceeds the
+ * threshold.
+ */
+ if (arena->ndirty <= threshold)
+ return;
+ arena_purge(arena, false);
+ }
}
-static arena_chunk_t *
-chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
+static size_t
+arena_dirty_count(arena_t *arena)
{
- size_t *ndirty = (size_t *)arg;
+ size_t ndirty = 0;
+ arena_runs_dirty_link_t *rdelm;
+ extent_node_t *chunkselm;
- assert(chunk->ndirty != 0);
- *ndirty += chunk->ndirty;
- return (NULL);
+ for (rdelm = qr_next(&arena->runs_dirty, rd_link),
+ chunkselm = qr_next(&arena->chunks_cache, cc_link);
+ rdelm != &arena->runs_dirty; rdelm = qr_next(rdelm, rd_link)) {
+ size_t npages;
+
+ if (rdelm == &chunkselm->rd) {
+ npages = extent_node_size_get(chunkselm) >> LG_PAGE;
+ chunkselm = qr_next(chunkselm, cc_link);
+ } else {
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+ rdelm);
+ arena_chunk_map_misc_t *miscelm =
+ arena_rd_to_miscelm(rdelm);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
+ assert(arena_mapbits_allocated_get(chunk, pageind) ==
+ 0);
+ assert(arena_mapbits_large_get(chunk, pageind) == 0);
+ assert(arena_mapbits_dirty_get(chunk, pageind) != 0);
+ npages = arena_mapbits_unallocated_size_get(chunk,
+ pageind) >> LG_PAGE;
+ }
+ ndirty += npages;
+ }
+
+ return (ndirty);
}
static size_t
-arena_compute_npurgatory(arena_t *arena, bool all)
+arena_compute_npurge(arena_t *arena, bool all)
{
- size_t npurgatory, npurgeable;
+ size_t npurge;
/*
* Compute the minimum number of pages that this thread should try to
* purge.
*/
- npurgeable = arena->ndirty - arena->npurgatory;
-
- if (all == false) {
- size_t threshold = (arena->nactive >> opt_lg_dirty_mult);
+ if (!all) {
+ size_t threshold = (arena->nactive >> arena->lg_dirty_mult);
+ threshold = threshold < chunk_npages ? chunk_npages : threshold;
- npurgatory = npurgeable - threshold;
+ npurge = arena->ndirty - threshold;
} else
- npurgatory = npurgeable;
+ npurge = arena->ndirty;
- return (npurgatory);
+ return (npurge);
}
-static void
-arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all,
- arena_chunk_mapelms_t *mapelms)
-{
- size_t pageind, npages;
-
- /*
- * Temporarily allocate free dirty runs within chunk. If all is false,
- * only operate on dirty runs that are fragments; otherwise operate on
- * all dirty runs.
- */
- for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
- if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
+static size_t
+arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all,
+ size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel,
+ extent_node_t *purge_chunks_sentinel)
+{
+ arena_runs_dirty_link_t *rdelm, *rdelm_next;
+ extent_node_t *chunkselm;
+ size_t nstashed = 0;
+
+ /* Stash at least npurge pages. */
+ for (rdelm = qr_next(&arena->runs_dirty, rd_link),
+ chunkselm = qr_next(&arena->chunks_cache, cc_link);
+ rdelm != &arena->runs_dirty; rdelm = rdelm_next) {
+ size_t npages;
+ rdelm_next = qr_next(rdelm, rd_link);
+
+ if (rdelm == &chunkselm->rd) {
+ extent_node_t *chunkselm_next;
+ bool zero;
+ UNUSED void *chunk;
+
+ chunkselm_next = qr_next(chunkselm, cc_link);
+ /*
+ * Allocate. chunkselm remains valid due to the
+ * dalloc_node=false argument to chunk_alloc_cache().
+ */
+ zero = false;
+ chunk = chunk_alloc_cache(arena, chunk_hooks,
+ extent_node_addr_get(chunkselm),
+ extent_node_size_get(chunkselm), chunksize, &zero,
+ false);
+ assert(chunk == extent_node_addr_get(chunkselm));
+ assert(zero == extent_node_zeroed_get(chunkselm));
+ extent_node_dirty_insert(chunkselm, purge_runs_sentinel,
+ purge_chunks_sentinel);
+ npages = extent_node_size_get(chunkselm) >> LG_PAGE;
+ chunkselm = chunkselm_next;
+ } else {
+ arena_chunk_t *chunk =
+ (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
+ arena_chunk_map_misc_t *miscelm =
+ arena_rd_to_miscelm(rdelm);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
+ arena_run_t *run = &miscelm->run;
size_t run_size =
arena_mapbits_unallocated_size_get(chunk, pageind);
npages = run_size >> LG_PAGE;
+
assert(pageind + npages <= chunk_npages);
assert(arena_mapbits_dirty_get(chunk, pageind) ==
arena_mapbits_dirty_get(chunk, pageind+npages-1));
- if (arena_mapbits_dirty_get(chunk, pageind) != 0 &&
- (all || arena_avail_adjac(chunk, pageind,
- npages))) {
- arena_run_t *run = (arena_run_t *)((uintptr_t)
- chunk + (uintptr_t)(pageind << LG_PAGE));
-
- arena_run_split_large(arena, run, run_size,
- false);
- /* Append to list for later processing. */
- ql_elm_new(mapelm, u.ql_link);
- ql_tail_insert(mapelms, mapelm, u.ql_link);
- }
- } else {
- /* Skip run. */
- if (arena_mapbits_large_get(chunk, pageind) != 0) {
- npages = arena_mapbits_large_size_get(chunk,
- pageind) >> LG_PAGE;
- } else {
- size_t binind;
- arena_bin_info_t *bin_info;
- arena_run_t *run = (arena_run_t *)((uintptr_t)
- chunk + (uintptr_t)(pageind << LG_PAGE));
-
- assert(arena_mapbits_small_runind_get(chunk,
- pageind) == 0);
- binind = arena_bin_index(arena, run->bin);
- bin_info = &arena_bin_info[binind];
- npages = bin_info->run_size >> LG_PAGE;
+ /*
+ * If purging the spare chunk's run, make it available
+ * prior to allocation.
+ */
+ if (chunk == arena->spare)
+ arena_chunk_alloc(arena);
+
+ /* Temporarily allocate the free dirty run. */
+ arena_run_split_large(arena, run, run_size, false);
+ /* Stash. */
+ if (false)
+ qr_new(rdelm, rd_link); /* Redundant. */
+ else {
+ assert(qr_next(rdelm, rd_link) == rdelm);
+ assert(qr_prev(rdelm, rd_link) == rdelm);
}
+ qr_meld(purge_runs_sentinel, rdelm, rd_link);
}
+
+ nstashed += npages;
+ if (!all && nstashed >= npurge)
+ break;
}
- assert(pageind == chunk_npages);
- assert(chunk->ndirty == 0 || all == false);
- assert(chunk->nruns_adjac == 0);
+
+ return (nstashed);
}
static size_t
-arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk,
- arena_chunk_mapelms_t *mapelms)
+arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ arena_runs_dirty_link_t *purge_runs_sentinel,
+ extent_node_t *purge_chunks_sentinel)
{
- size_t npurged, pageind, npages, nmadvise;
- arena_chunk_map_t *mapelm;
+ size_t npurged, nmadvise;
+ arena_runs_dirty_link_t *rdelm;
+ extent_node_t *chunkselm;
- malloc_mutex_unlock(&arena->lock);
if (config_stats)
nmadvise = 0;
npurged = 0;
- ql_foreach(mapelm, mapelms, u.ql_link) {
- bool unzeroed;
- size_t flag_unzeroed, i;
-
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t)) + map_bias;
- npages = arena_mapbits_large_size_get(chunk, pageind) >>
- LG_PAGE;
- assert(pageind + npages <= chunk_npages);
- unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
- LG_PAGE)), (npages << LG_PAGE));
- flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
- /*
- * Set the unzeroed flag for all pages, now that pages_purge()
- * has returned whether the pages were zeroed as a side effect
- * of purging. This chunk map modification is safe even though
- * the arena mutex isn't currently owned by this thread,
- * because the run is marked as allocated, thus protecting it
- * from being modified by any other thread. As long as these
- * writes don't perturb the first and last elements'
- * CHUNK_MAP_ALLOCATED bits, behavior is well defined.
- */
- for (i = 0; i < npages; i++) {
- arena_mapbits_unzeroed_set(chunk, pageind+i,
- flag_unzeroed);
+
+ malloc_mutex_unlock(&arena->lock);
+ for (rdelm = qr_next(purge_runs_sentinel, rd_link),
+ chunkselm = qr_next(purge_chunks_sentinel, cc_link);
+ rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) {
+ size_t npages;
+
+ if (rdelm == &chunkselm->rd) {
+ /*
+ * Don't actually purge the chunk here because 1)
+ * chunkselm is embedded in the chunk and must remain
+ * valid, and 2) we deallocate the chunk in
+ * arena_unstash_purged(), where it is destroyed,
+ * decommitted, or purged, depending on chunk
+ * deallocation policy.
+ */
+ size_t size = extent_node_size_get(chunkselm);
+ npages = size >> LG_PAGE;
+ chunkselm = qr_next(chunkselm, cc_link);
+ } else {
+ size_t pageind, run_size, flag_unzeroed, flags, i;
+ bool decommitted;
+ arena_chunk_t *chunk =
+ (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
+ arena_chunk_map_misc_t *miscelm =
+ arena_rd_to_miscelm(rdelm);
+ pageind = arena_miscelm_to_pageind(miscelm);
+ run_size = arena_mapbits_large_size_get(chunk, pageind);
+ npages = run_size >> LG_PAGE;
+
+ assert(pageind + npages <= chunk_npages);
+ assert(!arena_mapbits_decommitted_get(chunk, pageind));
+ assert(!arena_mapbits_decommitted_get(chunk,
+ pageind+npages-1));
+ decommitted = !chunk_hooks->decommit(chunk, chunksize,
+ pageind << LG_PAGE, npages << LG_PAGE, arena->ind);
+ if (decommitted) {
+ flag_unzeroed = 0;
+ flags = CHUNK_MAP_DECOMMITTED;
+ } else {
+ flag_unzeroed = chunk_purge_wrapper(arena,
+ chunk_hooks, chunk, chunksize, pageind <<
+ LG_PAGE, run_size) ? CHUNK_MAP_UNZEROED : 0;
+ flags = flag_unzeroed;
+ }
+ arena_mapbits_large_set(chunk, pageind+npages-1, 0,
+ flags);
+ arena_mapbits_large_set(chunk, pageind, run_size,
+ flags);
+
+ /*
+ * Set the unzeroed flag for internal pages, now that
+ * chunk_purge_wrapper() has returned whether the pages
+ * were zeroed as a side effect of purging. This chunk
+ * map modification is safe even though the arena mutex
+ * isn't currently owned by this thread, because the run
+ * is marked as allocated, thus protecting it from being
+ * modified by any other thread. As long as these
+ * writes don't perturb the first and last elements'
+ * CHUNK_MAP_ALLOCATED bits, behavior is well defined.
+ */
+ for (i = 1; i < npages-1; i++) {
+ arena_mapbits_internal_set(chunk, pageind+i,
+ flag_unzeroed);
+ }
}
+
npurged += npages;
if (config_stats)
nmadvise++;
}
malloc_mutex_lock(&arena->lock);
- if (config_stats)
+
+ if (config_stats) {
arena->stats.nmadvise += nmadvise;
+ arena->stats.purged += npurged;
+ }
return (npurged);
}
static void
-arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk,
- arena_chunk_mapelms_t *mapelms)
-{
- arena_chunk_map_t *mapelm;
- size_t pageind;
-
- /* Deallocate runs. */
- for (mapelm = ql_first(mapelms); mapelm != NULL;
- mapelm = ql_first(mapelms)) {
- arena_run_t *run;
-
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t)) + map_bias;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind <<
- LG_PAGE));
- ql_remove(mapelms, mapelm, u.ql_link);
- arena_run_dalloc(arena, run, false, true);
+arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ arena_runs_dirty_link_t *purge_runs_sentinel,
+ extent_node_t *purge_chunks_sentinel)
+{
+ arena_runs_dirty_link_t *rdelm, *rdelm_next;
+ extent_node_t *chunkselm;
+
+ /* Deallocate chunks/runs. */
+ for (rdelm = qr_next(purge_runs_sentinel, rd_link),
+ chunkselm = qr_next(purge_chunks_sentinel, cc_link);
+ rdelm != purge_runs_sentinel; rdelm = rdelm_next) {
+ rdelm_next = qr_next(rdelm, rd_link);
+ if (rdelm == &chunkselm->rd) {
+ extent_node_t *chunkselm_next = qr_next(chunkselm,
+ cc_link);
+ void *addr = extent_node_addr_get(chunkselm);
+ size_t size = extent_node_size_get(chunkselm);
+ bool zeroed = extent_node_zeroed_get(chunkselm);
+ bool committed = extent_node_committed_get(chunkselm);
+ extent_node_dirty_remove(chunkselm);
+ arena_node_dalloc(arena, chunkselm);
+ chunkselm = chunkselm_next;
+ chunk_dalloc_arena(arena, chunk_hooks, addr, size,
+ zeroed, committed);
+ } else {
+ arena_chunk_t *chunk =
+ (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm);
+ arena_chunk_map_misc_t *miscelm =
+ arena_rd_to_miscelm(rdelm);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
+ bool decommitted = (arena_mapbits_decommitted_get(chunk,
+ pageind) != 0);
+ arena_run_t *run = &miscelm->run;
+ qr_remove(rdelm, rd_link);
+ arena_run_dalloc(arena, run, false, true, decommitted);
+ }
}
}
-static inline size_t
-arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all)
+static void
+arena_purge(arena_t *arena, bool all)
{
- size_t npurged;
- arena_chunk_mapelms_t mapelms;
+ chunk_hooks_t chunk_hooks = chunk_hooks_get(arena);
+ size_t npurge, npurgeable, npurged;
+ arena_runs_dirty_link_t purge_runs_sentinel;
+ extent_node_t purge_chunks_sentinel;
- ql_new(&mapelms);
-
- /*
- * If chunk is the spare, temporarily re-allocate it, 1) so that its
- * run is reinserted into runs_avail, and 2) so that it cannot be
- * completely discarded by another thread while arena->lock is dropped
- * by this thread. Note that the arena_run_dalloc() call will
- * implicitly deallocate the chunk, so no explicit action is required
- * in this function to deallocate the chunk.
- *
- * Note that once a chunk contains dirty pages, it cannot again contain
- * a single run unless 1) it is a dirty run, or 2) this function purges
- * dirty pages and causes the transition to a single clean run. Thus
- * (chunk == arena->spare) is possible, but it is not possible for
- * this function to be called on the spare unless it contains a dirty
- * run.
- */
- if (chunk == arena->spare) {
- assert(arena_mapbits_dirty_get(chunk, map_bias) != 0);
- assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0);
-
- arena_chunk_alloc(arena);
- }
-
- if (config_stats)
- arena->stats.purged += chunk->ndirty;
+ arena->purging = true;
/*
- * Operate on all dirty runs if there is no clean/dirty run
- * fragmentation.
+ * Calls to arena_dirty_count() are disabled even for debug builds
+ * because overhead grows nonlinearly as memory usage increases.
*/
- if (chunk->nruns_adjac == 0)
- all = true;
-
- arena_chunk_stash_dirty(arena, chunk, all, &mapelms);
- npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms);
- arena_chunk_unstash_purged(arena, chunk, &mapelms);
-
- return (npurged);
-}
-
-static void
-arena_purge(arena_t *arena, bool all)
-{
- arena_chunk_t *chunk;
- size_t npurgatory;
- if (config_debug) {
- size_t ndirty = 0;
-
- arena_chunk_dirty_iter(&arena->chunks_dirty, NULL,
- chunks_dirty_iter_cb, (void *)&ndirty);
+ if (false && config_debug) {
+ size_t ndirty = arena_dirty_count(arena);
assert(ndirty == arena->ndirty);
}
- assert(arena->ndirty > arena->npurgatory || all);
- assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
- arena->npurgatory) || all);
+ assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all);
if (config_stats)
arena->stats.npurge++;
- /*
- * Add the minimum number of pages this thread should try to purge to
- * arena->npurgatory. This will keep multiple threads from racing to
- * reduce ndirty below the threshold.
- */
- npurgatory = arena_compute_npurgatory(arena, all);
- arena->npurgatory += npurgatory;
-
- while (npurgatory > 0) {
- size_t npurgeable, npurged, nunpurged;
-
- /* Get next chunk with dirty pages. */
- chunk = arena_chunk_dirty_first(&arena->chunks_dirty);
- if (chunk == NULL) {
- /*
- * This thread was unable to purge as many pages as
- * originally intended, due to races with other threads
- * that either did some of the purging work, or re-used
- * dirty pages.
- */
- arena->npurgatory -= npurgatory;
- return;
- }
- npurgeable = chunk->ndirty;
- assert(npurgeable != 0);
+ npurge = arena_compute_npurge(arena, all);
+ qr_new(&purge_runs_sentinel, rd_link);
+ extent_node_dirty_linkage_init(&purge_chunks_sentinel);
- if (npurgeable > npurgatory && chunk->nruns_adjac == 0) {
- /*
- * This thread will purge all the dirty pages in chunk,
- * so set npurgatory to reflect this thread's intent to
- * purge the pages. This tends to reduce the chances
- * of the following scenario:
- *
- * 1) This thread sets arena->npurgatory such that
- * (arena->ndirty - arena->npurgatory) is at the
- * threshold.
- * 2) This thread drops arena->lock.
- * 3) Another thread causes one or more pages to be
- * dirtied, and immediately determines that it must
- * purge dirty pages.
- *
- * If this scenario *does* play out, that's okay,
- * because all of the purging work being done really
- * needs to happen.
- */
- arena->npurgatory += npurgeable - npurgatory;
- npurgatory = npurgeable;
- }
+ npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge,
+ &purge_runs_sentinel, &purge_chunks_sentinel);
+ assert(npurgeable >= npurge);
+ npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel,
+ &purge_chunks_sentinel);
+ assert(npurged == npurgeable);
+ arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel,
+ &purge_chunks_sentinel);
- /*
- * Keep track of how many pages are purgeable, versus how many
- * actually get purged, and adjust counters accordingly.
- */
- arena->npurgatory -= npurgeable;
- npurgatory -= npurgeable;
- npurged = arena_chunk_purge(arena, chunk, all);
- nunpurged = npurgeable - npurged;
- arena->npurgatory += nunpurged;
- npurgatory += nunpurged;
- }
+ arena->purging = false;
}
void
@@ -1090,7 +1549,8 @@ arena_purge_all(arena_t *arena)
static void
arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
- size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty)
+ size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty,
+ size_t flag_decommitted)
{
size_t size = *p_size;
size_t run_ind = *p_run_ind;
@@ -1099,7 +1559,9 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
/* Try to coalesce forward. */
if (run_ind + run_pages < chunk_npages &&
arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 &&
- arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) {
+ arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty &&
+ arena_mapbits_decommitted_get(chunk, run_ind+run_pages) ==
+ flag_decommitted) {
size_t nrun_size = arena_mapbits_unallocated_size_get(chunk,
run_ind+run_pages);
size_t nrun_pages = nrun_size >> LG_PAGE;
@@ -1112,8 +1574,18 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
run_ind+run_pages+nrun_pages-1) == nrun_size);
assert(arena_mapbits_dirty_get(chunk,
run_ind+run_pages+nrun_pages-1) == flag_dirty);
- arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages,
- false, true);
+ assert(arena_mapbits_decommitted_get(chunk,
+ run_ind+run_pages+nrun_pages-1) == flag_decommitted);
+ arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages);
+
+ /*
+ * If the successor is dirty, remove it from the set of dirty
+ * pages.
+ */
+ if (flag_dirty != 0) {
+ arena_run_dirty_remove(arena, chunk, run_ind+run_pages,
+ nrun_pages);
+ }
size += nrun_size;
run_pages += nrun_pages;
@@ -1126,7 +1598,8 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
/* Try to coalesce backward. */
if (run_ind > map_bias && arena_mapbits_allocated_get(chunk,
run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) ==
- flag_dirty) {
+ flag_dirty && arena_mapbits_decommitted_get(chunk, run_ind-1) ==
+ flag_decommitted) {
size_t prun_size = arena_mapbits_unallocated_size_get(chunk,
run_ind-1);
size_t prun_pages = prun_size >> LG_PAGE;
@@ -1140,8 +1613,18 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
prun_size);
assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
- arena_avail_remove(arena, chunk, run_ind, prun_pages, true,
- false);
+ assert(arena_mapbits_decommitted_get(chunk, run_ind) ==
+ flag_decommitted);
+ arena_avail_remove(arena, chunk, run_ind, prun_pages);
+
+ /*
+ * If the predecessor is dirty, remove it from the set of dirty
+ * pages.
+ */
+ if (flag_dirty != 0) {
+ arena_run_dirty_remove(arena, chunk, run_ind,
+ prun_pages);
+ }
size += prun_size;
run_pages += prun_pages;
@@ -1156,26 +1639,53 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
*p_run_pages = run_pages;
}
-static void
-arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
+static size_t
+arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+ size_t run_ind)
{
- arena_chunk_t *chunk;
- size_t size, run_ind, run_pages, flag_dirty;
+ size_t size;
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
assert(run_ind >= map_bias);
assert(run_ind < chunk_npages);
+
if (arena_mapbits_large_get(chunk, run_ind) != 0) {
size = arena_mapbits_large_size_get(chunk, run_ind);
- assert(size == PAGE ||
- arena_mapbits_large_size_get(chunk,
+ assert(size == PAGE || arena_mapbits_large_size_get(chunk,
run_ind+(size>>LG_PAGE)-1) == 0);
} else {
- size_t binind = arena_bin_index(arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
+ arena_bin_info_t *bin_info = &arena_bin_info[run->binind];
size = bin_info->run_size;
}
+
+ return (size);
+}
+
+static bool
+arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run)
+{
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
+ size_t run_ind = arena_miscelm_to_pageind(miscelm);
+ size_t offset = run_ind << LG_PAGE;
+ size_t length = arena_run_size_get(arena, chunk, run, run_ind);
+
+ return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length,
+ arena->ind));
+}
+
+static void
+arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned,
+ bool decommitted)
+{
+ arena_chunk_t *chunk;
+ arena_chunk_map_misc_t *miscelm;
+ size_t size, run_ind, run_pages, flag_dirty, flag_decommitted;
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ miscelm = arena_run_to_miscelm(run);
+ run_ind = arena_miscelm_to_pageind(miscelm);
+ assert(run_ind >= map_bias);
+ assert(run_ind < chunk_npages);
+ size = arena_run_size_get(arena, chunk, run, run_ind);
run_pages = (size >> LG_PAGE);
arena_cactive_update(arena, 0, run_pages);
arena->nactive -= run_pages;
@@ -1187,16 +1697,18 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
*/
assert(arena_mapbits_dirty_get(chunk, run_ind) ==
arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
- if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0)
+ if (!cleaned && !decommitted && arena_mapbits_dirty_get(chunk, run_ind)
+ != 0)
dirty = true;
flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
+ flag_decommitted = decommitted ? CHUNK_MAP_DECOMMITTED : 0;
/* Mark pages as unallocated in the chunk map. */
- if (dirty) {
- arena_mapbits_unallocated_set(chunk, run_ind, size,
- CHUNK_MAP_DIRTY);
+ if (dirty || decommitted) {
+ size_t flags = flag_dirty | flag_decommitted;
+ arena_mapbits_unallocated_set(chunk, run_ind, size, flags);
arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
- CHUNK_MAP_DIRTY);
+ flags);
} else {
arena_mapbits_unallocated_set(chunk, run_ind, size,
arena_mapbits_unzeroed_get(chunk, run_ind));
@@ -1205,20 +1717,25 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
}
arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages,
- flag_dirty);
+ flag_dirty, flag_decommitted);
/* Insert into runs_avail, now that coalescing is complete. */
assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
assert(arena_mapbits_dirty_get(chunk, run_ind) ==
arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
- arena_avail_insert(arena, chunk, run_ind, run_pages, true, true);
+ assert(arena_mapbits_decommitted_get(chunk, run_ind) ==
+ arena_mapbits_decommitted_get(chunk, run_ind+run_pages-1));
+ arena_avail_insert(arena, chunk, run_ind, run_pages);
+
+ if (dirty)
+ arena_run_dirty_insert(arena, chunk, run_ind, run_pages);
/* Deallocate chunk if it is now completely unused. */
- if (size == arena_maxclass) {
+ if (size == arena_maxrun) {
assert(run_ind == map_bias);
- assert(run_pages == (arena_maxclass >> LG_PAGE));
- arena_chunk_dealloc(arena, chunk);
+ assert(run_pages == (arena_maxrun >> LG_PAGE));
+ arena_chunk_dalloc(arena, chunk);
}
/*
@@ -1233,12 +1750,25 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
}
static void
+arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk,
+ arena_run_t *run)
+{
+ bool committed = arena_run_decommit(arena, chunk, run);
+
+ arena_run_dalloc(arena, run, committed, false, !committed);
+}
+
+static void
arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
size_t oldsize, size_t newsize)
{
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
size_t head_npages = (oldsize - newsize) >> LG_PAGE;
size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
+ size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind);
+ size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
+ CHUNK_MAP_UNZEROED : 0;
assert(oldsize > newsize);
@@ -1248,8 +1778,11 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
* run first, in case of single-page runs.
*/
assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
- arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty);
+ arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ pageind+head_npages-1)));
+ arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)));
if (config_debug) {
UNUSED size_t tail_npages = newsize >> LG_PAGE;
@@ -1259,18 +1792,25 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
pageind+head_npages+tail_npages-1) == flag_dirty);
}
arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
- flag_dirty);
+ flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ pageind+head_npages)));
- arena_run_dalloc(arena, run, false, false);
+ arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0));
}
static void
arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
size_t oldsize, size_t newsize, bool dirty)
{
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
size_t head_npages = newsize >> LG_PAGE;
size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
+ size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind);
+ size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ?
+ CHUNK_MAP_UNZEROED : 0;
+ arena_chunk_map_misc_t *tail_miscelm;
+ arena_run_t *tail_run;
assert(oldsize > newsize);
@@ -1280,8 +1820,11 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
* run first, in case of single-page runs.
*/
assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
- arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty);
+ arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ pageind+head_npages-1)));
+ arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind)));
if (config_debug) {
UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
@@ -1291,29 +1834,21 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
pageind+head_npages+tail_npages-1) == flag_dirty);
}
arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
- flag_dirty);
+ flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ pageind+head_npages)));
- arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
- dirty, false);
+ tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages);
+ tail_run = &tail_miscelm->run;
+ arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted !=
+ 0));
}
static arena_run_t *
arena_bin_runs_first(arena_bin_t *bin)
{
- arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs);
- if (mapelm != NULL) {
- arena_chunk_t *chunk;
- size_t pageind;
- arena_run_t *run;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
- pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t))) + map_bias;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) <<
- LG_PAGE));
- return (run);
- }
+ arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
+ if (miscelm != NULL)
+ return (&miscelm->run);
return (NULL);
}
@@ -1321,25 +1856,21 @@ arena_bin_runs_first(arena_bin_t *bin)
static void
arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
{
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(run);
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
- assert(arena_run_tree_search(&bin->runs, mapelm) == NULL);
+ assert(arena_run_tree_search(&bin->runs, miscelm) == NULL);
- arena_run_tree_insert(&bin->runs, mapelm);
+ arena_run_tree_insert(&bin->runs, miscelm);
}
static void
arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
{
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
- assert(arena_run_tree_search(&bin->runs, mapelm) != NULL);
+ assert(arena_run_tree_search(&bin->runs, miscelm) != NULL);
- arena_run_tree_remove(&bin->runs, mapelm);
+ arena_run_tree_remove(&bin->runs, miscelm);
}
static arena_run_t *
@@ -1358,7 +1889,7 @@ static arena_run_t *
arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
arena_run_t *run;
- size_t binind;
+ szind_t binind;
arena_bin_info_t *bin_info;
/* Look for a usable run. */
@@ -1376,14 +1907,10 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc_small(arena, bin_info->run_size, binind);
if (run != NULL) {
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
/* Initialize run internals. */
- run->bin = bin;
- run->nextind = 0;
+ run->binind = binind;
run->nfree = bin_info->nregs;
- bitmap_init(bitmap, &bin_info->bitmap_info);
+ bitmap_init(run->bitmap, &bin_info->bitmap_info);
}
malloc_mutex_unlock(&arena->lock);
/********************************/
@@ -1412,8 +1939,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{
- void *ret;
- size_t binind;
+ szind_t binind;
arena_bin_info_t *bin_info;
arena_run_t *run;
@@ -1426,6 +1952,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
* Another thread updated runcur while this one ran without the
* bin lock in arena_bin_nonfull_run_get().
*/
+ void *ret;
assert(bin->runcur->nfree > 0);
ret = arena_run_reg_alloc(bin->runcur, bin_info);
if (run != NULL) {
@@ -1459,13 +1986,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
}
void
-arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
+arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind,
uint64_t prof_accumbytes)
{
unsigned i, nfill;
arena_bin_t *bin;
- arena_run_t *run;
- void *ptr;
assert(tbin->ncached == 0);
@@ -1475,13 +2000,26 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
malloc_mutex_lock(&bin->lock);
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
tbin->lg_fill_div); i < nfill; i++) {
+ arena_run_t *run;
+ void *ptr;
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ptr = arena_bin_malloc_hard(arena, bin);
- if (ptr == NULL)
+ if (ptr == NULL) {
+ /*
+ * OOM. tbin->avail isn't yet filled down to its first
+ * element, so the successful allocations (if any) must
+ * be moved to the base of tbin->avail before bailing
+ * out.
+ */
+ if (i > 0) {
+ memmove(tbin->avail, &tbin->avail[nfill - i],
+ i * sizeof(void *));
+ }
break;
- if (config_fill && opt_junk) {
+ }
+ if (config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ptr, &arena_bin_info[binind],
true);
}
@@ -1489,9 +2027,9 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
tbin->avail[nfill - 1 - i] = ptr;
}
if (config_stats) {
- bin->stats.allocated += i * arena_bin_info[binind].reg_size;
bin->stats.nmalloc += i;
bin->stats.nrequests += tbin->tstats.nrequests;
+ bin->stats.curregs += i;
bin->stats.nfills++;
tbin->tstats.nrequests = 0;
}
@@ -1538,29 +2076,35 @@ arena_redzone_corruption_t *arena_redzone_corruption =
static void
arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset)
{
- size_t size = bin_info->reg_size;
- size_t redzone_size = bin_info->redzone_size;
- size_t i;
bool error = false;
- for (i = 1; i <= redzone_size; i++) {
- uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
- if (*byte != 0xa5) {
- error = true;
- arena_redzone_corruption(ptr, size, false, i, *byte);
- if (reset)
- *byte = 0xa5;
+ if (opt_junk_alloc) {
+ size_t size = bin_info->reg_size;
+ size_t redzone_size = bin_info->redzone_size;
+ size_t i;
+
+ for (i = 1; i <= redzone_size; i++) {
+ uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
+ if (*byte != 0xa5) {
+ error = true;
+ arena_redzone_corruption(ptr, size, false, i,
+ *byte);
+ if (reset)
+ *byte = 0xa5;
+ }
}
- }
- for (i = 0; i < redzone_size; i++) {
- uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
- if (*byte != 0xa5) {
- error = true;
- arena_redzone_corruption(ptr, size, true, i, *byte);
- if (reset)
- *byte = 0xa5;
+ for (i = 0; i < redzone_size; i++) {
+ uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
+ if (*byte != 0xa5) {
+ error = true;
+ arena_redzone_corruption(ptr, size, true, i,
+ *byte);
+ if (reset)
+ *byte = 0xa5;
+ }
}
}
+
if (opt_abort && error)
abort();
}
@@ -1588,14 +2132,14 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small =
void
arena_quarantine_junk_small(void *ptr, size_t usize)
{
- size_t binind;
+ szind_t binind;
arena_bin_info_t *bin_info;
cassert(config_fill);
- assert(opt_junk);
+ assert(opt_junk_free);
assert(opt_quarantine);
assert(usize <= SMALL_MAXCLASS);
- binind = SMALL_SIZE2BIN(usize);
+ binind = size2index(usize);
bin_info = &arena_bin_info[binind];
arena_redzones_validate(ptr, bin_info, true);
}
@@ -1606,12 +2150,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
void *ret;
arena_bin_t *bin;
arena_run_t *run;
- size_t binind;
+ szind_t binind;
- binind = SMALL_SIZE2BIN(size);
+ binind = size2index(size);
assert(binind < NBINS);
bin = &arena->bins[binind];
- size = arena_bin_info[binind].reg_size;
+ size = index2size(binind);
malloc_mutex_lock(&bin->lock);
if ((run = bin->runcur) != NULL && run->nfree > 0)
@@ -1625,29 +2169,29 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
}
if (config_stats) {
- bin->stats.allocated += size;
bin->stats.nmalloc++;
bin->stats.nrequests++;
+ bin->stats.curregs++;
}
malloc_mutex_unlock(&bin->lock);
- if (config_prof && isthreaded == false && arena_prof_accum(arena, size))
+ if (config_prof && !isthreaded && arena_prof_accum(arena, size))
prof_idump();
- if (zero == false) {
+ if (!zero) {
if (config_fill) {
- if (opt_junk) {
+ if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
- } else if (opt_zero)
+ } else if (unlikely(opt_zero))
memset(ret, 0, size);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
} else {
- if (config_fill && opt_junk) {
+ if (config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
memset(ret, 0, size);
}
@@ -1658,36 +2202,59 @@ void *
arena_malloc_large(arena_t *arena, size_t size, bool zero)
{
void *ret;
+ size_t usize;
+ uintptr_t random_offset;
+ arena_run_t *run;
+ arena_chunk_map_misc_t *miscelm;
UNUSED bool idump;
/* Large allocation. */
- size = PAGE_CEILING(size);
+ usize = s2u(size);
malloc_mutex_lock(&arena->lock);
- ret = (void *)arena_run_alloc_large(arena, size, zero);
- if (ret == NULL) {
+ if (config_cache_oblivious) {
+ uint64_t r;
+
+ /*
+ * Compute a uniformly distributed offset within the first page
+ * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
+ * for 4 KiB pages and 64-byte cachelines.
+ */
+ prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state,
+ UINT64_C(6364136223846793009),
+ UINT64_C(1442695040888963409));
+ random_offset = ((uintptr_t)r) << LG_CACHELINE;
+ } else
+ random_offset = 0;
+ run = arena_run_alloc_large(arena, usize + large_pad, zero);
+ if (run == NULL) {
malloc_mutex_unlock(&arena->lock);
return (NULL);
}
+ miscelm = arena_run_to_miscelm(run);
+ ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) +
+ random_offset);
if (config_stats) {
+ szind_t index = size2index(usize) - NBINS;
+
arena->stats.nmalloc_large++;
arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
+ arena->stats.allocated_large += usize;
+ arena->stats.lstats[index].nmalloc++;
+ arena->stats.lstats[index].nrequests++;
+ arena->stats.lstats[index].curruns++;
}
if (config_prof)
- idump = arena_prof_accum_locked(arena, size);
+ idump = arena_prof_accum_locked(arena, usize);
malloc_mutex_unlock(&arena->lock);
if (config_prof && idump)
prof_idump();
- if (zero == false) {
+ if (!zero) {
if (config_fill) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
+ if (unlikely(opt_junk_alloc))
+ memset(ret, 0xa5, usize);
+ else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
}
}
@@ -1695,18 +2262,25 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
}
/* Only handles large allocations that require more than page alignment. */
-void *
-arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
+static void *
+arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+ bool zero)
{
void *ret;
size_t alloc_size, leadsize, trailsize;
arena_run_t *run;
arena_chunk_t *chunk;
+ arena_chunk_map_misc_t *miscelm;
+ void *rpages;
+
+ assert(usize == PAGE_CEILING(usize));
- assert((size & PAGE_MASK) == 0);
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
alignment = PAGE_CEILING(alignment);
- alloc_size = size + alignment - PAGE;
+ alloc_size = usize + large_pad + alignment - PAGE;
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc_large(arena, alloc_size, false);
@@ -1715,37 +2289,94 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
return (NULL);
}
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ miscelm = arena_run_to_miscelm(run);
+ rpages = arena_miscelm_to_rpages(miscelm);
- leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) -
- (uintptr_t)run;
- assert(alloc_size >= leadsize + size);
- trailsize = alloc_size - leadsize - size;
- ret = (void *)((uintptr_t)run + leadsize);
+ leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) -
+ (uintptr_t)rpages;
+ assert(alloc_size >= leadsize + usize);
+ trailsize = alloc_size - leadsize - usize - large_pad;
if (leadsize != 0) {
- arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size -
- leadsize);
+ arena_chunk_map_misc_t *head_miscelm = miscelm;
+ arena_run_t *head_run = run;
+
+ miscelm = arena_miscelm_get(chunk,
+ arena_miscelm_to_pageind(head_miscelm) + (leadsize >>
+ LG_PAGE));
+ run = &miscelm->run;
+
+ arena_run_trim_head(arena, chunk, head_run, alloc_size,
+ alloc_size - leadsize);
}
if (trailsize != 0) {
- arena_run_trim_tail(arena, chunk, ret, size + trailsize, size,
- false);
+ arena_run_trim_tail(arena, chunk, run, usize + large_pad +
+ trailsize, usize + large_pad, false);
+ }
+ if (arena_run_init_large(arena, run, usize + large_pad, zero)) {
+ size_t run_ind =
+ arena_miscelm_to_pageind(arena_run_to_miscelm(run));
+ bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0);
+ bool decommitted = (arena_mapbits_decommitted_get(chunk,
+ run_ind) != 0);
+
+ assert(decommitted); /* Cause of OOM. */
+ arena_run_dalloc(arena, run, dirty, false, decommitted);
+ malloc_mutex_unlock(&arena->lock);
+ return (NULL);
}
- arena_run_init_large(arena, (arena_run_t *)ret, size, zero);
+ ret = arena_miscelm_to_rpages(miscelm);
if (config_stats) {
+ szind_t index = size2index(usize) - NBINS;
+
arena->stats.nmalloc_large++;
arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
+ arena->stats.allocated_large += usize;
+ arena->stats.lstats[index].nmalloc++;
+ arena->stats.lstats[index].nrequests++;
+ arena->stats.lstats[index].curruns++;
}
malloc_mutex_unlock(&arena->lock);
- if (config_fill && zero == false) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
+ if (config_fill && !zero) {
+ if (unlikely(opt_junk_alloc))
+ memset(ret, 0xa5, usize);
+ else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
+ }
+ return (ret);
+}
+
+void *
+arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
+ bool zero, tcache_t *tcache)
+{
+ void *ret;
+
+ if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
+ && (usize & PAGE_MASK) == 0))) {
+ /* Small; alignment doesn't require special run placement. */
+ ret = arena_malloc(tsd, arena, usize, zero, tcache);
+ } else if (usize <= large_maxclass && alignment <= PAGE) {
+ /*
+ * Large; alignment doesn't require special run placement.
+ * However, the cached pointer may be at a random offset from
+ * the base of the run, so do some bit manipulation to retrieve
+ * the base.
+ */
+ ret = arena_malloc(tsd, arena, usize, zero, tcache);
+ if (config_cache_oblivious)
+ ret = (void *)((uintptr_t)ret & ~PAGE_MASK);
+ } else {
+ if (likely(usize <= large_maxclass)) {
+ ret = arena_palloc_large(tsd, arena, usize, alignment,
+ zero);
+ } else if (likely(alignment <= chunksize))
+ ret = huge_malloc(tsd, arena, usize, zero, tcache);
+ else {
+ ret = huge_palloc(tsd, arena, usize, alignment, zero,
+ tcache);
+ }
}
return (ret);
}
@@ -1754,22 +2385,23 @@ void
arena_prof_promoted(const void *ptr, size_t size)
{
arena_chunk_t *chunk;
- size_t pageind, binind;
+ size_t pageind;
+ szind_t binind;
cassert(config_prof);
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
- assert(isalloc(ptr, false) == PAGE);
- assert(isalloc(ptr, true) == PAGE);
+ assert(isalloc(ptr, false) == LARGE_MINCLASS);
+ assert(isalloc(ptr, true) == LARGE_MINCLASS);
assert(size <= SMALL_MAXCLASS);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- binind = SMALL_SIZE2BIN(size);
+ binind = size2index(size);
assert(binind < NBINS);
arena_mapbits_large_binind_set(chunk, pageind, binind);
- assert(isalloc(ptr, false) == PAGE);
+ assert(isalloc(ptr, false) == LARGE_MINCLASS);
assert(isalloc(ptr, true) == size);
}
@@ -1782,7 +2414,8 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
if (run == bin->runcur)
bin->runcur = NULL;
else {
- size_t binind = arena_bin_index(chunk->arena, bin);
+ szind_t binind = arena_bin_index(extent_node_arena_get(
+ &chunk->node), bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
if (bin_info->nregs != 1) {
@@ -1800,46 +2433,15 @@ static void
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin)
{
- size_t binind;
- arena_bin_info_t *bin_info;
- size_t npages, run_ind, past;
assert(run != bin->runcur);
- assert(arena_run_tree_search(&bin->runs,
- arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE))
- == NULL);
-
- binind = arena_bin_index(chunk->arena, run->bin);
- bin_info = &arena_bin_info[binind];
+ assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) ==
+ NULL);
malloc_mutex_unlock(&bin->lock);
/******************************/
- npages = bin_info->run_size >> LG_PAGE;
- run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
- past = (size_t)(PAGE_CEILING((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
- bin_info->reg_interval - bin_info->redzone_size) -
- (uintptr_t)chunk) >> LG_PAGE);
malloc_mutex_lock(&arena->lock);
-
- /*
- * If the run was originally clean, and some pages were never touched,
- * trim the clean pages before deallocating the dirty portion of the
- * run.
- */
- assert(arena_mapbits_dirty_get(chunk, run_ind) ==
- arena_mapbits_dirty_get(chunk, run_ind+npages-1));
- if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind <
- npages) {
- /* Trim clean pages. Convert to large run beforehand. */
- assert(npages > 0);
- arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0);
- arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0);
- arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE),
- ((past - run_ind) << LG_PAGE), false);
- /* npages = past - run_ind; */
- }
- arena_run_dalloc(arena, run, true, false);
+ arena_run_dalloc_decommit(arena, chunk, run);
malloc_mutex_unlock(&arena->lock);
/****************************/
malloc_mutex_lock(&bin->lock);
@@ -1868,26 +2470,24 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_runs_insert(bin, run);
}
-void
-arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- arena_chunk_map_t *mapelm)
+static void
+arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+ arena_chunk_map_bits_t *bitselm, bool junked)
{
- size_t pageind;
+ size_t pageind, rpages_ind;
arena_run_t *run;
arena_bin_t *bin;
arena_bin_info_t *bin_info;
- size_t size, binind;
+ szind_t binind;
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
- bin = run->bin;
- binind = arena_ptr_small_binind_get(ptr, mapelm->bits);
+ rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
+ run = &arena_miscelm_get(chunk, rpages_ind)->run;
+ binind = run->binind;
+ bin = &arena->bins[binind];
bin_info = &arena_bin_info[binind];
- if (config_fill || config_stats)
- size = bin_info->reg_size;
- if (config_fill && opt_junk)
+ if (!junked && config_fill && unlikely(opt_junk_free))
arena_dalloc_junk_small(ptr, bin_info);
arena_run_reg_dalloc(run, ptr);
@@ -1898,23 +2498,32 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_bin_lower_run(arena, chunk, run, bin);
if (config_stats) {
- bin->stats.allocated -= size;
bin->stats.ndalloc++;
+ bin->stats.curregs--;
}
}
void
+arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+ arena_chunk_map_bits_t *bitselm)
+{
+
+ arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true);
+}
+
+void
arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind, arena_chunk_map_t *mapelm)
+ size_t pageind, arena_chunk_map_bits_t *bitselm)
{
arena_run_t *run;
arena_bin_t *bin;
+ size_t rpages_ind;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
- bin = run->bin;
+ rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
+ run = &arena_miscelm_get(chunk, rpages_ind)->run;
+ bin = &arena->bins[run->binind];
malloc_mutex_lock(&bin->lock);
- arena_dalloc_bin_locked(arena, chunk, ptr, mapelm);
+ arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false);
malloc_mutex_unlock(&bin->lock);
}
@@ -1922,26 +2531,26 @@ void
arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t pageind)
{
- arena_chunk_map_t *mapelm;
+ arena_chunk_map_bits_t *bitselm;
if (config_debug) {
/* arena_ptr_small_binind_get() does extra sanity checking. */
assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
pageind)) != BININD_INVALID);
}
- mapelm = arena_mapp_get(chunk, pageind);
- arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm);
+ bitselm = arena_bitselm_get(chunk, pageind);
+ arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm);
}
#ifdef JEMALLOC_JET
#undef arena_dalloc_junk_large
#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
#endif
-static void
+void
arena_dalloc_junk_large(void *ptr, size_t usize)
{
- if (config_fill && opt_junk)
+ if (config_fill && unlikely(opt_junk_free))
memset(ptr, 0x5a, usize);
}
#ifdef JEMALLOC_JET
@@ -1951,24 +2560,39 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large =
JEMALLOC_N(arena_dalloc_junk_large_impl);
#endif
-void
-arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+static void
+arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr, bool junked)
{
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+ arena_run_t *run = &miscelm->run;
if (config_fill || config_stats) {
- size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- size_t usize = arena_mapbits_large_size_get(chunk, pageind);
+ size_t usize = arena_mapbits_large_size_get(chunk, pageind) -
+ large_pad;
- arena_dalloc_junk_large(ptr, usize);
+ if (!junked)
+ arena_dalloc_junk_large(ptr, usize);
if (config_stats) {
+ szind_t index = size2index(usize) - NBINS;
+
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= usize;
- arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--;
+ arena->stats.lstats[index].ndalloc++;
+ arena->stats.lstats[index].curruns--;
}
}
- arena_run_dalloc(arena, (arena_run_t *)ptr, true, false);
+ arena_run_dalloc_decommit(arena, chunk, run);
+}
+
+void
+arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr)
+{
+
+ arena_dalloc_large_locked_impl(arena, chunk, ptr, true);
}
void
@@ -1976,7 +2600,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
malloc_mutex_lock(&arena->lock);
- arena_dalloc_large_locked(arena, chunk, ptr);
+ arena_dalloc_large_locked_impl(arena, chunk, ptr, false);
malloc_mutex_unlock(&arena->lock);
}
@@ -1984,6 +2608,9 @@ static void
arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t oldsize, size_t size)
{
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
+ arena_run_t *run = &miscelm->run;
assert(size < oldsize);
@@ -1992,54 +2619,78 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
* allocations.
*/
malloc_mutex_lock(&arena->lock);
- arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
- true);
+ arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size +
+ large_pad, true);
if (config_stats) {
+ szind_t oldindex = size2index(oldsize) - NBINS;
+ szind_t index = size2index(size) - NBINS;
+
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= oldsize;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
+ arena->stats.lstats[oldindex].ndalloc++;
+ arena->stats.lstats[oldindex].curruns--;
arena->stats.nmalloc_large++;
arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
+ arena->stats.lstats[index].nmalloc++;
+ arena->stats.lstats[index].nrequests++;
+ arena->stats.lstats[index].curruns++;
}
malloc_mutex_unlock(&arena->lock);
}
static bool
arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t oldsize, size_t size, size_t extra, bool zero)
+ size_t oldsize, size_t usize_min, size_t usize_max, bool zero)
{
size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- size_t npages = oldsize >> LG_PAGE;
+ size_t npages = (oldsize + large_pad) >> LG_PAGE;
size_t followsize;
- assert(oldsize == arena_mapbits_large_size_get(chunk, pageind));
+ assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) -
+ large_pad);
/* Try to extend the run. */
- assert(size + extra > oldsize);
malloc_mutex_lock(&arena->lock);
- if (pageind + npages < chunk_npages &&
- arena_mapbits_allocated_get(chunk, pageind+npages) == 0 &&
- (followsize = arena_mapbits_unallocated_size_get(chunk,
- pageind+npages)) >= size - oldsize) {
+ if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk,
+ pageind+npages) != 0)
+ goto label_fail;
+ followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages);
+ if (oldsize + followsize >= usize_min) {
/*
* The next run is available and sufficiently large. Split the
* following run, then merge the first part with the existing
* allocation.
*/
- size_t flag_dirty;
- size_t splitsize = (oldsize + followsize <= size + extra)
- ? followsize : size + extra - oldsize;
- arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk +
- ((pageind+npages) << LG_PAGE)), splitsize, zero);
+ arena_run_t *run;
+ size_t usize, splitsize, size, flag_dirty, flag_unzeroed_mask;
+
+ usize = usize_max;
+ while (oldsize + followsize < usize)
+ usize = index2size(size2index(usize)-1);
+ assert(usize >= usize_min);
+ assert(usize >= oldsize);
+ splitsize = usize - oldsize;
+ if (splitsize == 0)
+ goto label_fail;
+
+ run = &arena_miscelm_get(chunk, pageind+npages)->run;
+ if (arena_run_split_large(arena, run, splitsize, zero))
+ goto label_fail;
+
+ if (config_cache_oblivious && zero) {
+ /*
+ * Zero the trailing bytes of the original allocation's
+ * last page, since they are in an indeterminate state.
+ */
+ assert(PAGE_CEILING(oldsize) == oldsize);
+ memset((void *)((uintptr_t)ptr + oldsize), 0,
+ PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr);
+ }
size = oldsize + splitsize;
- npages = size >> LG_PAGE;
+ npages = (size + large_pad) >> LG_PAGE;
/*
* Mark the extended run as dirty if either portion of the run
@@ -2051,27 +2702,35 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
*/
flag_dirty = arena_mapbits_dirty_get(chunk, pageind) |
arena_mapbits_dirty_get(chunk, pageind+npages-1);
- arena_mapbits_large_set(chunk, pageind, size, flag_dirty);
- arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty);
+ flag_unzeroed_mask = flag_dirty == 0 ? CHUNK_MAP_UNZEROED : 0;
+ arena_mapbits_large_set(chunk, pageind, size + large_pad,
+ flag_dirty | (flag_unzeroed_mask &
+ arena_mapbits_unzeroed_get(chunk, pageind)));
+ arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty |
+ (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk,
+ pageind+npages-1)));
if (config_stats) {
+ szind_t oldindex = size2index(oldsize) - NBINS;
+ szind_t index = size2index(size) - NBINS;
+
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= oldsize;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
+ arena->stats.lstats[oldindex].ndalloc++;
+ arena->stats.lstats[oldindex].curruns--;
arena->stats.nmalloc_large++;
arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
+ arena->stats.lstats[index].nmalloc++;
+ arena->stats.lstats[index].nrequests++;
+ arena->stats.lstats[index].curruns++;
}
malloc_mutex_unlock(&arena->lock);
return (false);
}
+label_fail:
malloc_mutex_unlock(&arena->lock);
-
return (true);
}
@@ -2083,7 +2742,7 @@ static void
arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize)
{
- if (config_fill && opt_junk) {
+ if (config_fill && unlikely(opt_junk_free)) {
memset((void *)((uintptr_t)ptr + usize), 0x5a,
old_usize - usize);
}
@@ -2100,131 +2759,132 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large =
* always fail if growing an object, and the following run is already in use.
*/
static bool
-arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
- bool zero)
+arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min,
+ size_t usize_max, bool zero)
{
- size_t psize;
+ arena_chunk_t *chunk;
+ arena_t *arena;
- psize = PAGE_CEILING(size + extra);
- if (psize == oldsize) {
- /* Same size class. */
+ if (oldsize == usize_max) {
+ /* Current size class is compatible and maximal. */
return (false);
- } else {
- arena_chunk_t *chunk;
- arena_t *arena;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- arena = chunk->arena;
-
- if (psize < oldsize) {
- /* Fill before shrinking in order avoid a race. */
- arena_ralloc_junk_large(ptr, oldsize, psize);
- arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
- psize);
- return (false);
- } else {
- bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
- oldsize, PAGE_CEILING(size),
- psize - PAGE_CEILING(size), zero);
- if (config_fill && ret == false && zero == false) {
- if (opt_junk) {
- memset((void *)((uintptr_t)ptr +
- oldsize), 0xa5, isalloc(ptr,
- config_prof) - oldsize);
- } else if (opt_zero) {
- memset((void *)((uintptr_t)ptr +
- oldsize), 0, isalloc(ptr,
- config_prof) - oldsize);
- }
+ }
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ arena = extent_node_arena_get(&chunk->node);
+
+ if (oldsize < usize_max) {
+ bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize,
+ usize_min, usize_max, zero);
+ if (config_fill && !ret && !zero) {
+ if (unlikely(opt_junk_alloc)) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0xa5,
+ isalloc(ptr, config_prof) - oldsize);
+ } else if (unlikely(opt_zero)) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0,
+ isalloc(ptr, config_prof) - oldsize);
}
- return (ret);
}
+ return (ret);
}
+
+ assert(oldsize > usize_max);
+ /* Fill before shrinking in order avoid a race. */
+ arena_ralloc_junk_large(ptr, oldsize, usize_max);
+ arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max);
+ return (false);
}
bool
arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
bool zero)
{
+ size_t usize_min, usize_max;
- /*
- * Avoid moving the allocation if the size class can be left the same.
- */
- if (oldsize <= arena_maxclass) {
+ usize_min = s2u(size);
+ usize_max = s2u(size + extra);
+ if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) {
+ /*
+ * Avoid moving the allocation if the size class can be left the
+ * same.
+ */
if (oldsize <= SMALL_MAXCLASS) {
- assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
- == oldsize);
- if ((size + extra <= SMALL_MAXCLASS &&
- SMALL_SIZE2BIN(size + extra) ==
- SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
- size + extra >= oldsize))
+ assert(arena_bin_info[size2index(oldsize)].reg_size ==
+ oldsize);
+ if ((usize_max <= SMALL_MAXCLASS &&
+ size2index(usize_max) == size2index(oldsize)) ||
+ (size <= oldsize && usize_max >= oldsize))
return (false);
} else {
- assert(size <= arena_maxclass);
- if (size + extra > SMALL_MAXCLASS) {
- if (arena_ralloc_large(ptr, oldsize, size,
- extra, zero) == false)
+ if (usize_max > SMALL_MAXCLASS) {
+ if (!arena_ralloc_large(ptr, oldsize, usize_min,
+ usize_max, zero))
return (false);
}
}
+
+ /* Reallocation would require a move. */
+ return (true);
+ } else {
+ return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max,
+ zero));
}
+}
- /* Reallocation would require a move. */
- return (true);
+static void *
+arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache)
+{
+
+ if (alignment == 0)
+ return (arena_malloc(tsd, arena, usize, zero, tcache));
+ usize = sa2u(usize, alignment);
+ if (usize == 0)
+ return (NULL);
+ return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
}
void *
-arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
- bool try_tcache_dalloc)
+arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
+ size_t alignment, bool zero, tcache_t *tcache)
{
void *ret;
- size_t copysize;
+ size_t usize;
- /* Try to avoid moving the allocation. */
- if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false)
- return (ptr);
+ usize = s2u(size);
+ if (usize == 0)
+ return (NULL);
- /*
- * size and oldsize are different enough that we need to move the
- * object. In that case, fall back to allocating new space and
- * copying.
- */
- if (alignment != 0) {
- size_t usize = sa2u(size + extra, alignment);
- if (usize == 0)
- return (NULL);
- ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
- } else
- ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc);
+ if (likely(usize <= large_maxclass)) {
+ size_t copysize;
- if (ret == NULL) {
- if (extra == 0)
- return (NULL);
- /* Try again, this time without extra. */
- if (alignment != 0) {
- size_t usize = sa2u(size, alignment);
- if (usize == 0)
- return (NULL);
- ret = ipalloct(usize, alignment, zero, try_tcache_alloc,
- arena);
- } else
- ret = arena_malloc(arena, size, zero, try_tcache_alloc);
+ /* Try to avoid moving the allocation. */
+ if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero))
+ return (ptr);
+ /*
+ * size and oldsize are different enough that we need to move
+ * the object. In that case, fall back to allocating new space
+ * and copying.
+ */
+ ret = arena_ralloc_move_helper(tsd, arena, usize, alignment,
+ zero, tcache);
if (ret == NULL)
return (NULL);
- }
- /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
+ /*
+ * Junk/zero-filling were already done by
+ * ipalloc()/arena_malloc().
+ */
- /*
- * Copy at most size bytes (not size+extra), since the caller has no
- * expectation that the extra bytes will be reliably preserved.
- */
- copysize = (size < oldsize) ? size : oldsize;
- VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
- memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
+ copysize = (usize < oldsize) ? usize : oldsize;
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
+ memcpy(ret, ptr, copysize);
+ isqalloc(tsd, ptr, oldsize, tcache);
+ } else {
+ ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment,
+ zero, tcache);
+ }
return (ret);
}
@@ -2239,24 +2899,46 @@ arena_dss_prec_get(arena_t *arena)
return (ret);
}
-void
+bool
arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec)
{
+ if (!have_dss)
+ return (dss_prec != dss_prec_disabled);
malloc_mutex_lock(&arena->lock);
arena->dss_prec = dss_prec;
malloc_mutex_unlock(&arena->lock);
+ return (false);
+}
+
+ssize_t
+arena_lg_dirty_mult_default_get(void)
+{
+
+ return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default));
+}
+
+bool
+arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult)
+{
+
+ if (!arena_lg_dirty_mult_valid(lg_dirty_mult))
+ return (true);
+ atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult);
+ return (false);
}
void
-arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
- size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
- malloc_large_stats_t *lstats)
+arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult,
+ size_t *nactive, size_t *ndirty, arena_stats_t *astats,
+ malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats,
+ malloc_huge_stats_t *hstats)
{
unsigned i;
malloc_mutex_lock(&arena->lock);
*dss = dss_prec_names[arena->dss_prec];
+ *lg_dirty_mult = arena->lg_dirty_mult;
*nactive += arena->nactive;
*ndirty += arena->ndirty;
@@ -2264,10 +2946,15 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
astats->npurge += arena->stats.npurge;
astats->nmadvise += arena->stats.nmadvise;
astats->purged += arena->stats.purged;
+ astats->metadata_mapped += arena->stats.metadata_mapped;
+ astats->metadata_allocated += arena_metadata_allocated_get(arena);
astats->allocated_large += arena->stats.allocated_large;
astats->nmalloc_large += arena->stats.nmalloc_large;
astats->ndalloc_large += arena->stats.ndalloc_large;
astats->nrequests_large += arena->stats.nrequests_large;
+ astats->allocated_huge += arena->stats.allocated_huge;
+ astats->nmalloc_huge += arena->stats.nmalloc_huge;
+ astats->ndalloc_huge += arena->stats.ndalloc_huge;
for (i = 0; i < nlclasses; i++) {
lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
@@ -2275,16 +2962,22 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
lstats[i].nrequests += arena->stats.lstats[i].nrequests;
lstats[i].curruns += arena->stats.lstats[i].curruns;
}
+
+ for (i = 0; i < nhclasses; i++) {
+ hstats[i].nmalloc += arena->stats.hstats[i].nmalloc;
+ hstats[i].ndalloc += arena->stats.hstats[i].ndalloc;
+ hstats[i].curhchunks += arena->stats.hstats[i].curhchunks;
+ }
malloc_mutex_unlock(&arena->lock);
for (i = 0; i < NBINS; i++) {
arena_bin_t *bin = &arena->bins[i];
malloc_mutex_lock(&bin->lock);
- bstats[i].allocated += bin->stats.allocated;
bstats[i].nmalloc += bin->stats.nmalloc;
bstats[i].ndalloc += bin->stats.ndalloc;
bstats[i].nrequests += bin->stats.nrequests;
+ bstats[i].curregs += bin->stats.curregs;
if (config_tcache) {
bstats[i].nfills += bin->stats.nfills;
bstats[i].nflushes += bin->stats.nflushes;
@@ -2296,27 +2989,42 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
}
}
-bool
-arena_new(arena_t *arena, unsigned ind)
+arena_t *
+arena_new(unsigned ind)
{
+ arena_t *arena;
unsigned i;
arena_bin_t *bin;
+ /*
+ * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
+ * because there is no way to clean up if base_alloc() OOMs.
+ */
+ if (config_stats) {
+ arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t))
+ + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
+ nhclasses) * sizeof(malloc_huge_stats_t));
+ } else
+ arena = (arena_t *)base_alloc(sizeof(arena_t));
+ if (arena == NULL)
+ return (NULL);
+
arena->ind = ind;
arena->nthreads = 0;
-
if (malloc_mutex_init(&arena->lock))
- return (true);
+ return (NULL);
if (config_stats) {
memset(&arena->stats, 0, sizeof(arena_stats_t));
- arena->stats.lstats =
- (malloc_large_stats_t *)base_alloc(nlclasses *
- sizeof(malloc_large_stats_t));
- if (arena->stats.lstats == NULL)
- return (true);
+ arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
+ + CACHELINE_CEILING(sizeof(arena_t)));
memset(arena->stats.lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
+ arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
+ + CACHELINE_CEILING(sizeof(arena_t)) +
+ QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
+ memset(arena->stats.hstats, 0, nhclasses *
+ sizeof(malloc_huge_stats_t));
if (config_tcache)
ql_new(&arena->tcache_ql);
}
@@ -2324,56 +3032,76 @@ arena_new(arena_t *arena, unsigned ind)
if (config_prof)
arena->prof_accumbytes = 0;
+ if (config_cache_oblivious) {
+ /*
+ * A nondeterministic seed based on the address of arena reduces
+ * the likelihood of lockstep non-uniform cache index
+ * utilization among identical concurrent processes, but at the
+ * cost of test repeatability. For debug builds, instead use a
+ * deterministic seed.
+ */
+ arena->offset_state = config_debug ? ind :
+ (uint64_t)(uintptr_t)arena;
+ }
+
arena->dss_prec = chunk_dss_prec_get();
- /* Initialize chunks. */
- arena_chunk_dirty_new(&arena->chunks_dirty);
arena->spare = NULL;
+ arena->lg_dirty_mult = arena_lg_dirty_mult_default_get();
+ arena->purging = false;
arena->nactive = 0;
arena->ndirty = 0;
- arena->npurgatory = 0;
arena_avail_tree_new(&arena->runs_avail);
+ qr_new(&arena->runs_dirty, rd_link);
+ qr_new(&arena->chunks_cache, cc_link);
+
+ ql_new(&arena->huge);
+ if (malloc_mutex_init(&arena->huge_mtx))
+ return (NULL);
+
+ extent_tree_szad_new(&arena->chunks_szad_cached);
+ extent_tree_ad_new(&arena->chunks_ad_cached);
+ extent_tree_szad_new(&arena->chunks_szad_retained);
+ extent_tree_ad_new(&arena->chunks_ad_retained);
+ if (malloc_mutex_init(&arena->chunks_mtx))
+ return (NULL);
+ ql_new(&arena->node_cache);
+ if (malloc_mutex_init(&arena->node_cache_mtx))
+ return (NULL);
+
+ arena->chunk_hooks = chunk_hooks_default;
/* Initialize bins. */
for (i = 0; i < NBINS; i++) {
bin = &arena->bins[i];
if (malloc_mutex_init(&bin->lock))
- return (true);
+ return (NULL);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
if (config_stats)
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
}
- return (false);
+ return (arena);
}
/*
* Calculate bin_info->run_size such that it meets the following constraints:
*
- * *) bin_info->run_size >= min_run_size
- * *) bin_info->run_size <= arena_maxclass
- * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+ * *) bin_info->run_size <= arena_maxrun
* *) bin_info->nregs <= RUN_MAXREGS
*
- * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
- * calculated here, since these settings are all interdependent.
+ * bin_info->nregs and bin_info->reg0_offset are also calculated here, since
+ * these settings are all interdependent.
*/
-static size_t
-bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
+static void
+bin_info_run_size_calc(arena_bin_info_t *bin_info)
{
size_t pad_size;
- size_t try_run_size, good_run_size;
- uint32_t try_nregs, good_nregs;
- uint32_t try_hdr_size, good_hdr_size;
- uint32_t try_bitmap_offset, good_bitmap_offset;
- uint32_t try_ctx0_offset, good_ctx0_offset;
- uint32_t try_redzone0_offset, good_redzone0_offset;
-
- assert(min_run_size >= PAGE);
- assert(min_run_size <= arena_maxclass);
+ size_t try_run_size, perfect_run_size, actual_run_size;
+ uint32_t try_nregs, perfect_nregs, actual_nregs;
/*
* Determine redzone size based on minimum alignment and minimum
@@ -2382,8 +3110,9 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
* minimum alignment; without the padding, each redzone would have to
* be twice as large in order to maintain alignment.
*/
- if (config_fill && opt_redzone) {
- size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1);
+ if (config_fill && unlikely(opt_redzone)) {
+ size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) -
+ 1);
if (align_min <= REDZONE_MINSIZE) {
bin_info->redzone_size = REDZONE_MINSIZE;
pad_size = 0;
@@ -2399,127 +3128,113 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
(bin_info->redzone_size << 1);
/*
- * Calculate known-valid settings before entering the run_size
- * expansion loop, so that the first part of the loop always copies
- * valid settings.
- *
- * The do..while loop iteratively reduces the number of regions until
- * the run header and the regions no longer overlap. A closed formula
- * would be quite messy, since there is an interdependency between the
- * header's mask length and the number of regions.
+ * Compute run size under ideal conditions (no redzones, no limit on run
+ * size).
*/
- try_run_size = min_run_size;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) /
- bin_info->reg_interval)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
- }
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
- if (config_prof && opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /* Add space for one (prof_ctx_t *) per region. */
- try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
- } else
- try_ctx0_offset = 0;
- try_redzone0_offset = try_run_size - (try_nregs *
- bin_info->reg_interval) - pad_size;
- } while (try_hdr_size > try_redzone0_offset);
-
- /* run_size expansion loop. */
+ try_run_size = PAGE;
+ try_nregs = try_run_size / bin_info->reg_size;
do {
- /*
- * Copy valid settings before trying more aggressive settings.
- */
- good_run_size = try_run_size;
- good_nregs = try_nregs;
- good_hdr_size = try_hdr_size;
- good_bitmap_offset = try_bitmap_offset;
- good_ctx0_offset = try_ctx0_offset;
- good_redzone0_offset = try_redzone0_offset;
-
- /* Try more aggressive settings. */
+ perfect_run_size = try_run_size;
+ perfect_nregs = try_nregs;
+
try_run_size += PAGE;
- try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) /
- bin_info->reg_interval)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
- }
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
- if (config_prof && opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /*
- * Add space for one (prof_ctx_t *) per region.
- */
- try_hdr_size += try_nregs *
- sizeof(prof_ctx_t *);
- }
- try_redzone0_offset = try_run_size - (try_nregs *
- bin_info->reg_interval) - pad_size;
- } while (try_hdr_size > try_redzone0_offset);
- } while (try_run_size <= arena_maxclass
- && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) >
- RUN_MAX_OVRHD_RELAX
- && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
- && try_nregs < RUN_MAXREGS);
+ try_nregs = try_run_size / bin_info->reg_size;
+ } while (perfect_run_size != perfect_nregs * bin_info->reg_size);
+ assert(perfect_nregs <= RUN_MAXREGS);
+
+ actual_run_size = perfect_run_size;
+ actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval;
+
+ /*
+ * Redzones can require enough padding that not even a single region can
+ * fit within the number of pages that would normally be dedicated to a
+ * run for this size class. Increase the run size until at least one
+ * region fits.
+ */
+ while (actual_nregs == 0) {
+ assert(config_fill && unlikely(opt_redzone));
- assert(good_hdr_size <= good_redzone0_offset);
+ actual_run_size += PAGE;
+ actual_nregs = (actual_run_size - pad_size) /
+ bin_info->reg_interval;
+ }
+
+ /*
+ * Make sure that the run will fit within an arena chunk.
+ */
+ while (actual_run_size > arena_maxrun) {
+ actual_run_size -= PAGE;
+ actual_nregs = (actual_run_size - pad_size) /
+ bin_info->reg_interval;
+ }
+ assert(actual_nregs > 0);
+ assert(actual_run_size == s2u(actual_run_size));
/* Copy final settings. */
- bin_info->run_size = good_run_size;
- bin_info->nregs = good_nregs;
- bin_info->bitmap_offset = good_bitmap_offset;
- bin_info->ctx0_offset = good_ctx0_offset;
- bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
+ bin_info->run_size = actual_run_size;
+ bin_info->nregs = actual_nregs;
+ bin_info->reg0_offset = actual_run_size - (actual_nregs *
+ bin_info->reg_interval) - pad_size + bin_info->redzone_size;
+
+ if (actual_run_size > small_maxrun)
+ small_maxrun = actual_run_size;
assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
* bin_info->reg_interval) + pad_size == bin_info->run_size);
-
- return (good_run_size);
}
static void
bin_info_init(void)
{
arena_bin_info_t *bin_info;
- size_t prev_run_size = PAGE;
-#define SIZE_CLASS(bin, delta, size) \
- bin_info = &arena_bin_info[bin]; \
+#define BIN_INFO_INIT_bin_yes(index, size) \
+ bin_info = &arena_bin_info[index]; \
bin_info->reg_size = size; \
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
+ bin_info_run_size_calc(bin_info); \
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+#define BIN_INFO_INIT_bin_no(index, size)
+#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
SIZE_CLASSES
-#undef SIZE_CLASS
+#undef BIN_INFO_INIT_bin_yes
+#undef BIN_INFO_INIT_bin_no
+#undef SC
}
-void
+static bool
+small_run_size_init(void)
+{
+
+ assert(small_maxrun != 0);
+
+ small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >>
+ LG_PAGE));
+ if (small_run_tab == NULL)
+ return (true);
+
+#define TAB_INIT_bin_yes(index, size) { \
+ arena_bin_info_t *bin_info = &arena_bin_info[index]; \
+ small_run_tab[bin_info->run_size >> LG_PAGE] = true; \
+ }
+#define TAB_INIT_bin_no(index, size)
+#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+ TAB_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
+ SIZE_CLASSES
+#undef TAB_INIT_bin_yes
+#undef TAB_INIT_bin_no
+#undef SC
+
+ return (false);
+}
+
+bool
arena_boot(void)
{
- size_t header_size;
unsigned i;
+ arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
+
/*
* Compute the header size such that it is large enough to contain the
* page map. The page map is biased to omit entries for the header
@@ -2534,16 +3249,33 @@ arena_boot(void)
*/
map_bias = 0;
for (i = 0; i < 3; i++) {
- header_size = offsetof(arena_chunk_t, map) +
- (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
- map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK)
- != 0);
+ size_t header_size = offsetof(arena_chunk_t, map_bits) +
+ ((sizeof(arena_chunk_map_bits_t) +
+ sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias));
+ map_bias = (header_size + PAGE_MASK) >> LG_PAGE;
}
assert(map_bias > 0);
- arena_maxclass = chunksize - (map_bias << LG_PAGE);
+ map_misc_offset = offsetof(arena_chunk_t, map_bits) +
+ sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias);
+
+ arena_maxrun = chunksize - (map_bias << LG_PAGE);
+ assert(arena_maxrun > 0);
+ large_maxclass = index2size(size2index(chunksize)-1);
+ if (large_maxclass > arena_maxrun) {
+ /*
+ * For small chunk sizes it's possible for there to be fewer
+ * non-header pages available than are necessary to serve the
+ * size classes just below chunksize.
+ */
+ large_maxclass = arena_maxrun;
+ }
+ assert(large_maxclass > 0);
+ nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS);
+ nhclasses = NSIZES - nlclasses - NBINS;
bin_info_init();
+ return (small_run_size_init());
}
void
@@ -2552,6 +3284,9 @@ arena_prefork(arena_t *arena)
unsigned i;
malloc_mutex_prefork(&arena->lock);
+ malloc_mutex_prefork(&arena->huge_mtx);
+ malloc_mutex_prefork(&arena->chunks_mtx);
+ malloc_mutex_prefork(&arena->node_cache_mtx);
for (i = 0; i < NBINS; i++)
malloc_mutex_prefork(&arena->bins[i].lock);
}
@@ -2563,6 +3298,9 @@ arena_postfork_parent(arena_t *arena)
for (i = 0; i < NBINS; i++)
malloc_mutex_postfork_parent(&arena->bins[i].lock);
+ malloc_mutex_postfork_parent(&arena->node_cache_mtx);
+ malloc_mutex_postfork_parent(&arena->chunks_mtx);
+ malloc_mutex_postfork_parent(&arena->huge_mtx);
malloc_mutex_postfork_parent(&arena->lock);
}
@@ -2573,5 +3311,8 @@ arena_postfork_child(arena_t *arena)
for (i = 0; i < NBINS; i++)
malloc_mutex_postfork_child(&arena->bins[i].lock);
+ malloc_mutex_postfork_child(&arena->node_cache_mtx);
+ malloc_mutex_postfork_child(&arena->chunks_mtx);
+ malloc_mutex_postfork_child(&arena->huge_mtx);
malloc_mutex_postfork_child(&arena->lock);
}
diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c
index 4e62e8fa9..7cdcfed86 100644
--- a/deps/jemalloc/src/base.c
+++ b/deps/jemalloc/src/base.c
@@ -5,107 +5,138 @@
/* Data. */
static malloc_mutex_t base_mtx;
-
-/*
- * Current pages that are being used for internal memory allocations. These
- * pages are carved up in cacheline-size quanta, so that there is no chance of
- * false cache line sharing.
- */
-static void *base_pages;
-static void *base_next_addr;
-static void *base_past_addr; /* Addr immediately past base_pages. */
+static extent_tree_t base_avail_szad;
static extent_node_t *base_nodes;
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static bool base_pages_alloc(size_t minsize);
+static size_t base_allocated;
+static size_t base_resident;
+static size_t base_mapped;
/******************************************************************************/
-static bool
-base_pages_alloc(size_t minsize)
+/* base_mtx must be held. */
+static extent_node_t *
+base_node_try_alloc(void)
{
- size_t csize;
- bool zero;
+ extent_node_t *node;
+
+ if (base_nodes == NULL)
+ return (NULL);
+ node = base_nodes;
+ base_nodes = *(extent_node_t **)node;
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
+ return (node);
+}
- assert(minsize != 0);
- csize = CHUNK_CEILING(minsize);
- zero = false;
- base_pages = chunk_alloc(csize, chunksize, true, &zero,
- chunk_dss_prec_get());
- if (base_pages == NULL)
- return (true);
- base_next_addr = base_pages;
- base_past_addr = (void *)((uintptr_t)base_pages + csize);
+/* base_mtx must be held. */
+static void
+base_node_dalloc(extent_node_t *node)
+{
- return (false);
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
+ *(extent_node_t **)node = base_nodes;
+ base_nodes = node;
}
-void *
-base_alloc(size_t size)
+/* base_mtx must be held. */
+static extent_node_t *
+base_chunk_alloc(size_t minsize)
{
- void *ret;
- size_t csize;
+ extent_node_t *node;
+ size_t csize, nsize;
+ void *addr;
- /* Round size up to nearest multiple of the cacheline size. */
- csize = CACHELINE_CEILING(size);
-
- malloc_mutex_lock(&base_mtx);
- /* Make sure there's enough space for the allocation. */
- if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
- if (base_pages_alloc(csize)) {
- malloc_mutex_unlock(&base_mtx);
- return (NULL);
+ assert(minsize != 0);
+ node = base_node_try_alloc();
+ /* Allocate enough space to also carve a node out if necessary. */
+ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0;
+ csize = CHUNK_CEILING(minsize + nsize);
+ addr = chunk_alloc_base(csize);
+ if (addr == NULL) {
+ if (node != NULL)
+ base_node_dalloc(node);
+ return (NULL);
+ }
+ base_mapped += csize;
+ if (node == NULL) {
+ node = (extent_node_t *)addr;
+ addr = (void *)((uintptr_t)addr + nsize);
+ csize -= nsize;
+ if (config_stats) {
+ base_allocated += nsize;
+ base_resident += PAGE_CEILING(nsize);
}
}
- /* Allocate. */
- ret = base_next_addr;
- base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
- malloc_mutex_unlock(&base_mtx);
- VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
-
- return (ret);
+ extent_node_init(node, NULL, addr, csize, true, true);
+ return (node);
}
+/*
+ * base_alloc() guarantees demand-zeroed memory, in order to make multi-page
+ * sparse data structures such as radix tree nodes efficient with respect to
+ * physical memory usage.
+ */
void *
-base_calloc(size_t number, size_t size)
-{
- void *ret = base_alloc(number * size);
-
- if (ret != NULL)
- memset(ret, 0, number * size);
-
- return (ret);
-}
-
-extent_node_t *
-base_node_alloc(void)
+base_alloc(size_t size)
{
- extent_node_t *ret;
+ void *ret;
+ size_t csize, usize;
+ extent_node_t *node;
+ extent_node_t key;
+
+ /*
+ * Round size up to nearest multiple of the cacheline size, so that
+ * there is no chance of false cache line sharing.
+ */
+ csize = CACHELINE_CEILING(size);
+ usize = s2u(csize);
+ extent_node_init(&key, NULL, NULL, usize, false, false);
malloc_mutex_lock(&base_mtx);
- if (base_nodes != NULL) {
- ret = base_nodes;
- base_nodes = *(extent_node_t **)ret;
- malloc_mutex_unlock(&base_mtx);
- VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t));
+ node = extent_tree_szad_nsearch(&base_avail_szad, &key);
+ if (node != NULL) {
+ /* Use existing space. */
+ extent_tree_szad_remove(&base_avail_szad, node);
} else {
- malloc_mutex_unlock(&base_mtx);
- ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
+ /* Try to allocate more space. */
+ node = base_chunk_alloc(csize);
+ }
+ if (node == NULL) {
+ ret = NULL;
+ goto label_return;
}
+ ret = extent_node_addr_get(node);
+ if (extent_node_size_get(node) > csize) {
+ extent_node_addr_set(node, (void *)((uintptr_t)ret + csize));
+ extent_node_size_set(node, extent_node_size_get(node) - csize);
+ extent_tree_szad_insert(&base_avail_szad, node);
+ } else
+ base_node_dalloc(node);
+ if (config_stats) {
+ base_allocated += csize;
+ /*
+ * Add one PAGE to base_resident for every page boundary that is
+ * crossed by the new allocation.
+ */
+ base_resident += PAGE_CEILING((uintptr_t)ret + csize) -
+ PAGE_CEILING((uintptr_t)ret);
+ }
+ JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize);
+label_return:
+ malloc_mutex_unlock(&base_mtx);
return (ret);
}
void
-base_node_dealloc(extent_node_t *node)
+base_stats_get(size_t *allocated, size_t *resident, size_t *mapped)
{
- VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
malloc_mutex_lock(&base_mtx);
- *(extent_node_t **)node = base_nodes;
- base_nodes = node;
+ assert(base_allocated <= base_resident);
+ assert(base_resident <= base_mapped);
+ *allocated = base_allocated;
+ *resident = base_resident;
+ *mapped = base_mapped;
malloc_mutex_unlock(&base_mtx);
}
@@ -113,9 +144,10 @@ bool
base_boot(void)
{
- base_nodes = NULL;
if (malloc_mutex_init(&base_mtx))
return (true);
+ extent_tree_szad_new(&base_avail_szad);
+ base_nodes = NULL;
return (false);
}
diff --git a/deps/jemalloc/src/bitmap.c b/deps/jemalloc/src/bitmap.c
index e2bd907d5..c733372b4 100644
--- a/deps/jemalloc/src/bitmap.c
+++ b/deps/jemalloc/src/bitmap.c
@@ -2,19 +2,6 @@
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static size_t bits2groups(size_t nbits);
-
-/******************************************************************************/
-
-static size_t
-bits2groups(size_t nbits)
-{
-
- return ((nbits >> LG_BITMAP_GROUP_NBITS) +
- !!(nbits & BITMAP_GROUP_NBITS_MASK));
-}
void
bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
@@ -31,15 +18,16 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
* that requires only one group.
*/
binfo->levels[0].group_offset = 0;
- group_count = bits2groups(nbits);
+ group_count = BITMAP_BITS2GROUPS(nbits);
for (i = 1; group_count > 1; i++) {
assert(i < BITMAP_MAX_LEVELS);
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
- group_count = bits2groups(group_count);
+ group_count = BITMAP_BITS2GROUPS(group_count);
}
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
+ assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX);
binfo->nlevels = i;
binfo->nbits = nbits;
}
diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c
index 90ab116ae..6ba1ca7a5 100644
--- a/deps/jemalloc/src/chunk.c
+++ b/deps/jemalloc/src/chunk.c
@@ -5,129 +5,315 @@
/* Data. */
const char *opt_dss = DSS_DEFAULT;
-size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
+size_t opt_lg_chunk = 0;
-malloc_mutex_t chunks_mtx;
-chunk_stats_t stats_chunks;
+/* Used exclusively for gdump triggering. */
+static size_t curchunks;
+static size_t highchunks;
-/*
- * Trees of chunks that were previously allocated (trees differ only in node
- * ordering). These are used when allocating chunks, in an attempt to re-use
- * address space. Depending on function, different tree orderings are needed,
- * which is why there are two trees with the same contents.
- */
-static extent_tree_t chunks_szad_mmap;
-static extent_tree_t chunks_ad_mmap;
-static extent_tree_t chunks_szad_dss;
-static extent_tree_t chunks_ad_dss;
-
-rtree_t *chunks_rtree;
+rtree_t chunks_rtree;
/* Various chunk-related settings. */
size_t chunksize;
size_t chunksize_mask; /* (chunksize - 1). */
size_t chunk_npages;
-size_t map_bias;
-size_t arena_maxclass; /* Max size class for arenas. */
+
+static void *chunk_alloc_default(void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit, unsigned arena_ind);
+static bool chunk_dalloc_default(void *chunk, size_t size, bool committed,
+ unsigned arena_ind);
+static bool chunk_commit_default(void *chunk, size_t size, size_t offset,
+ size_t length, unsigned arena_ind);
+static bool chunk_decommit_default(void *chunk, size_t size, size_t offset,
+ size_t length, unsigned arena_ind);
+static bool chunk_purge_default(void *chunk, size_t size, size_t offset,
+ size_t length, unsigned arena_ind);
+static bool chunk_split_default(void *chunk, size_t size, size_t size_a,
+ size_t size_b, bool committed, unsigned arena_ind);
+static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b,
+ size_t size_b, bool committed, unsigned arena_ind);
+
+const chunk_hooks_t chunk_hooks_default = {
+ chunk_alloc_default,
+ chunk_dalloc_default,
+ chunk_commit_default,
+ chunk_decommit_default,
+ chunk_purge_default,
+ chunk_split_default,
+ chunk_merge_default
+};
/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
-static void *chunk_recycle(extent_tree_t *chunks_szad,
- extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base,
- bool *zero);
-static void chunk_record(extent_tree_t *chunks_szad,
- extent_tree_t *chunks_ad, void *chunk, size_t size);
+static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
+ void *chunk, size_t size, bool zeroed, bool committed);
/******************************************************************************/
+static chunk_hooks_t
+chunk_hooks_get_locked(arena_t *arena)
+{
+
+ return (arena->chunk_hooks);
+}
+
+chunk_hooks_t
+chunk_hooks_get(arena_t *arena)
+{
+ chunk_hooks_t chunk_hooks;
+
+ malloc_mutex_lock(&arena->chunks_mtx);
+ chunk_hooks = chunk_hooks_get_locked(arena);
+ malloc_mutex_unlock(&arena->chunks_mtx);
+
+ return (chunk_hooks);
+}
+
+chunk_hooks_t
+chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks)
+{
+ chunk_hooks_t old_chunk_hooks;
+
+ malloc_mutex_lock(&arena->chunks_mtx);
+ old_chunk_hooks = arena->chunk_hooks;
+ /*
+ * Copy each field atomically so that it is impossible for readers to
+ * see partially updated pointers. There are places where readers only
+ * need one hook function pointer (therefore no need to copy the
+ * entirety of arena->chunk_hooks), and stale reads do not affect
+ * correctness, so they perform unlocked reads.
+ */
+#define ATOMIC_COPY_HOOK(n) do { \
+ union { \
+ chunk_##n##_t **n; \
+ void **v; \
+ } u; \
+ u.n = &arena->chunk_hooks.n; \
+ atomic_write_p(u.v, chunk_hooks->n); \
+} while (0)
+ ATOMIC_COPY_HOOK(alloc);
+ ATOMIC_COPY_HOOK(dalloc);
+ ATOMIC_COPY_HOOK(commit);
+ ATOMIC_COPY_HOOK(decommit);
+ ATOMIC_COPY_HOOK(purge);
+ ATOMIC_COPY_HOOK(split);
+ ATOMIC_COPY_HOOK(merge);
+#undef ATOMIC_COPY_HOOK
+ malloc_mutex_unlock(&arena->chunks_mtx);
+
+ return (old_chunk_hooks);
+}
+
+static void
+chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ bool locked)
+{
+ static const chunk_hooks_t uninitialized_hooks =
+ CHUNK_HOOKS_INITIALIZER;
+
+ if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) ==
+ 0) {
+ *chunk_hooks = locked ? chunk_hooks_get_locked(arena) :
+ chunk_hooks_get(arena);
+ }
+}
+
+static void
+chunk_hooks_assure_initialized_locked(arena_t *arena,
+ chunk_hooks_t *chunk_hooks)
+{
+
+ chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true);
+}
+
+static void
+chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks)
+{
+
+ chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false);
+}
+
+bool
+chunk_register(const void *chunk, const extent_node_t *node)
+{
+
+ assert(extent_node_addr_get(node) == chunk);
+
+ if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node))
+ return (true);
+ if (config_prof && opt_prof) {
+ size_t size = extent_node_size_get(node);
+ size_t nadd = (size == 0) ? 1 : size / chunksize;
+ size_t cur = atomic_add_z(&curchunks, nadd);
+ size_t high = atomic_read_z(&highchunks);
+ while (cur > high && atomic_cas_z(&highchunks, high, cur)) {
+ /*
+ * Don't refresh cur, because it may have decreased
+ * since this thread lost the highchunks update race.
+ */
+ high = atomic_read_z(&highchunks);
+ }
+ if (cur > high && prof_gdump_get_unlocked())
+ prof_gdump();
+ }
+
+ return (false);
+}
+
+void
+chunk_deregister(const void *chunk, const extent_node_t *node)
+{
+ bool err;
+
+ err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL);
+ assert(!err);
+ if (config_prof && opt_prof) {
+ size_t size = extent_node_size_get(node);
+ size_t nsub = (size == 0) ? 1 : size / chunksize;
+ assert(atomic_read_z(&curchunks) >= nsub);
+ atomic_sub_z(&curchunks, nsub);
+ }
+}
+
+/*
+ * Do first-best-fit chunk selection, i.e. select the lowest chunk that best
+ * fits.
+ */
+static extent_node_t *
+chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad,
+ extent_tree_t *chunks_ad, size_t size)
+{
+ extent_node_t key;
+
+ assert(size == CHUNK_CEILING(size));
+
+ extent_node_init(&key, arena, NULL, size, false, false);
+ return (extent_tree_szad_nsearch(chunks_szad, &key));
+}
+
static void *
-chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
- size_t alignment, bool base, bool *zero)
+chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
+ void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit,
+ bool dalloc_node)
{
void *ret;
extent_node_t *node;
- extent_node_t key;
size_t alloc_size, leadsize, trailsize;
- bool zeroed;
+ bool zeroed, committed;
- if (base) {
- /*
- * This function may need to call base_node_{,de}alloc(), but
- * the current chunk allocation request is on behalf of the
- * base allocator. Avoid deadlock (and if that weren't an
- * issue, potential for infinite recursion) by returning NULL.
- */
- return (NULL);
- }
+ assert(new_addr == NULL || alignment == chunksize);
+ /*
+ * Cached chunks use the node linkage embedded in their headers, in
+ * which case dalloc_node is true, and new_addr is non-NULL because
+ * we're operating on a specific chunk.
+ */
+ assert(dalloc_node || new_addr != NULL);
- alloc_size = size + alignment - chunksize;
+ alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize));
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
- key.addr = NULL;
- key.size = alloc_size;
- malloc_mutex_lock(&chunks_mtx);
- node = extent_tree_szad_nsearch(chunks_szad, &key);
- if (node == NULL) {
- malloc_mutex_unlock(&chunks_mtx);
+ malloc_mutex_lock(&arena->chunks_mtx);
+ chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+ if (new_addr != NULL) {
+ extent_node_t key;
+ extent_node_init(&key, arena, new_addr, alloc_size, false,
+ false);
+ node = extent_tree_ad_search(chunks_ad, &key);
+ } else {
+ node = chunk_first_best_fit(arena, chunks_szad, chunks_ad,
+ alloc_size);
+ }
+ if (node == NULL || (new_addr != NULL && extent_node_size_get(node) <
+ size)) {
+ malloc_mutex_unlock(&arena->chunks_mtx);
return (NULL);
}
- leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
- (uintptr_t)node->addr;
- assert(node->size >= leadsize + size);
- trailsize = node->size - leadsize - size;
- ret = (void *)((uintptr_t)node->addr + leadsize);
- zeroed = node->zeroed;
+ leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node),
+ alignment) - (uintptr_t)extent_node_addr_get(node);
+ assert(new_addr == NULL || leadsize == 0);
+ assert(extent_node_size_get(node) >= leadsize + size);
+ trailsize = extent_node_size_get(node) - leadsize - size;
+ ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize);
+ zeroed = extent_node_zeroed_get(node);
if (zeroed)
- *zero = true;
+ *zero = true;
+ committed = extent_node_committed_get(node);
+ if (committed)
+ *commit = true;
+ /* Split the lead. */
+ if (leadsize != 0 &&
+ chunk_hooks->split(extent_node_addr_get(node),
+ extent_node_size_get(node), leadsize, size, false, arena->ind)) {
+ malloc_mutex_unlock(&arena->chunks_mtx);
+ return (NULL);
+ }
/* Remove node from the tree. */
extent_tree_szad_remove(chunks_szad, node);
extent_tree_ad_remove(chunks_ad, node);
+ arena_chunk_cache_maybe_remove(arena, node, cache);
if (leadsize != 0) {
/* Insert the leading space as a smaller chunk. */
- node->size = leadsize;
+ extent_node_size_set(node, leadsize);
extent_tree_szad_insert(chunks_szad, node);
extent_tree_ad_insert(chunks_ad, node);
+ arena_chunk_cache_maybe_insert(arena, node, cache);
node = NULL;
}
if (trailsize != 0) {
+ /* Split the trail. */
+ if (chunk_hooks->split(ret, size + trailsize, size,
+ trailsize, false, arena->ind)) {
+ if (dalloc_node && node != NULL)
+ arena_node_dalloc(arena, node);
+ malloc_mutex_unlock(&arena->chunks_mtx);
+ chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad,
+ cache, ret, size + trailsize, zeroed, committed);
+ return (NULL);
+ }
/* Insert the trailing space as a smaller chunk. */
if (node == NULL) {
- /*
- * An additional node is required, but
- * base_node_alloc() can cause a new base chunk to be
- * allocated. Drop chunks_mtx in order to avoid
- * deadlock, and if node allocation fails, deallocate
- * the result before returning an error.
- */
- malloc_mutex_unlock(&chunks_mtx);
- node = base_node_alloc();
+ node = arena_node_alloc(arena);
if (node == NULL) {
- chunk_dealloc(ret, size, true);
+ malloc_mutex_unlock(&arena->chunks_mtx);
+ chunk_record(arena, chunk_hooks, chunks_szad,
+ chunks_ad, cache, ret, size + trailsize,
+ zeroed, committed);
return (NULL);
}
- malloc_mutex_lock(&chunks_mtx);
}
- node->addr = (void *)((uintptr_t)(ret) + size);
- node->size = trailsize;
- node->zeroed = zeroed;
+ extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size),
+ trailsize, zeroed, committed);
extent_tree_szad_insert(chunks_szad, node);
extent_tree_ad_insert(chunks_ad, node);
+ arena_chunk_cache_maybe_insert(arena, node, cache);
node = NULL;
}
- malloc_mutex_unlock(&chunks_mtx);
+ if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) {
+ malloc_mutex_unlock(&arena->chunks_mtx);
+ chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache,
+ ret, size, zeroed, committed);
+ return (NULL);
+ }
+ malloc_mutex_unlock(&arena->chunks_mtx);
- if (node != NULL)
- base_node_dealloc(node);
+ assert(dalloc_node || node != NULL);
+ if (dalloc_node && node != NULL)
+ arena_node_dalloc(arena, node);
if (*zero) {
- if (zeroed == false)
+ if (!zeroed)
memset(ret, 0, size);
else if (config_debug) {
size_t i;
size_t *p = (size_t *)(uintptr_t)ret;
- VALGRIND_MAKE_MEM_DEFINED(ret, size);
+ JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size);
for (i = 0; i < size / sizeof(size_t); i++)
assert(p[i] == 0);
}
@@ -136,138 +322,214 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
}
/*
- * If the caller specifies (*zero == false), it is still possible to receive
- * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
- * takes advantage of this to avoid demanding zeroed chunks, but taking
- * advantage of them if they are returned.
+ * If the caller specifies (!*zero), it is still possible to receive zeroed
+ * memory, in which case *zero is toggled to true. arena_chunk_alloc() takes
+ * advantage of this to avoid demanding zeroed chunks, but taking advantage of
+ * them if they are returned.
*/
-void *
-chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
- dss_prec_t dss_prec)
+static void *
+chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment,
+ bool *zero, bool *commit, dss_prec_t dss_prec)
{
void *ret;
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
assert(size != 0);
assert((size & chunksize_mask) == 0);
assert(alignment != 0);
assert((alignment & chunksize_mask) == 0);
+ /* Retained. */
+ if ((ret = chunk_recycle(arena, &chunk_hooks,
+ &arena->chunks_szad_retained, &arena->chunks_ad_retained, false,
+ new_addr, size, alignment, zero, commit, true)) != NULL)
+ return (ret);
+
/* "primary" dss. */
- if (config_dss && dss_prec == dss_prec_primary) {
- if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
- goto label_return;
- }
- /* mmap. */
- if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
- goto label_return;
+ if (have_dss && dss_prec == dss_prec_primary && (ret =
+ chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
+ NULL)
+ return (ret);
+ /*
+ * mmap. Requesting an address is not implemented for
+ * chunk_alloc_mmap(), so only call it if (new_addr == NULL).
+ */
+ if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero,
+ commit)) != NULL)
+ return (ret);
/* "secondary" dss. */
- if (config_dss && dss_prec == dss_prec_secondary) {
- if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
- goto label_return;
- }
+ if (have_dss && dss_prec == dss_prec_secondary && (ret =
+ chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) !=
+ NULL)
+ return (ret);
/* All strategies for allocation failed. */
- ret = NULL;
-label_return:
- if (ret != NULL) {
- if (config_ivsalloc && base == false) {
- if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) {
- chunk_dealloc(ret, size, true);
- return (NULL);
- }
- }
- if (config_stats || config_prof) {
- bool gdump;
- malloc_mutex_lock(&chunks_mtx);
- if (config_stats)
- stats_chunks.nchunks += (size / chunksize);
- stats_chunks.curchunks += (size / chunksize);
- if (stats_chunks.curchunks > stats_chunks.highchunks) {
- stats_chunks.highchunks =
- stats_chunks.curchunks;
- if (config_prof)
- gdump = true;
- } else if (config_prof)
- gdump = false;
- malloc_mutex_unlock(&chunks_mtx);
- if (config_prof && opt_prof && opt_prof_gdump && gdump)
- prof_gdump();
- }
- if (config_valgrind)
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- }
- assert(CHUNK_ADDR2BASE(ret) == ret);
+ return (NULL);
+}
+
+void *
+chunk_alloc_base(size_t size)
+{
+ void *ret;
+ bool zero, commit;
+
+ /*
+ * Directly call chunk_alloc_mmap() rather than chunk_alloc_core()
+ * because it's critical that chunk_alloc_base() return untouched
+ * demand-zeroed virtual memory.
+ */
+ zero = true;
+ commit = true;
+ ret = chunk_alloc_mmap(size, chunksize, &zero, &commit);
+ if (ret == NULL)
+ return (NULL);
+ if (config_valgrind)
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+
return (ret);
}
-static void
-chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
- size_t size)
+void *
+chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool dalloc_node)
{
- bool unzeroed;
- extent_node_t *xnode, *node, *prev, *xprev, key;
+ void *ret;
+ bool commit;
+
+ assert(size != 0);
+ assert((size & chunksize_mask) == 0);
+ assert(alignment != 0);
+ assert((alignment & chunksize_mask) == 0);
- unzeroed = pages_purge(chunk, size);
- VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
+ commit = true;
+ ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached,
+ &arena->chunks_ad_cached, true, new_addr, size, alignment, zero,
+ &commit, dalloc_node);
+ if (ret == NULL)
+ return (NULL);
+ assert(commit);
+ if (config_valgrind)
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+ return (ret);
+}
+
+static arena_t *
+chunk_arena_get(unsigned arena_ind)
+{
+ arena_t *arena;
+ /* Dodge tsd for a0 in order to avoid bootstrapping issues. */
+ arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind,
+ false, true);
/*
- * Allocate a node before acquiring chunks_mtx even though it might not
- * be needed, because base_node_alloc() may cause a new base chunk to
- * be allocated, which could cause deadlock if chunks_mtx were already
- * held.
+ * The arena we're allocating on behalf of must have been initialized
+ * already.
*/
- xnode = base_node_alloc();
- /* Use xprev to implement conditional deferred deallocation of prev. */
- xprev = NULL;
+ assert(arena != NULL);
+ return (arena);
+}
+
+static void *
+chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
+ bool *commit, unsigned arena_ind)
+{
+ void *ret;
+ arena_t *arena;
+
+ arena = chunk_arena_get(arena_ind);
+ ret = chunk_alloc_core(arena, new_addr, size, alignment, zero,
+ commit, arena->dss_prec);
+ if (ret == NULL)
+ return (NULL);
+ if (config_valgrind)
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- malloc_mutex_lock(&chunks_mtx);
- key.addr = (void *)((uintptr_t)chunk + size);
+ return (ret);
+}
+
+void *
+chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool *commit)
+{
+ void *ret;
+
+ chunk_hooks_assure_initialized(arena, chunk_hooks);
+ ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit,
+ arena->ind);
+ if (ret == NULL)
+ return (NULL);
+ if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default)
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize);
+ return (ret);
+}
+
+static void
+chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache,
+ void *chunk, size_t size, bool zeroed, bool committed)
+{
+ bool unzeroed;
+ extent_node_t *node, *prev;
+ extent_node_t key;
+
+ assert(!cache || !zeroed);
+ unzeroed = cache || !zeroed;
+ JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
+
+ malloc_mutex_lock(&arena->chunks_mtx);
+ chunk_hooks_assure_initialized_locked(arena, chunk_hooks);
+ extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0,
+ false, false);
node = extent_tree_ad_nsearch(chunks_ad, &key);
/* Try to coalesce forward. */
- if (node != NULL && node->addr == key.addr) {
+ if (node != NULL && extent_node_addr_get(node) ==
+ extent_node_addr_get(&key) && extent_node_committed_get(node) ==
+ committed && !chunk_hooks->merge(chunk, size,
+ extent_node_addr_get(node), extent_node_size_get(node), false,
+ arena->ind)) {
/*
* Coalesce chunk with the following address range. This does
* not change the position within chunks_ad, so only
* remove/insert from/into chunks_szad.
*/
extent_tree_szad_remove(chunks_szad, node);
- node->addr = chunk;
- node->size += size;
- node->zeroed = (node->zeroed && (unzeroed == false));
+ arena_chunk_cache_maybe_remove(arena, node, cache);
+ extent_node_addr_set(node, chunk);
+ extent_node_size_set(node, size + extent_node_size_get(node));
+ extent_node_zeroed_set(node, extent_node_zeroed_get(node) &&
+ !unzeroed);
extent_tree_szad_insert(chunks_szad, node);
+ arena_chunk_cache_maybe_insert(arena, node, cache);
} else {
/* Coalescing forward failed, so insert a new node. */
- if (xnode == NULL) {
+ node = arena_node_alloc(arena);
+ if (node == NULL) {
/*
- * base_node_alloc() failed, which is an exceedingly
- * unlikely failure. Leak chunk; its pages have
- * already been purged, so this is only a virtual
- * memory leak.
+ * Node allocation failed, which is an exceedingly
+ * unlikely failure. Leak chunk after making sure its
+ * pages have already been purged, so that this is only
+ * a virtual memory leak.
*/
+ if (cache) {
+ chunk_purge_wrapper(arena, chunk_hooks, chunk,
+ size, 0, size);
+ }
goto label_return;
}
- node = xnode;
- xnode = NULL; /* Prevent deallocation below. */
- node->addr = chunk;
- node->size = size;
- node->zeroed = (unzeroed == false);
+ extent_node_init(node, arena, chunk, size, !unzeroed,
+ committed);
extent_tree_ad_insert(chunks_ad, node);
extent_tree_szad_insert(chunks_szad, node);
+ arena_chunk_cache_maybe_insert(arena, node, cache);
}
/* Try to coalesce backward. */
prev = extent_tree_ad_prev(chunks_ad, node);
- if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
- chunk) {
+ if (prev != NULL && (void *)((uintptr_t)extent_node_addr_get(prev) +
+ extent_node_size_get(prev)) == chunk &&
+ extent_node_committed_get(prev) == committed &&
+ !chunk_hooks->merge(extent_node_addr_get(prev),
+ extent_node_size_get(prev), chunk, size, false, arena->ind)) {
/*
* Coalesce chunk with the previous address range. This does
* not change the position within chunks_ad, so only
@@ -275,44 +537,42 @@ chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
*/
extent_tree_szad_remove(chunks_szad, prev);
extent_tree_ad_remove(chunks_ad, prev);
-
+ arena_chunk_cache_maybe_remove(arena, prev, cache);
extent_tree_szad_remove(chunks_szad, node);
- node->addr = prev->addr;
- node->size += prev->size;
- node->zeroed = (node->zeroed && prev->zeroed);
+ arena_chunk_cache_maybe_remove(arena, node, cache);
+ extent_node_addr_set(node, extent_node_addr_get(prev));
+ extent_node_size_set(node, extent_node_size_get(prev) +
+ extent_node_size_get(node));
+ extent_node_zeroed_set(node, extent_node_zeroed_get(prev) &&
+ extent_node_zeroed_get(node));
extent_tree_szad_insert(chunks_szad, node);
+ arena_chunk_cache_maybe_insert(arena, node, cache);
- xprev = prev;
+ arena_node_dalloc(arena, prev);
}
label_return:
- malloc_mutex_unlock(&chunks_mtx);
- /*
- * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to
- * avoid potential deadlock.
- */
- if (xnode != NULL)
- base_node_dealloc(xnode);
- if (xprev != NULL)
- base_node_dealloc(xprev);
+ malloc_mutex_unlock(&arena->chunks_mtx);
}
void
-chunk_unmap(void *chunk, size_t size)
+chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
+ size_t size, bool committed)
{
+
assert(chunk != NULL);
assert(CHUNK_ADDR2BASE(chunk) == chunk);
assert(size != 0);
assert((size & chunksize_mask) == 0);
- if (config_dss && chunk_in_dss(chunk))
- chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
- else if (chunk_dealloc_mmap(chunk, size))
- chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
+ chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached,
+ &arena->chunks_ad_cached, true, chunk, size, false, committed);
+ arena_maybe_purge(arena);
}
void
-chunk_dealloc(void *chunk, size_t size, bool unmap)
+chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
+ size_t size, bool zeroed, bool committed)
{
assert(chunk != NULL);
@@ -320,22 +580,149 @@ chunk_dealloc(void *chunk, size_t size, bool unmap)
assert(size != 0);
assert((size & chunksize_mask) == 0);
- if (config_ivsalloc)
- rtree_set(chunks_rtree, (uintptr_t)chunk, 0);
- if (config_stats || config_prof) {
- malloc_mutex_lock(&chunks_mtx);
- assert(stats_chunks.curchunks >= (size / chunksize));
- stats_chunks.curchunks -= (size / chunksize);
- malloc_mutex_unlock(&chunks_mtx);
+ chunk_hooks_assure_initialized(arena, chunk_hooks);
+ /* Try to deallocate. */
+ if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind))
+ return;
+ /* Try to decommit; purge if that fails. */
+ if (committed) {
+ committed = chunk_hooks->decommit(chunk, size, 0, size,
+ arena->ind);
}
+ zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size,
+ arena->ind);
+ chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained,
+ &arena->chunks_ad_retained, false, chunk, size, zeroed, committed);
+}
+
+static bool
+chunk_dalloc_default(void *chunk, size_t size, bool committed,
+ unsigned arena_ind)
+{
+
+ if (!have_dss || !chunk_in_dss(chunk))
+ return (chunk_dalloc_mmap(chunk, size));
+ return (true);
+}
+
+void
+chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
+ size_t size, bool committed)
+{
+
+ chunk_hooks_assure_initialized(arena, chunk_hooks);
+ chunk_hooks->dalloc(chunk, size, committed, arena->ind);
+ if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default)
+ JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
+}
+
+static bool
+chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
+
+ return (pages_commit((void *)((uintptr_t)chunk + (uintptr_t)offset),
+ length));
+}
+
+static bool
+chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
- if (unmap)
- chunk_unmap(chunk, size);
+ return (pages_decommit((void *)((uintptr_t)chunk + (uintptr_t)offset),
+ length));
+}
+
+bool
+chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length)
+{
+
+ assert(chunk != NULL);
+ assert(CHUNK_ADDR2BASE(chunk) == chunk);
+ assert((offset & PAGE_MASK) == 0);
+ assert(length != 0);
+ assert((length & PAGE_MASK) == 0);
+
+ return (pages_purge((void *)((uintptr_t)chunk + (uintptr_t)offset),
+ length));
+}
+
+static bool
+chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
+
+ return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset,
+ length));
+}
+
+bool
+chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk,
+ size_t size, size_t offset, size_t length)
+{
+
+ chunk_hooks_assure_initialized(arena, chunk_hooks);
+ return (chunk_hooks->purge(chunk, size, offset, length, arena->ind));
+}
+
+static bool
+chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b,
+ bool committed, unsigned arena_ind)
+{
+
+ if (!maps_coalesce)
+ return (true);
+ return (false);
+}
+
+static bool
+chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b,
+ bool committed, unsigned arena_ind)
+{
+
+ if (!maps_coalesce)
+ return (true);
+ if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b))
+ return (true);
+
+ return (false);
+}
+
+static rtree_node_elm_t *
+chunks_rtree_node_alloc(size_t nelms)
+{
+
+ return ((rtree_node_elm_t *)base_alloc(nelms *
+ sizeof(rtree_node_elm_t)));
}
bool
chunk_boot(void)
{
+#ifdef _WIN32
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+
+ /*
+ * Verify actual page size is equal to or an integral multiple of
+ * configured page size.
+ */
+ if (info.dwPageSize & ((1U << LG_PAGE) - 1))
+ return (true);
+
+ /*
+ * Configure chunksize (if not set) to match granularity (usually 64K),
+ * so pages_map will always take fast path.
+ */
+ if (!opt_lg_chunk) {
+ opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity)
+ - 1;
+ }
+#else
+ if (!opt_lg_chunk)
+ opt_lg_chunk = LG_CHUNK_DEFAULT;
+#endif
/* Set variables according to the value of opt_lg_chunk. */
chunksize = (ZU(1) << opt_lg_chunk);
@@ -343,23 +730,11 @@ chunk_boot(void)
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> LG_PAGE);
- if (config_stats || config_prof) {
- if (malloc_mutex_init(&chunks_mtx))
- return (true);
- memset(&stats_chunks, 0, sizeof(chunk_stats_t));
- }
- if (config_dss && chunk_dss_boot())
+ if (have_dss && chunk_dss_boot())
+ return (true);
+ if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
+ opt_lg_chunk, chunks_rtree_node_alloc, NULL))
return (true);
- extent_tree_szad_new(&chunks_szad_mmap);
- extent_tree_ad_new(&chunks_ad_mmap);
- extent_tree_szad_new(&chunks_szad_dss);
- extent_tree_ad_new(&chunks_ad_dss);
- if (config_ivsalloc) {
- chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
- opt_lg_chunk, base_alloc, NULL);
- if (chunks_rtree == NULL)
- return (true);
- }
return (false);
}
@@ -368,9 +743,6 @@ void
chunk_prefork(void)
{
- malloc_mutex_prefork(&chunks_mtx);
- if (config_ivsalloc)
- rtree_prefork(chunks_rtree);
chunk_dss_prefork();
}
@@ -379,9 +751,6 @@ chunk_postfork_parent(void)
{
chunk_dss_postfork_parent();
- if (config_ivsalloc)
- rtree_postfork_parent(chunks_rtree);
- malloc_mutex_postfork_parent(&chunks_mtx);
}
void
@@ -389,7 +758,4 @@ chunk_postfork_child(void)
{
chunk_dss_postfork_child();
- if (config_ivsalloc)
- rtree_postfork_child(chunks_rtree);
- malloc_mutex_postfork_child(&chunks_mtx);
}
diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c
index 510bb8bee..61fc91696 100644
--- a/deps/jemalloc/src/chunk_dss.c
+++ b/deps/jemalloc/src/chunk_dss.c
@@ -32,7 +32,7 @@ static void *
chunk_dss_sbrk(intptr_t increment)
{
-#ifdef JEMALLOC_HAVE_SBRK
+#ifdef JEMALLOC_DSS
return (sbrk(increment));
#else
not_implemented();
@@ -45,7 +45,7 @@ chunk_dss_prec_get(void)
{
dss_prec_t ret;
- if (config_dss == false)
+ if (!have_dss)
return (dss_prec_disabled);
malloc_mutex_lock(&dss_mtx);
ret = dss_prec_default;
@@ -57,8 +57,8 @@ bool
chunk_dss_prec_set(dss_prec_t dss_prec)
{
- if (config_dss == false)
- return (true);
+ if (!have_dss)
+ return (dss_prec != dss_prec_disabled);
malloc_mutex_lock(&dss_mtx);
dss_prec_default = dss_prec;
malloc_mutex_unlock(&dss_mtx);
@@ -66,11 +66,10 @@ chunk_dss_prec_set(dss_prec_t dss_prec)
}
void *
-chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
+chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment,
+ bool *zero, bool *commit)
{
- void *ret;
-
- cassert(config_dss);
+ cassert(have_dss);
assert(size > 0 && (size & chunksize_mask) == 0);
assert(alignment > 0 && (alignment & chunksize_mask) == 0);
@@ -83,9 +82,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
malloc_mutex_lock(&dss_mtx);
if (dss_prev != (void *)-1) {
- size_t gap_size, cpad_size;
- void *cpad, *dss_next;
- intptr_t incr;
/*
* The loop is necessary to recover from races with other
@@ -93,8 +89,20 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
* malloc.
*/
do {
+ void *ret, *cpad, *dss_next;
+ size_t gap_size, cpad_size;
+ intptr_t incr;
+ /* Avoid an unnecessary system call. */
+ if (new_addr != NULL && dss_max != new_addr)
+ break;
+
/* Get the current end of the DSS. */
dss_max = chunk_dss_sbrk(0);
+
+ /* Make sure the earlier condition still holds. */
+ if (new_addr != NULL && dss_max != new_addr)
+ break;
+
/*
* Calculate how much padding is necessary to
* chunk-align the end of the DSS.
@@ -123,12 +131,20 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
/* Success. */
dss_max = dss_next;
malloc_mutex_unlock(&dss_mtx);
- if (cpad_size != 0)
- chunk_unmap(cpad, cpad_size);
+ if (cpad_size != 0) {
+ chunk_hooks_t chunk_hooks =
+ CHUNK_HOOKS_INITIALIZER;
+ chunk_dalloc_wrapper(arena,
+ &chunk_hooks, cpad, cpad_size,
+ true);
+ }
if (*zero) {
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+ JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
+ ret, size);
memset(ret, 0, size);
}
+ if (!*commit)
+ *commit = pages_decommit(ret, size);
return (ret);
}
} while (dss_prev != (void *)-1);
@@ -143,7 +159,7 @@ chunk_in_dss(void *chunk)
{
bool ret;
- cassert(config_dss);
+ cassert(have_dss);
malloc_mutex_lock(&dss_mtx);
if ((uintptr_t)chunk >= (uintptr_t)dss_base
@@ -160,7 +176,7 @@ bool
chunk_dss_boot(void)
{
- cassert(config_dss);
+ cassert(have_dss);
if (malloc_mutex_init(&dss_mtx))
return (true);
@@ -175,7 +191,7 @@ void
chunk_dss_prefork(void)
{
- if (config_dss)
+ if (have_dss)
malloc_mutex_prefork(&dss_mtx);
}
@@ -183,7 +199,7 @@ void
chunk_dss_postfork_parent(void)
{
- if (config_dss)
+ if (have_dss)
malloc_mutex_postfork_parent(&dss_mtx);
}
@@ -191,7 +207,7 @@ void
chunk_dss_postfork_child(void)
{
- if (config_dss)
+ if (have_dss)
malloc_mutex_postfork_child(&dss_mtx);
}
diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c
index 2056d793f..b9ba74191 100644
--- a/deps/jemalloc/src/chunk_mmap.c
+++ b/deps/jemalloc/src/chunk_mmap.c
@@ -2,154 +2,20 @@
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void *pages_map(void *addr, size_t size);
-static void pages_unmap(void *addr, size_t size);
-static void *chunk_alloc_mmap_slow(size_t size, size_t alignment,
- bool *zero);
-
-/******************************************************************************/
static void *
-pages_map(void *addr, size_t size)
+chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit)
{
void *ret;
-
- assert(size != 0);
-
-#ifdef _WIN32
- /*
- * If VirtualAlloc can't allocate at the given address when one is
- * given, it fails and returns NULL.
- */
- ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
- PAGE_READWRITE);
-#else
- /*
- * We don't use MAP_FIXED here, because it can cause the *replacement*
- * of existing mappings, and we only want to create new mappings.
- */
- ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
- -1, 0);
- assert(ret != NULL);
-
- if (ret == MAP_FAILED)
- ret = NULL;
- else if (addr != NULL && ret != addr) {
- /*
- * We succeeded in mapping memory, but not in the right place.
- */
- if (munmap(ret, size) == -1) {
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc: Error in munmap(): %s\n",
- buf);
- if (opt_abort)
- abort();
- }
- ret = NULL;
- }
-#endif
- assert(ret == NULL || (addr == NULL && ret != addr)
- || (addr != NULL && ret == addr));
- return (ret);
-}
-
-static void
-pages_unmap(void *addr, size_t size)
-{
-
-#ifdef _WIN32
- if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
-#else
- if (munmap(addr, size) == -1)
-#endif
- {
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc>: Error in "
-#ifdef _WIN32
- "VirtualFree"
-#else
- "munmap"
-#endif
- "(): %s\n", buf);
- if (opt_abort)
- abort();
- }
-}
-
-static void *
-pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
-{
- void *ret = (void *)((uintptr_t)addr + leadsize);
-
- assert(alloc_size >= leadsize + size);
-#ifdef _WIN32
- {
- void *new_addr;
-
- pages_unmap(addr, alloc_size);
- new_addr = pages_map(ret, size);
- if (new_addr == ret)
- return (ret);
- if (new_addr)
- pages_unmap(new_addr, size);
- return (NULL);
- }
-#else
- {
- size_t trailsize = alloc_size - leadsize - size;
-
- if (leadsize != 0)
- pages_unmap(addr, leadsize);
- if (trailsize != 0)
- pages_unmap((void *)((uintptr_t)ret + size), trailsize);
- return (ret);
- }
-#endif
-}
-
-bool
-pages_purge(void *addr, size_t length)
-{
- bool unzeroed;
-
-#ifdef _WIN32
- VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE);
- unzeroed = true;
-#else
-# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
-# define JEMALLOC_MADV_PURGE MADV_DONTNEED
-# define JEMALLOC_MADV_ZEROS true
-# elif defined(JEMALLOC_PURGE_MADVISE_FREE)
-# define JEMALLOC_MADV_PURGE MADV_FREE
-# define JEMALLOC_MADV_ZEROS false
-# else
-# error "No method defined for purging unused dirty pages."
-# endif
- int err = madvise(addr, length, JEMALLOC_MADV_PURGE);
- unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0);
-# undef JEMALLOC_MADV_PURGE
-# undef JEMALLOC_MADV_ZEROS
-#endif
- return (unzeroed);
-}
-
-static void *
-chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero)
-{
- void *ret, *pages;
- size_t alloc_size, leadsize;
+ size_t alloc_size;
alloc_size = size + alignment - PAGE;
/* Beware size_t wrap-around. */
if (alloc_size < size)
return (NULL);
do {
+ void *pages;
+ size_t leadsize;
pages = pages_map(NULL, alloc_size);
if (pages == NULL)
return (NULL);
@@ -160,11 +26,13 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero)
assert(ret != NULL);
*zero = true;
+ if (!*commit)
+ *commit = pages_decommit(ret, size);
return (ret);
}
void *
-chunk_alloc_mmap(size_t size, size_t alignment, bool *zero)
+chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit)
{
void *ret;
size_t offset;
@@ -191,20 +59,22 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero)
offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
if (offset != 0) {
pages_unmap(ret, size);
- return (chunk_alloc_mmap_slow(size, alignment, zero));
+ return (chunk_alloc_mmap_slow(size, alignment, zero, commit));
}
assert(ret != NULL);
*zero = true;
+ if (!*commit)
+ *commit = pages_decommit(ret, size);
return (ret);
}
bool
-chunk_dealloc_mmap(void *chunk, size_t size)
+chunk_dalloc_mmap(void *chunk, size_t size)
{
if (config_munmap)
pages_unmap(chunk, size);
- return (config_munmap == false);
+ return (!config_munmap);
}
diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c
index 04c529661..53a1c1ef1 100644
--- a/deps/jemalloc/src/ckh.c
+++ b/deps/jemalloc/src/ckh.c
@@ -40,8 +40,8 @@
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-static bool ckh_grow(ckh_t *ckh);
-static void ckh_shrink(ckh_t *ckh);
+static bool ckh_grow(tsd_t *tsd, ckh_t *ckh);
+static void ckh_shrink(tsd_t *tsd, ckh_t *ckh);
/******************************************************************************/
@@ -185,7 +185,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
}
bucket = tbucket;
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data))
return (false);
}
}
@@ -201,12 +201,12 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
/* Try to insert in primary bucket. */
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data))
return (false);
/* Try to insert in secondary bucket. */
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data))
return (false);
/*
@@ -243,7 +243,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
}
static bool
-ckh_grow(ckh_t *ckh)
+ckh_grow(tsd_t *tsd, ckh_t *ckh)
{
bool ret;
ckhc_t *tab, *ttab;
@@ -270,7 +270,8 @@ ckh_grow(ckh_t *ckh)
ret = true;
goto label_return;
}
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL,
+ true, NULL);
if (tab == NULL) {
ret = true;
goto label_return;
@@ -281,13 +282,13 @@ ckh_grow(ckh_t *ckh)
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
- if (ckh_rebuild(ckh, tab) == false) {
- idalloc(tab);
+ if (!ckh_rebuild(ckh, tab)) {
+ idalloctm(tsd, tab, tcache_get(tsd, false), true);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloc(ckh->tab);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
@@ -298,7 +299,7 @@ label_return:
}
static void
-ckh_shrink(ckh_t *ckh)
+ckh_shrink(tsd_t *tsd, ckh_t *ckh)
{
ckhc_t *tab, *ttab;
size_t lg_curcells, usize;
@@ -313,7 +314,8 @@ ckh_shrink(ckh_t *ckh)
usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (usize == 0)
return;
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
+ NULL);
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@@ -327,8 +329,8 @@ ckh_shrink(ckh_t *ckh)
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
- if (ckh_rebuild(ckh, tab) == false) {
- idalloc(tab);
+ if (!ckh_rebuild(ckh, tab)) {
+ idalloctm(tsd, tab, tcache_get(tsd, false), true);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
@@ -336,7 +338,7 @@ ckh_shrink(ckh_t *ckh)
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloc(ckh->tab);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
@@ -345,7 +347,8 @@ ckh_shrink(ckh_t *ckh)
}
bool
-ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
+ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+ ckh_keycomp_t *keycomp)
{
bool ret;
size_t mincells, usize;
@@ -366,10 +369,10 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ckh->count = 0;
/*
- * Find the minimum power of 2 that is large enough to fit aBaseCount
+ * Find the minimum power of 2 that is large enough to fit minitems
* entries. We are using (2+,2) cuckoo hashing, which has an expected
* maximum load factor of at least ~0.86, so 0.75 is a conservative load
- * factor that will typically allow 2^aLgMinItems to fit without ever
+ * factor that will typically allow mincells items to fit without ever
* growing the table.
*/
assert(LG_CKH_BUCKET_CELLS > 0);
@@ -388,7 +391,8 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ret = true;
goto label_return;
}
- ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
+ NULL);
if (ckh->tab == NULL) {
ret = true;
goto label_return;
@@ -400,16 +404,16 @@ label_return:
}
void
-ckh_delete(ckh_t *ckh)
+ckh_delete(tsd_t *tsd, ckh_t *ckh)
{
assert(ckh != NULL);
#ifdef CKH_VERBOSE
malloc_printf(
- "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
- " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
- " nrelocs: %"PRIu64"\n", __func__, ckh,
+ "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
+ " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
+ " nrelocs: %"FMTu64"\n", __func__, ckh,
(unsigned long long)ckh->ngrows,
(unsigned long long)ckh->nshrinks,
(unsigned long long)ckh->nshrinkfails,
@@ -417,7 +421,7 @@ ckh_delete(ckh_t *ckh)
(unsigned long long)ckh->nrelocs);
#endif
- idalloc(ckh->tab);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
if (config_debug)
memset(ckh, 0x5a, sizeof(ckh_t));
}
@@ -452,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
}
bool
-ckh_insert(ckh_t *ckh, const void *key, const void *data)
+ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data)
{
bool ret;
@@ -464,7 +468,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
#endif
while (ckh_try_insert(ckh, &key, &data)) {
- if (ckh_grow(ckh)) {
+ if (ckh_grow(tsd, ckh)) {
ret = true;
goto label_return;
}
@@ -476,7 +480,8 @@ label_return:
}
bool
-ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
+ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
+ void **data)
{
size_t cell;
@@ -497,7 +502,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
- ckh_shrink(ckh);
+ ckh_shrink(tsd, ckh);
}
return (false);
diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c
index cc2c5aef5..3de8e602d 100644
--- a/deps/jemalloc/src/ctl.c
+++ b/deps/jemalloc/src/ctl.c
@@ -7,7 +7,6 @@
/*
* ctl_mtx protects the following:
* - ctl_stats.*
- * - opt_prof_active
*/
static malloc_mutex_t ctl_mtx;
static bool ctl_initialized;
@@ -17,14 +16,14 @@ static ctl_stats_t ctl_stats;
/******************************************************************************/
/* Helpers for named and indexed nodes. */
-static inline const ctl_named_node_t *
+JEMALLOC_INLINE_C const ctl_named_node_t *
ctl_named_node(const ctl_node_t *node)
{
return ((node->named) ? (const ctl_named_node_t *)node : NULL);
}
-static inline const ctl_named_node_t *
+JEMALLOC_INLINE_C const ctl_named_node_t *
ctl_named_children(const ctl_named_node_t *node, int index)
{
const ctl_named_node_t *children = ctl_named_node(node->children);
@@ -32,12 +31,11 @@ ctl_named_children(const ctl_named_node_t *node, int index)
return (children ? &children[index] : NULL);
}
-static inline const ctl_indexed_node_t *
+JEMALLOC_INLINE_C const ctl_indexed_node_t *
ctl_indexed_node(const ctl_node_t *node)
{
- return ((node->named == false) ? (const ctl_indexed_node_t *)node :
- NULL);
+ return (!node->named ? (const ctl_indexed_node_t *)node : NULL);
}
/******************************************************************************/
@@ -68,16 +66,17 @@ CTL_PROTO(version)
CTL_PROTO(epoch)
CTL_PROTO(thread_tcache_enabled)
CTL_PROTO(thread_tcache_flush)
+CTL_PROTO(thread_prof_name)
+CTL_PROTO(thread_prof_active)
CTL_PROTO(thread_arena)
CTL_PROTO(thread_allocated)
CTL_PROTO(thread_allocatedp)
CTL_PROTO(thread_deallocated)
CTL_PROTO(thread_deallocatedp)
+CTL_PROTO(config_cache_oblivious)
CTL_PROTO(config_debug)
-CTL_PROTO(config_dss)
CTL_PROTO(config_fill)
CTL_PROTO(config_lazy_lock)
-CTL_PROTO(config_mremap)
CTL_PROTO(config_munmap)
CTL_PROTO(config_prof)
CTL_PROTO(config_prof_libgcc)
@@ -99,22 +98,27 @@ CTL_PROTO(opt_zero)
CTL_PROTO(opt_quarantine)
CTL_PROTO(opt_redzone)
CTL_PROTO(opt_utrace)
-CTL_PROTO(opt_valgrind)
CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_max)
CTL_PROTO(opt_prof)
CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active)
+CTL_PROTO(opt_prof_thread_active_init)
CTL_PROTO(opt_lg_prof_sample)
CTL_PROTO(opt_lg_prof_interval)
CTL_PROTO(opt_prof_gdump)
CTL_PROTO(opt_prof_final)
CTL_PROTO(opt_prof_leak)
CTL_PROTO(opt_prof_accum)
+CTL_PROTO(tcache_create)
+CTL_PROTO(tcache_flush)
+CTL_PROTO(tcache_destroy)
CTL_PROTO(arena_i_purge)
static void arena_purge(unsigned arena_ind);
CTL_PROTO(arena_i_dss)
+CTL_PROTO(arena_i_lg_dirty_mult)
+CTL_PROTO(arena_i_chunk_hooks)
INDEX_PROTO(arena_i)
CTL_PROTO(arenas_bin_i_size)
CTL_PROTO(arenas_bin_i_nregs)
@@ -122,25 +126,26 @@ CTL_PROTO(arenas_bin_i_run_size)
INDEX_PROTO(arenas_bin_i)
CTL_PROTO(arenas_lrun_i_size)
INDEX_PROTO(arenas_lrun_i)
+CTL_PROTO(arenas_hchunk_i_size)
+INDEX_PROTO(arenas_hchunk_i)
CTL_PROTO(arenas_narenas)
CTL_PROTO(arenas_initialized)
+CTL_PROTO(arenas_lg_dirty_mult)
CTL_PROTO(arenas_quantum)
CTL_PROTO(arenas_page)
CTL_PROTO(arenas_tcache_max)
CTL_PROTO(arenas_nbins)
CTL_PROTO(arenas_nhbins)
CTL_PROTO(arenas_nlruns)
-CTL_PROTO(arenas_purge)
+CTL_PROTO(arenas_nhchunks)
CTL_PROTO(arenas_extend)
+CTL_PROTO(prof_thread_active_init)
CTL_PROTO(prof_active)
CTL_PROTO(prof_dump)
+CTL_PROTO(prof_gdump)
+CTL_PROTO(prof_reset)
CTL_PROTO(prof_interval)
-CTL_PROTO(stats_chunks_current)
-CTL_PROTO(stats_chunks_total)
-CTL_PROTO(stats_chunks_high)
-CTL_PROTO(stats_huge_allocated)
-CTL_PROTO(stats_huge_nmalloc)
-CTL_PROTO(stats_huge_ndalloc)
+CTL_PROTO(lg_prof_sample)
CTL_PROTO(stats_arenas_i_small_allocated)
CTL_PROTO(stats_arenas_i_small_nmalloc)
CTL_PROTO(stats_arenas_i_small_ndalloc)
@@ -149,10 +154,14 @@ CTL_PROTO(stats_arenas_i_large_allocated)
CTL_PROTO(stats_arenas_i_large_nmalloc)
CTL_PROTO(stats_arenas_i_large_ndalloc)
CTL_PROTO(stats_arenas_i_large_nrequests)
-CTL_PROTO(stats_arenas_i_bins_j_allocated)
+CTL_PROTO(stats_arenas_i_huge_allocated)
+CTL_PROTO(stats_arenas_i_huge_nmalloc)
+CTL_PROTO(stats_arenas_i_huge_ndalloc)
+CTL_PROTO(stats_arenas_i_huge_nrequests)
CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
CTL_PROTO(stats_arenas_i_bins_j_nrequests)
+CTL_PROTO(stats_arenas_i_bins_j_curregs)
CTL_PROTO(stats_arenas_i_bins_j_nfills)
CTL_PROTO(stats_arenas_i_bins_j_nflushes)
CTL_PROTO(stats_arenas_i_bins_j_nruns)
@@ -164,18 +173,28 @@ CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
CTL_PROTO(stats_arenas_i_lruns_j_curruns)
INDEX_PROTO(stats_arenas_i_lruns_j)
+CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc)
+CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc)
+CTL_PROTO(stats_arenas_i_hchunks_j_nrequests)
+CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks)
+INDEX_PROTO(stats_arenas_i_hchunks_j)
CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_dss)
+CTL_PROTO(stats_arenas_i_lg_dirty_mult)
CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty)
CTL_PROTO(stats_arenas_i_mapped)
CTL_PROTO(stats_arenas_i_npurge)
CTL_PROTO(stats_arenas_i_nmadvise)
CTL_PROTO(stats_arenas_i_purged)
+CTL_PROTO(stats_arenas_i_metadata_mapped)
+CTL_PROTO(stats_arenas_i_metadata_allocated)
INDEX_PROTO(stats_arenas_i)
CTL_PROTO(stats_cactive)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
+CTL_PROTO(stats_metadata)
+CTL_PROTO(stats_resident)
CTL_PROTO(stats_mapped)
/******************************************************************************/
@@ -197,71 +216,84 @@ CTL_PROTO(stats_mapped)
*/
#define INDEX(i) {false}, i##_index
-static const ctl_named_node_t tcache_node[] = {
+static const ctl_named_node_t thread_tcache_node[] = {
{NAME("enabled"), CTL(thread_tcache_enabled)},
{NAME("flush"), CTL(thread_tcache_flush)}
};
+static const ctl_named_node_t thread_prof_node[] = {
+ {NAME("name"), CTL(thread_prof_name)},
+ {NAME("active"), CTL(thread_prof_active)}
+};
+
static const ctl_named_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)},
{NAME("allocated"), CTL(thread_allocated)},
{NAME("allocatedp"), CTL(thread_allocatedp)},
{NAME("deallocated"), CTL(thread_deallocated)},
{NAME("deallocatedp"), CTL(thread_deallocatedp)},
- {NAME("tcache"), CHILD(named, tcache)}
+ {NAME("tcache"), CHILD(named, thread_tcache)},
+ {NAME("prof"), CHILD(named, thread_prof)}
};
static const ctl_named_node_t config_node[] = {
- {NAME("debug"), CTL(config_debug)},
- {NAME("dss"), CTL(config_dss)},
- {NAME("fill"), CTL(config_fill)},
- {NAME("lazy_lock"), CTL(config_lazy_lock)},
- {NAME("mremap"), CTL(config_mremap)},
- {NAME("munmap"), CTL(config_munmap)},
- {NAME("prof"), CTL(config_prof)},
- {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
- {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
- {NAME("stats"), CTL(config_stats)},
- {NAME("tcache"), CTL(config_tcache)},
- {NAME("tls"), CTL(config_tls)},
- {NAME("utrace"), CTL(config_utrace)},
- {NAME("valgrind"), CTL(config_valgrind)},
- {NAME("xmalloc"), CTL(config_xmalloc)}
+ {NAME("cache_oblivious"), CTL(config_cache_oblivious)},
+ {NAME("debug"), CTL(config_debug)},
+ {NAME("fill"), CTL(config_fill)},
+ {NAME("lazy_lock"), CTL(config_lazy_lock)},
+ {NAME("munmap"), CTL(config_munmap)},
+ {NAME("prof"), CTL(config_prof)},
+ {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
+ {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
+ {NAME("stats"), CTL(config_stats)},
+ {NAME("tcache"), CTL(config_tcache)},
+ {NAME("tls"), CTL(config_tls)},
+ {NAME("utrace"), CTL(config_utrace)},
+ {NAME("valgrind"), CTL(config_valgrind)},
+ {NAME("xmalloc"), CTL(config_xmalloc)}
};
static const ctl_named_node_t opt_node[] = {
- {NAME("abort"), CTL(opt_abort)},
- {NAME("dss"), CTL(opt_dss)},
- {NAME("lg_chunk"), CTL(opt_lg_chunk)},
- {NAME("narenas"), CTL(opt_narenas)},
- {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
- {NAME("stats_print"), CTL(opt_stats_print)},
- {NAME("junk"), CTL(opt_junk)},
- {NAME("zero"), CTL(opt_zero)},
- {NAME("quarantine"), CTL(opt_quarantine)},
- {NAME("redzone"), CTL(opt_redzone)},
- {NAME("utrace"), CTL(opt_utrace)},
- {NAME("valgrind"), CTL(opt_valgrind)},
- {NAME("xmalloc"), CTL(opt_xmalloc)},
- {NAME("tcache"), CTL(opt_tcache)},
- {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
- {NAME("prof"), CTL(opt_prof)},
- {NAME("prof_prefix"), CTL(opt_prof_prefix)},
- {NAME("prof_active"), CTL(opt_prof_active)},
- {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
- {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
- {NAME("prof_gdump"), CTL(opt_prof_gdump)},
- {NAME("prof_final"), CTL(opt_prof_final)},
- {NAME("prof_leak"), CTL(opt_prof_leak)},
- {NAME("prof_accum"), CTL(opt_prof_accum)}
+ {NAME("abort"), CTL(opt_abort)},
+ {NAME("dss"), CTL(opt_dss)},
+ {NAME("lg_chunk"), CTL(opt_lg_chunk)},
+ {NAME("narenas"), CTL(opt_narenas)},
+ {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
+ {NAME("stats_print"), CTL(opt_stats_print)},
+ {NAME("junk"), CTL(opt_junk)},
+ {NAME("zero"), CTL(opt_zero)},
+ {NAME("quarantine"), CTL(opt_quarantine)},
+ {NAME("redzone"), CTL(opt_redzone)},
+ {NAME("utrace"), CTL(opt_utrace)},
+ {NAME("xmalloc"), CTL(opt_xmalloc)},
+ {NAME("tcache"), CTL(opt_tcache)},
+ {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
+ {NAME("prof"), CTL(opt_prof)},
+ {NAME("prof_prefix"), CTL(opt_prof_prefix)},
+ {NAME("prof_active"), CTL(opt_prof_active)},
+ {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)},
+ {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
+ {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
+ {NAME("prof_gdump"), CTL(opt_prof_gdump)},
+ {NAME("prof_final"), CTL(opt_prof_final)},
+ {NAME("prof_leak"), CTL(opt_prof_leak)},
+ {NAME("prof_accum"), CTL(opt_prof_accum)}
+};
+
+static const ctl_named_node_t tcache_node[] = {
+ {NAME("create"), CTL(tcache_create)},
+ {NAME("flush"), CTL(tcache_flush)},
+ {NAME("destroy"), CTL(tcache_destroy)}
};
static const ctl_named_node_t arena_i_node[] = {
- {NAME("purge"), CTL(arena_i_purge)},
- {NAME("dss"), CTL(arena_i_dss)}
+ {NAME("purge"), CTL(arena_i_purge)},
+ {NAME("dss"), CTL(arena_i_dss)},
+ {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)},
+ {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)}
};
static const ctl_named_node_t super_arena_i_node[] = {
- {NAME(""), CHILD(named, arena_i)}
+ {NAME(""), CHILD(named, arena_i)}
};
static const ctl_indexed_node_t arena_node[] = {
@@ -269,12 +301,12 @@ static const ctl_indexed_node_t arena_node[] = {
};
static const ctl_named_node_t arenas_bin_i_node[] = {
- {NAME("size"), CTL(arenas_bin_i_size)},
- {NAME("nregs"), CTL(arenas_bin_i_nregs)},
- {NAME("run_size"), CTL(arenas_bin_i_run_size)}
+ {NAME("size"), CTL(arenas_bin_i_size)},
+ {NAME("nregs"), CTL(arenas_bin_i_nregs)},
+ {NAME("run_size"), CTL(arenas_bin_i_run_size)}
};
static const ctl_named_node_t super_arenas_bin_i_node[] = {
- {NAME(""), CHILD(named, arenas_bin_i)}
+ {NAME(""), CHILD(named, arenas_bin_i)}
};
static const ctl_indexed_node_t arenas_bin_node[] = {
@@ -282,76 +314,93 @@ static const ctl_indexed_node_t arenas_bin_node[] = {
};
static const ctl_named_node_t arenas_lrun_i_node[] = {
- {NAME("size"), CTL(arenas_lrun_i_size)}
+ {NAME("size"), CTL(arenas_lrun_i_size)}
};
static const ctl_named_node_t super_arenas_lrun_i_node[] = {
- {NAME(""), CHILD(named, arenas_lrun_i)}
+ {NAME(""), CHILD(named, arenas_lrun_i)}
};
static const ctl_indexed_node_t arenas_lrun_node[] = {
{INDEX(arenas_lrun_i)}
};
+static const ctl_named_node_t arenas_hchunk_i_node[] = {
+ {NAME("size"), CTL(arenas_hchunk_i_size)}
+};
+static const ctl_named_node_t super_arenas_hchunk_i_node[] = {
+ {NAME(""), CHILD(named, arenas_hchunk_i)}
+};
+
+static const ctl_indexed_node_t arenas_hchunk_node[] = {
+ {INDEX(arenas_hchunk_i)}
+};
+
static const ctl_named_node_t arenas_node[] = {
- {NAME("narenas"), CTL(arenas_narenas)},
- {NAME("initialized"), CTL(arenas_initialized)},
- {NAME("quantum"), CTL(arenas_quantum)},
- {NAME("page"), CTL(arenas_page)},
- {NAME("tcache_max"), CTL(arenas_tcache_max)},
- {NAME("nbins"), CTL(arenas_nbins)},
- {NAME("nhbins"), CTL(arenas_nhbins)},
- {NAME("bin"), CHILD(indexed, arenas_bin)},
- {NAME("nlruns"), CTL(arenas_nlruns)},
- {NAME("lrun"), CHILD(indexed, arenas_lrun)},
- {NAME("purge"), CTL(arenas_purge)},
- {NAME("extend"), CTL(arenas_extend)}
+ {NAME("narenas"), CTL(arenas_narenas)},
+ {NAME("initialized"), CTL(arenas_initialized)},
+ {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)},
+ {NAME("quantum"), CTL(arenas_quantum)},
+ {NAME("page"), CTL(arenas_page)},
+ {NAME("tcache_max"), CTL(arenas_tcache_max)},
+ {NAME("nbins"), CTL(arenas_nbins)},
+ {NAME("nhbins"), CTL(arenas_nhbins)},
+ {NAME("bin"), CHILD(indexed, arenas_bin)},
+ {NAME("nlruns"), CTL(arenas_nlruns)},
+ {NAME("lrun"), CHILD(indexed, arenas_lrun)},
+ {NAME("nhchunks"), CTL(arenas_nhchunks)},
+ {NAME("hchunk"), CHILD(indexed, arenas_hchunk)},
+ {NAME("extend"), CTL(arenas_extend)}
};
static const ctl_named_node_t prof_node[] = {
+ {NAME("thread_active_init"), CTL(prof_thread_active_init)},
{NAME("active"), CTL(prof_active)},
{NAME("dump"), CTL(prof_dump)},
- {NAME("interval"), CTL(prof_interval)}
+ {NAME("gdump"), CTL(prof_gdump)},
+ {NAME("reset"), CTL(prof_reset)},
+ {NAME("interval"), CTL(prof_interval)},
+ {NAME("lg_sample"), CTL(lg_prof_sample)}
};
-static const ctl_named_node_t stats_chunks_node[] = {
- {NAME("current"), CTL(stats_chunks_current)},
- {NAME("total"), CTL(stats_chunks_total)},
- {NAME("high"), CTL(stats_chunks_high)}
-};
-
-static const ctl_named_node_t stats_huge_node[] = {
- {NAME("allocated"), CTL(stats_huge_allocated)},
- {NAME("nmalloc"), CTL(stats_huge_nmalloc)},
- {NAME("ndalloc"), CTL(stats_huge_ndalloc)}
+static const ctl_named_node_t stats_arenas_i_metadata_node[] = {
+ {NAME("mapped"), CTL(stats_arenas_i_metadata_mapped)},
+ {NAME("allocated"), CTL(stats_arenas_i_metadata_allocated)}
};
static const ctl_named_node_t stats_arenas_i_small_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
+ {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
};
static const ctl_named_node_t stats_arenas_i_large_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
+ {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
+};
+
+static const ctl_named_node_t stats_arenas_i_huge_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)}
};
static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
- {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
- {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
- {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)},
- {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)},
- {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)}
+ {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
+ {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)},
+ {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
+ {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
+ {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)},
+ {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)},
+ {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)}
};
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i_bins_j)}
+ {NAME(""), CHILD(named, stats_arenas_i_bins_j)}
};
static const ctl_indexed_node_t stats_arenas_i_bins_node[] = {
@@ -359,35 +408,53 @@ static const ctl_indexed_node_t stats_arenas_i_bins_node[] = {
};
static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = {
- {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)},
- {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)}
+ {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)},
+ {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)}
};
static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i_lruns_j)}
+ {NAME(""), CHILD(named, stats_arenas_i_lruns_j)}
};
static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = {
{INDEX(stats_arenas_i_lruns_j)}
};
+static const ctl_named_node_t stats_arenas_i_hchunks_j_node[] = {
+ {NAME("nmalloc"), CTL(stats_arenas_i_hchunks_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_hchunks_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_hchunks_j_nrequests)},
+ {NAME("curhchunks"), CTL(stats_arenas_i_hchunks_j_curhchunks)}
+};
+static const ctl_named_node_t super_stats_arenas_i_hchunks_j_node[] = {
+ {NAME(""), CHILD(named, stats_arenas_i_hchunks_j)}
+};
+
+static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = {
+ {INDEX(stats_arenas_i_hchunks_j)}
+};
+
static const ctl_named_node_t stats_arenas_i_node[] = {
- {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
- {NAME("dss"), CTL(stats_arenas_i_dss)},
- {NAME("pactive"), CTL(stats_arenas_i_pactive)},
- {NAME("pdirty"), CTL(stats_arenas_i_pdirty)},
- {NAME("mapped"), CTL(stats_arenas_i_mapped)},
- {NAME("npurge"), CTL(stats_arenas_i_npurge)},
- {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)},
- {NAME("purged"), CTL(stats_arenas_i_purged)},
- {NAME("small"), CHILD(named, stats_arenas_i_small)},
- {NAME("large"), CHILD(named, stats_arenas_i_large)},
- {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
- {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)}
+ {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
+ {NAME("dss"), CTL(stats_arenas_i_dss)},
+ {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)},
+ {NAME("pactive"), CTL(stats_arenas_i_pactive)},
+ {NAME("pdirty"), CTL(stats_arenas_i_pdirty)},
+ {NAME("mapped"), CTL(stats_arenas_i_mapped)},
+ {NAME("npurge"), CTL(stats_arenas_i_npurge)},
+ {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)},
+ {NAME("purged"), CTL(stats_arenas_i_purged)},
+ {NAME("metadata"), CHILD(named, stats_arenas_i_metadata)},
+ {NAME("small"), CHILD(named, stats_arenas_i_small)},
+ {NAME("large"), CHILD(named, stats_arenas_i_large)},
+ {NAME("huge"), CHILD(named, stats_arenas_i_huge)},
+ {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
+ {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)},
+ {NAME("hchunks"), CHILD(indexed, stats_arenas_i_hchunks)}
};
static const ctl_named_node_t super_stats_arenas_i_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i)}
+ {NAME(""), CHILD(named, stats_arenas_i)}
};
static const ctl_indexed_node_t stats_arenas_node[] = {
@@ -395,13 +462,13 @@ static const ctl_indexed_node_t stats_arenas_node[] = {
};
static const ctl_named_node_t stats_node[] = {
- {NAME("cactive"), CTL(stats_cactive)},
- {NAME("allocated"), CTL(stats_allocated)},
- {NAME("active"), CTL(stats_active)},
- {NAME("mapped"), CTL(stats_mapped)},
- {NAME("chunks"), CHILD(named, stats_chunks)},
- {NAME("huge"), CHILD(named, stats_huge)},
- {NAME("arenas"), CHILD(indexed, stats_arenas)}
+ {NAME("cactive"), CTL(stats_cactive)},
+ {NAME("allocated"), CTL(stats_allocated)},
+ {NAME("active"), CTL(stats_active)},
+ {NAME("metadata"), CTL(stats_metadata)},
+ {NAME("resident"), CTL(stats_resident)},
+ {NAME("mapped"), CTL(stats_mapped)},
+ {NAME("arenas"), CHILD(indexed, stats_arenas)}
};
static const ctl_named_node_t root_node[] = {
@@ -410,6 +477,7 @@ static const ctl_named_node_t root_node[] = {
{NAME("thread"), CHILD(named, thread)},
{NAME("config"), CHILD(named, config)},
{NAME("opt"), CHILD(named, opt)},
+ {NAME("tcache"), CHILD(named, tcache)},
{NAME("arena"), CHILD(indexed, arena)},
{NAME("arenas"), CHILD(named, arenas)},
{NAME("prof"), CHILD(named, prof)},
@@ -431,12 +499,19 @@ ctl_arena_init(ctl_arena_stats_t *astats)
{
if (astats->lstats == NULL) {
- astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+ astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses *
sizeof(malloc_large_stats_t));
if (astats->lstats == NULL)
return (true);
}
+ if (astats->hstats == NULL) {
+ astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses *
+ sizeof(malloc_huge_stats_t));
+ if (astats->hstats == NULL)
+ return (true);
+ }
+
return (false);
}
@@ -445,6 +520,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats)
{
astats->dss = dss_prec_names[dss_prec_limit];
+ astats->lg_dirty_mult = -1;
astats->pactive = 0;
astats->pdirty = 0;
if (config_stats) {
@@ -456,6 +532,8 @@ ctl_arena_clear(ctl_arena_stats_t *astats)
memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t));
memset(astats->lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
+ memset(astats->hstats, 0, nhclasses *
+ sizeof(malloc_huge_stats_t));
}
}
@@ -464,11 +542,13 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
{
unsigned i;
- arena_stats_merge(arena, &cstats->dss, &cstats->pactive,
- &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats);
+ arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult,
+ &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats,
+ cstats->lstats, cstats->hstats);
for (i = 0; i < NBINS; i++) {
- cstats->allocated_small += cstats->bstats[i].allocated;
+ cstats->allocated_small += cstats->bstats[i].curregs *
+ index2size(i);
cstats->nmalloc_small += cstats->bstats[i].nmalloc;
cstats->ndalloc_small += cstats->bstats[i].ndalloc;
cstats->nrequests_small += cstats->bstats[i].nrequests;
@@ -488,6 +568,9 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
sstats->astats.nmadvise += astats->astats.nmadvise;
sstats->astats.purged += astats->astats.purged;
+ sstats->astats.metadata_mapped += astats->astats.metadata_mapped;
+ sstats->astats.metadata_allocated += astats->astats.metadata_allocated;
+
sstats->allocated_small += astats->allocated_small;
sstats->nmalloc_small += astats->nmalloc_small;
sstats->ndalloc_small += astats->ndalloc_small;
@@ -498,18 +581,15 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
sstats->astats.nrequests_large += astats->astats.nrequests_large;
- for (i = 0; i < nlclasses; i++) {
- sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
- sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
- sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
- sstats->lstats[i].curruns += astats->lstats[i].curruns;
- }
+ sstats->astats.allocated_huge += astats->astats.allocated_huge;
+ sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge;
+ sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge;
for (i = 0; i < NBINS; i++) {
- sstats->bstats[i].allocated += astats->bstats[i].allocated;
sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
+ sstats->bstats[i].curregs += astats->bstats[i].curregs;
if (config_tcache) {
sstats->bstats[i].nfills += astats->bstats[i].nfills;
sstats->bstats[i].nflushes +=
@@ -519,6 +599,19 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
sstats->bstats[i].reruns += astats->bstats[i].reruns;
sstats->bstats[i].curruns += astats->bstats[i].curruns;
}
+
+ for (i = 0; i < nlclasses; i++) {
+ sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
+ sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
+ sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
+ sstats->lstats[i].curruns += astats->lstats[i].curruns;
+ }
+
+ for (i = 0; i < nhclasses; i++) {
+ sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc;
+ sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc;
+ sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks;
+ }
}
static void
@@ -547,27 +640,23 @@ static bool
ctl_grow(void)
{
ctl_arena_stats_t *astats;
- arena_t **tarenas;
- /* Allocate extended arena stats and arenas arrays. */
- astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) *
+ /* Initialize new arena. */
+ if (arena_init(ctl_stats.narenas) == NULL)
+ return (true);
+
+ /* Allocate extended arena stats. */
+ astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) *
sizeof(ctl_arena_stats_t));
if (astats == NULL)
return (true);
- tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) *
- sizeof(arena_t *));
- if (tarenas == NULL) {
- idalloc(astats);
- return (true);
- }
/* Initialize the new astats element. */
memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) *
sizeof(ctl_arena_stats_t));
memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t));
if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) {
- idalloc(tarenas);
- idalloc(astats);
+ a0dalloc(astats);
return (true);
}
/* Swap merged stats to their new location. */
@@ -580,32 +669,7 @@ ctl_grow(void)
memcpy(&astats[ctl_stats.narenas + 1], &tstats,
sizeof(ctl_arena_stats_t));
}
- /* Initialize the new arenas element. */
- tarenas[ctl_stats.narenas] = NULL;
- {
- arena_t **arenas_old = arenas;
- /*
- * Swap extended arenas array into place. Although ctl_mtx
- * protects this function from other threads extending the
- * array, it does not protect from other threads mutating it
- * (i.e. initializing arenas and setting array elements to
- * point to them). Therefore, array copying must happen under
- * the protection of arenas_lock.
- */
- malloc_mutex_lock(&arenas_lock);
- arenas = tarenas;
- memcpy(arenas, arenas_old, ctl_stats.narenas *
- sizeof(arena_t *));
- narenas_total++;
- arenas_extend(narenas_total - 1);
- malloc_mutex_unlock(&arenas_lock);
- /*
- * Deallocate arenas_old only if it came from imalloc() (not
- * base_alloc()).
- */
- if (ctl_stats.narenas != narenas_auto)
- idalloc(arenas_old);
- }
+ a0dalloc(ctl_stats.arenas);
ctl_stats.arenas = astats;
ctl_stats.narenas++;
@@ -615,23 +679,11 @@ ctl_grow(void)
static void
ctl_refresh(void)
{
+ tsd_t *tsd;
unsigned i;
+ bool refreshed;
VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
- if (config_stats) {
- malloc_mutex_lock(&chunks_mtx);
- ctl_stats.chunks.current = stats_chunks.curchunks;
- ctl_stats.chunks.total = stats_chunks.nchunks;
- ctl_stats.chunks.high = stats_chunks.highchunks;
- malloc_mutex_unlock(&chunks_mtx);
-
- malloc_mutex_lock(&huge_mtx);
- ctl_stats.huge.allocated = huge_allocated;
- ctl_stats.huge.nmalloc = huge_nmalloc;
- ctl_stats.huge.ndalloc = huge_ndalloc;
- malloc_mutex_unlock(&huge_mtx);
- }
-
/*
* Clear sum stats, since they will be merged into by
* ctl_arena_refresh().
@@ -639,15 +691,22 @@ ctl_refresh(void)
ctl_stats.arenas[ctl_stats.narenas].nthreads = 0;
ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
+ tsd = tsd_fetch();
+ for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) {
+ tarenas[i] = arena_get(tsd, i, false, false);
+ if (tarenas[i] == NULL && !refreshed) {
+ tarenas[i] = arena_get(tsd, i, false, true);
+ refreshed = true;
+ }
+ }
+
for (i = 0; i < ctl_stats.narenas; i++) {
- if (arenas[i] != NULL)
- ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+ if (tarenas[i] != NULL)
+ ctl_stats.arenas[i].nthreads = arena_nbound(i);
else
ctl_stats.arenas[i].nthreads = 0;
}
- malloc_mutex_unlock(&arenas_lock);
+
for (i = 0; i < ctl_stats.narenas; i++) {
bool initialized = (tarenas[i] != NULL);
@@ -657,14 +716,24 @@ ctl_refresh(void)
}
if (config_stats) {
+ size_t base_allocated, base_resident, base_mapped;
+ base_stats_get(&base_allocated, &base_resident, &base_mapped);
ctl_stats.allocated =
- ctl_stats.arenas[ctl_stats.narenas].allocated_small
- + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large
- + ctl_stats.huge.allocated;
+ ctl_stats.arenas[ctl_stats.narenas].allocated_small +
+ ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large +
+ ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge;
ctl_stats.active =
- (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE)
- + ctl_stats.huge.allocated;
- ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+ (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE);
+ ctl_stats.metadata = base_allocated +
+ ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped +
+ ctl_stats.arenas[ctl_stats.narenas].astats
+ .metadata_allocated;
+ ctl_stats.resident = base_resident +
+ ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped +
+ ((ctl_stats.arenas[ctl_stats.narenas].pactive +
+ ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE);
+ ctl_stats.mapped = base_mapped +
+ ctl_stats.arenas[ctl_stats.narenas].astats.mapped;
}
ctl_epoch++;
@@ -676,14 +745,13 @@ ctl_init(void)
bool ret;
malloc_mutex_lock(&ctl_mtx);
- if (ctl_initialized == false) {
+ if (!ctl_initialized) {
/*
* Allocate space for one extra arena stats element, which
* contains summed stats across all arenas.
*/
- assert(narenas_auto == narenas_total_get());
- ctl_stats.narenas = narenas_auto;
- ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc(
+ ctl_stats.narenas = narenas_total_get();
+ ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc(
(ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t));
if (ctl_stats.arenas == NULL) {
ret = true;
@@ -701,6 +769,15 @@ ctl_init(void)
unsigned i;
for (i = 0; i <= ctl_stats.narenas; i++) {
if (ctl_arena_init(&ctl_stats.arenas[i])) {
+ unsigned j;
+ for (j = 0; j < i; j++) {
+ a0dalloc(
+ ctl_stats.arenas[j].lstats);
+ a0dalloc(
+ ctl_stats.arenas[j].hstats);
+ }
+ a0dalloc(ctl_stats.arenas);
+ ctl_stats.arenas = NULL;
ret = true;
goto label_return;
}
@@ -826,7 +903,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
size_t mib[CTL_MAX_DEPTH];
const ctl_named_node_t *node;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init()) {
ret = EAGAIN;
goto label_return;
}
@@ -853,7 +930,7 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
{
int ret;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init()) {
ret = EAGAIN;
goto label_return;
}
@@ -871,7 +948,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
const ctl_named_node_t *node;
size_t i;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init()) {
ret = EAGAIN;
goto label_return;
}
@@ -963,6 +1040,14 @@ ctl_postfork_child(void)
} \
} while (0)
+#define READ_XOR_WRITE() do { \
+ if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \
+ newlen != 0)) { \
+ ret = EPERM; \
+ goto label_return; \
+ } \
+} while (0)
+
#define READ(v, t) do { \
if (oldp != NULL && oldlenp != NULL) { \
if (*oldlenp != sizeof(t)) { \
@@ -971,8 +1056,8 @@ ctl_postfork_child(void)
memcpy(oldp, (void *)&(v), copylen); \
ret = EINVAL; \
goto label_return; \
- } else \
- *(t *)oldp = (v); \
+ } \
+ *(t *)oldp = (v); \
} \
} while (0)
@@ -998,7 +1083,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
int ret; \
t oldval; \
\
- if ((c) == false) \
+ if (!(c)) \
return (ENOENT); \
if (l) \
malloc_mutex_lock(&ctl_mtx); \
@@ -1021,7 +1106,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
int ret; \
t oldval; \
\
- if ((c) == false) \
+ if (!(c)) \
return (ENOENT); \
malloc_mutex_lock(&ctl_mtx); \
READONLY(); \
@@ -1065,7 +1150,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
int ret; \
t oldval; \
\
- if ((c) == false) \
+ if (!(c)) \
return (ENOENT); \
READONLY(); \
oldval = (v); \
@@ -1093,6 +1178,27 @@ label_return: \
return (ret); \
}
+#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+ void *newp, size_t newlen) \
+{ \
+ int ret; \
+ t oldval; \
+ tsd_t *tsd; \
+ \
+ if (!(c)) \
+ return (ENOENT); \
+ READONLY(); \
+ tsd = tsd_fetch(); \
+ oldval = (m(tsd)); \
+ READ(oldval, t); \
+ \
+ ret = 0; \
+label_return: \
+ return (ret); \
+}
+
#define CTL_RO_BOOL_CONFIG_GEN(n) \
static int \
n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
@@ -1135,11 +1241,10 @@ label_return:
/******************************************************************************/
+CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious)
CTL_RO_BOOL_CONFIG_GEN(config_debug)
-CTL_RO_BOOL_CONFIG_GEN(config_dss)
CTL_RO_BOOL_CONFIG_GEN(config_fill)
CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock)
-CTL_RO_BOOL_CONFIG_GEN(config_mremap)
CTL_RO_BOOL_CONFIG_GEN(config_munmap)
CTL_RO_BOOL_CONFIG_GEN(config_prof)
CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc)
@@ -1159,18 +1264,19 @@ CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
-CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool)
+CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t)
CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool)
CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool)
CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
-CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
-CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */
+CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init,
+ opt_prof_thread_active_init, bool)
CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t)
CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool)
CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
@@ -1185,14 +1291,21 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
int ret;
+ tsd_t *tsd;
+ arena_t *oldarena;
unsigned newind, oldind;
+ tsd = tsd_fetch();
+ oldarena = arena_choose(tsd, NULL);
+ if (oldarena == NULL)
+ return (EAGAIN);
+
malloc_mutex_lock(&ctl_mtx);
- newind = oldind = choose_arena(NULL)->ind;
+ newind = oldind = oldarena->ind;
WRITE(newind, unsigned);
READ(oldind, unsigned);
if (newind != oldind) {
- arena_t *arena;
+ arena_t *newarena;
if (newind >= ctl_stats.narenas) {
/* New arena index is out of range. */
@@ -1201,28 +1314,20 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
}
/* Initialize arena if necessary. */
- malloc_mutex_lock(&arenas_lock);
- if ((arena = arenas[newind]) == NULL && (arena =
- arenas_extend(newind)) == NULL) {
- malloc_mutex_unlock(&arenas_lock);
+ newarena = arena_get(tsd, newind, true, true);
+ if (newarena == NULL) {
ret = EAGAIN;
goto label_return;
}
- assert(arena == arenas[newind]);
- arenas[oldind]->nthreads--;
- arenas[newind]->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
-
- /* Set new arena association. */
+ /* Set new arena/tcache associations. */
+ arena_migrate(tsd, oldind, newind);
if (config_tcache) {
- tcache_t *tcache;
- if ((uintptr_t)(tcache = *tcache_tsd_get()) >
- (uintptr_t)TCACHE_STATE_MAX) {
- tcache_arena_dissociate(tcache);
- tcache_arena_associate(tcache, arena);
+ tcache_t *tcache = tsd_tcache_get(tsd);
+ if (tcache != NULL) {
+ tcache_arena_reassociate(tcache, oldarena,
+ newarena);
}
}
- arenas_tsd_set(&arena);
}
ret = 0;
@@ -1231,14 +1336,14 @@ label_return:
return (ret);
}
-CTL_RO_NL_CGEN(config_stats, thread_allocated,
- thread_allocated_tsd_get()->allocated, uint64_t)
-CTL_RO_NL_CGEN(config_stats, thread_allocatedp,
- &thread_allocated_tsd_get()->allocated, uint64_t *)
-CTL_RO_NL_CGEN(config_stats, thread_deallocated,
- thread_allocated_tsd_get()->deallocated, uint64_t)
-CTL_RO_NL_CGEN(config_stats, thread_deallocatedp,
- &thread_allocated_tsd_get()->deallocated, uint64_t *)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get,
+ uint64_t)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get,
+ uint64_t *)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get,
+ uint64_t)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp,
+ tsd_thread_deallocatedp_get, uint64_t *)
static int
thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
@@ -1247,7 +1352,7 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
int ret;
bool oldval;
- if (config_tcache == false)
+ if (!config_tcache)
return (ENOENT);
oldval = tcache_enabled_get();
@@ -1271,7 +1376,7 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp,
{
int ret;
- if (config_tcache == false)
+ if (!config_tcache)
return (ENOENT);
READONLY();
@@ -1284,17 +1389,170 @@ label_return:
return (ret);
}
+static int
+thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+
+ if (!config_prof)
+ return (ENOENT);
+
+ READ_XOR_WRITE();
+
+ if (newp != NULL) {
+ tsd_t *tsd;
+
+ if (newlen != sizeof(const char *)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+
+ tsd = tsd_fetch();
+
+ if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) !=
+ 0)
+ goto label_return;
+ } else {
+ const char *oldname = prof_thread_name_get();
+ READ(oldname, const char *);
+ }
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+static int
+thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ bool oldval;
+
+ if (!config_prof)
+ return (ENOENT);
+
+ oldval = prof_thread_active_get();
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (prof_thread_active_set(*(bool *)newp)) {
+ ret = EAGAIN;
+ goto label_return;
+ }
+ }
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+/******************************************************************************/
+
+static int
+tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ tsd_t *tsd;
+ unsigned tcache_ind;
+
+ if (!config_tcache)
+ return (ENOENT);
+
+ tsd = tsd_fetch();
+
+ malloc_mutex_lock(&ctl_mtx);
+ READONLY();
+ if (tcaches_create(tsd, &tcache_ind)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ READ(tcache_ind, unsigned);
+
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(&ctl_mtx);
+ return (ret);
+}
+
+static int
+tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ tsd_t *tsd;
+ unsigned tcache_ind;
+
+ if (!config_tcache)
+ return (ENOENT);
+
+ tsd = tsd_fetch();
+
+ WRITEONLY();
+ tcache_ind = UINT_MAX;
+ WRITE(tcache_ind, unsigned);
+ if (tcache_ind == UINT_MAX) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ tcaches_flush(tsd, tcache_ind);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+static int
+tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ tsd_t *tsd;
+ unsigned tcache_ind;
+
+ if (!config_tcache)
+ return (ENOENT);
+
+ tsd = tsd_fetch();
+
+ WRITEONLY();
+ tcache_ind = UINT_MAX;
+ WRITE(tcache_ind, unsigned);
+ if (tcache_ind == UINT_MAX) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ tcaches_destroy(tsd, tcache_ind);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
/******************************************************************************/
/* ctl_mutex must be held during execution of this function. */
static void
arena_purge(unsigned arena_ind)
{
+ tsd_t *tsd;
+ unsigned i;
+ bool refreshed;
VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
- malloc_mutex_unlock(&arenas_lock);
+ tsd = tsd_fetch();
+ for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) {
+ tarenas[i] = arena_get(tsd, i, false, false);
+ if (tarenas[i] == NULL && !refreshed) {
+ tarenas[i] = arena_get(tsd, i, false, true);
+ refreshed = true;
+ }
+ }
if (arena_ind == ctl_stats.narenas) {
unsigned i;
@@ -1330,47 +1588,117 @@ static int
arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
- int ret, i;
- bool match, err;
- const char *dss;
+ int ret;
+ const char *dss = NULL;
unsigned arena_ind = mib[1];
dss_prec_t dss_prec_old = dss_prec_limit;
dss_prec_t dss_prec = dss_prec_limit;
malloc_mutex_lock(&ctl_mtx);
WRITE(dss, const char *);
- match = false;
- for (i = 0; i < dss_prec_limit; i++) {
- if (strcmp(dss_prec_names[i], dss) == 0) {
- dss_prec = i;
- match = true;
- break;
+ if (dss != NULL) {
+ int i;
+ bool match = false;
+
+ for (i = 0; i < dss_prec_limit; i++) {
+ if (strcmp(dss_prec_names[i], dss) == 0) {
+ dss_prec = i;
+ match = true;
+ break;
+ }
+ }
+
+ if (!match) {
+ ret = EINVAL;
+ goto label_return;
}
- }
- if (match == false) {
- ret = EINVAL;
- goto label_return;
}
if (arena_ind < ctl_stats.narenas) {
- arena_t *arena = arenas[arena_ind];
- if (arena != NULL) {
- dss_prec_old = arena_dss_prec_get(arena);
- arena_dss_prec_set(arena, dss_prec);
- err = false;
- } else
- err = true;
+ arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true);
+ if (arena == NULL || (dss_prec != dss_prec_limit &&
+ arena_dss_prec_set(arena, dss_prec))) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ dss_prec_old = arena_dss_prec_get(arena);
} else {
+ if (dss_prec != dss_prec_limit &&
+ chunk_dss_prec_set(dss_prec)) {
+ ret = EFAULT;
+ goto label_return;
+ }
dss_prec_old = chunk_dss_prec_get();
- err = chunk_dss_prec_set(dss_prec);
}
+
dss = dss_prec_names[dss_prec_old];
READ(dss, const char *);
- if (err) {
+
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(&ctl_mtx);
+ return (ret);
+}
+
+static int
+arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ unsigned arena_ind = mib[1];
+ arena_t *arena;
+
+ arena = arena_get(tsd_fetch(), arena_ind, false, true);
+ if (arena == NULL) {
ret = EFAULT;
goto label_return;
}
+ if (oldp != NULL && oldlenp != NULL) {
+ size_t oldval = arena_lg_dirty_mult_get(arena);
+ READ(oldval, ssize_t);
+ }
+ if (newp != NULL) {
+ if (newlen != sizeof(ssize_t)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ }
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+static int
+arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ unsigned arena_ind = mib[1];
+ arena_t *arena;
+
+ malloc_mutex_lock(&ctl_mtx);
+ if (arena_ind < narenas_total_get() && (arena =
+ arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) {
+ if (newp != NULL) {
+ chunk_hooks_t old_chunk_hooks, new_chunk_hooks;
+ WRITE(new_chunk_hooks, chunk_hooks_t);
+ old_chunk_hooks = chunk_hooks_set(arena,
+ &new_chunk_hooks);
+ READ(old_chunk_hooks, chunk_hooks_t);
+ } else {
+ chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena);
+ READ(old_chunk_hooks, chunk_hooks_t);
+ }
+ } else {
+ ret = EFAULT;
+ goto label_return;
+ }
ret = 0;
label_return:
malloc_mutex_unlock(&ctl_mtx);
@@ -1444,6 +1772,32 @@ label_return:
return (ret);
}
+static int
+arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+
+ if (oldp != NULL && oldlenp != NULL) {
+ size_t oldval = arena_lg_dirty_mult_default_get();
+ READ(oldval, ssize_t);
+ }
+ if (newp != NULL) {
+ if (newlen != sizeof(ssize_t)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (arena_lg_dirty_mult_default_set(*(ssize_t *)newp)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ }
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
CTL_RO_NL_GEN(arenas_page, PAGE, size_t)
CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t)
@@ -1461,8 +1815,8 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
return (super_arenas_bin_i_node);
}
-CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t)
-CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t)
+CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned)
+CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t)
static const ctl_named_node_t *
arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
{
@@ -1472,29 +1826,15 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
return (super_arenas_lrun_i_node);
}
-static int
-arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
+CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned)
+CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t)
+static const ctl_named_node_t *
+arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i)
{
- int ret;
- unsigned arena_ind;
-
- malloc_mutex_lock(&ctl_mtx);
- WRITEONLY();
- arena_ind = UINT_MAX;
- WRITE(arena_ind, unsigned);
- if (newp != NULL && arena_ind >= ctl_stats.narenas)
- ret = EFAULT;
- else {
- if (arena_ind == UINT_MAX)
- arena_ind = ctl_stats.narenas;
- arena_purge(arena_ind);
- ret = 0;
- }
-label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ if (i > nhclasses)
+ return (NULL);
+ return (super_arenas_hchunk_i_node);
}
static int
@@ -1522,31 +1862,52 @@ label_return:
/******************************************************************************/
static int
+prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen)
+{
+ int ret;
+ bool oldval;
+
+ if (!config_prof)
+ return (ENOENT);
+
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_thread_active_init_set(*(bool *)newp);
+ } else
+ oldval = prof_thread_active_init_get();
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+static int
prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
int ret;
bool oldval;
- if (config_prof == false)
+ if (!config_prof)
return (ENOENT);
- malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */
- oldval = opt_prof_active;
if (newp != NULL) {
- /*
- * The memory barriers will tend to make opt_prof_active
- * propagate faster on systems with weak memory ordering.
- */
- mb_write();
- WRITE(opt_prof_active, bool);
- mb_write();
- }
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_active_set(*(bool *)newp);
+ } else
+ oldval = prof_active_get();
READ(oldval, bool);
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
return (ret);
}
@@ -1557,7 +1918,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
int ret;
const char *filename = NULL;
- if (config_prof == false)
+ if (!config_prof)
return (ENOENT);
WRITEONLY();
@@ -1573,24 +1934,71 @@ label_return:
return (ret);
}
+static int
+prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ bool oldval;
+
+ if (!config_prof)
+ return (ENOENT);
+
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_gdump_set(*(bool *)newp);
+ } else
+ oldval = prof_gdump_get();
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
+static int
+prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ int ret;
+ size_t lg_sample = lg_prof_sample;
+ tsd_t *tsd;
+
+ if (!config_prof)
+ return (ENOENT);
+
+ WRITEONLY();
+ WRITE(lg_sample, size_t);
+ if (lg_sample >= (sizeof(uint64_t) << 3))
+ lg_sample = (sizeof(uint64_t) << 3) - 1;
+
+ tsd = tsd_fetch();
+
+ prof_reset(tsd, lg_sample);
+
+ ret = 0;
+label_return:
+ return (ret);
+}
+
CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t)
+CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t)
/******************************************************************************/
CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *)
CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t)
CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t)
+CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t)
+CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t)
CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
-CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current,
- size_t)
-CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t)
-CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t)
-CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t)
-CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t)
-CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t)
-
CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *)
+CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult,
+ ssize_t)
CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
@@ -1602,6 +2010,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_purged,
ctl_stats.arenas[mib[2]].astats.purged, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_mapped,
+ ctl_stats.arenas[mib[2]].astats.metadata_mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_allocated,
+ ctl_stats.arenas[mib[2]].astats.metadata_allocated, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
ctl_stats.arenas[mib[2]].allocated_small, size_t)
@@ -1619,15 +2031,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_huge_allocated,
+ ctl_stats.arenas[mib[2]].astats.allocated_huge, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc,
+ ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_huge_ndalloc,
+ ctl_stats.arenas[mib[2]].astats.ndalloc_huge, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nrequests,
+ ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) /* Intentional. */
-CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc,
ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc,
ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests,
ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs,
+ ctl_stats.arenas[mib[2]].bstats[mib[4]].curregs, size_t)
CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills,
ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes,
@@ -1666,13 +2086,32 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
return (super_stats_arenas_i_lruns_j_node);
}
+CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc,
+ ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_ndalloc,
+ ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests,
+ ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, /* Intentional. */
+ uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks,
+ ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t)
+
+static const ctl_named_node_t *
+stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+
+ if (j > nhclasses)
+ return (NULL);
+ return (super_stats_arenas_i_hchunks_j_node);
+}
+
static const ctl_named_node_t *
stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
{
const ctl_named_node_t * ret;
malloc_mutex_lock(&ctl_mtx);
- if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) {
+ if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) {
ret = NULL;
goto label_return;
}
diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c
index 8c09b486e..13f94411c 100644
--- a/deps/jemalloc/src/extent.c
+++ b/deps/jemalloc/src/extent.c
@@ -3,17 +3,32 @@
/******************************************************************************/
-static inline int
+JEMALLOC_INLINE_C size_t
+extent_quantize(size_t size)
+{
+
+ /*
+ * Round down to the nearest chunk size that can actually be requested
+ * during normal huge allocation.
+ */
+ return (index2size(size2index(size + 1) - 1));
+}
+
+JEMALLOC_INLINE_C int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
int ret;
- size_t a_size = a->size;
- size_t b_size = b->size;
-
- ret = (a_size > b_size) - (a_size < b_size);
+ size_t a_qsize = extent_quantize(extent_node_size_get(a));
+ size_t b_qsize = extent_quantize(extent_node_size_get(b));
+
+ /*
+ * Compare based on quantized size rather than size, in order to sort
+ * equally useful extents only by address.
+ */
+ ret = (a_qsize > b_qsize) - (a_qsize < b_qsize);
if (ret == 0) {
- uintptr_t a_addr = (uintptr_t)a->addr;
- uintptr_t b_addr = (uintptr_t)b->addr;
+ uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a);
+ uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b);
ret = (a_addr > b_addr) - (a_addr < b_addr);
}
@@ -22,18 +37,17 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b)
}
/* Generate red-black tree functions. */
-rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
+rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link,
extent_szad_comp)
-static inline int
+JEMALLOC_INLINE_C int
extent_ad_comp(extent_node_t *a, extent_node_t *b)
{
- uintptr_t a_addr = (uintptr_t)a->addr;
- uintptr_t b_addr = (uintptr_t)b->addr;
+ uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a);
+ uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b);
return ((a_addr > b_addr) - (a_addr < b_addr));
}
/* Generate red-black tree functions. */
-rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
- extent_ad_comp)
+rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp)
diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c
index d72f21357..1e9a66512 100644
--- a/deps/jemalloc/src/huge.c
+++ b/deps/jemalloc/src/huge.c
@@ -2,44 +2,68 @@
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/
-/* Data. */
-uint64_t huge_nmalloc;
-uint64_t huge_ndalloc;
-size_t huge_allocated;
+static extent_node_t *
+huge_node_get(const void *ptr)
+{
+ extent_node_t *node;
-malloc_mutex_t huge_mtx;
+ node = chunk_lookup(ptr, true);
+ assert(!extent_node_achunk_get(node));
-/******************************************************************************/
+ return (node);
+}
+
+static bool
+huge_node_set(const void *ptr, extent_node_t *node)
+{
+
+ assert(extent_node_addr_get(node) == ptr);
+ assert(!extent_node_achunk_get(node));
+ return (chunk_register(ptr, node));
+}
-/* Tree of chunks that are stand-alone huge allocations. */
-static extent_tree_t huge;
+static void
+huge_node_unset(const void *ptr, const extent_node_t *node)
+{
+
+ chunk_deregister(ptr, node);
+}
void *
-huge_malloc(size_t size, bool zero, dss_prec_t dss_prec)
+huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache)
{
+ size_t usize;
- return (huge_palloc(size, chunksize, zero, dss_prec));
+ usize = s2u(size);
+ if (usize == 0) {
+ /* size_t overflow. */
+ return (NULL);
+ }
+
+ return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache));
}
void *
-huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec)
+huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache)
{
void *ret;
- size_t csize;
+ size_t usize;
extent_node_t *node;
bool is_zeroed;
/* Allocate one or more contiguous chunks for this request. */
- csize = CHUNK_CEILING(size);
- if (csize == 0) {
- /* size is large enough to cause size_t wrap-around. */
+ usize = sa2u(size, alignment);
+ if (unlikely(usize == 0))
return (NULL);
- }
+ assert(usize >= chunksize);
/* Allocate an extent node with which to track the chunk. */
- node = base_node_alloc();
+ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)),
+ CACHELINE, false, tcache, true, arena);
if (node == NULL)
return (NULL);
@@ -48,145 +72,33 @@ huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec)
* it is possible to make correct junk/zero fill decisions below.
*/
is_zeroed = zero;
- ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec);
- if (ret == NULL) {
- base_node_dealloc(node);
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena,
+ size, alignment, &is_zeroed)) == NULL) {
+ idalloctm(tsd, node, tcache, true);
return (NULL);
}
- /* Insert node into huge. */
- node->addr = ret;
- node->size = csize;
-
- malloc_mutex_lock(&huge_mtx);
- extent_tree_ad_insert(&huge, node);
- if (config_stats) {
- stats_cactive_add(csize);
- huge_nmalloc++;
- huge_allocated += csize;
- }
- malloc_mutex_unlock(&huge_mtx);
-
- if (config_fill && zero == false) {
- if (opt_junk)
- memset(ret, 0xa5, csize);
- else if (opt_zero && is_zeroed == false)
- memset(ret, 0, csize);
- }
-
- return (ret);
-}
-
-bool
-huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
-{
-
- /*
- * Avoid moving the allocation if the size class can be left the same.
- */
- if (oldsize > arena_maxclass
- && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
- && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
- assert(CHUNK_CEILING(oldsize) == oldsize);
- return (false);
- }
-
- /* Reallocation would require a move. */
- return (true);
-}
-
-void *
-huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec)
-{
- void *ret;
- size_t copysize;
-
- /* Try to avoid moving the allocation. */
- if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false)
- return (ptr);
+ extent_node_init(node, arena, ret, size, is_zeroed, true);
- /*
- * size and oldsize are different enough that we need to use a
- * different size class. In that case, fall back to allocating new
- * space and copying.
- */
- if (alignment > chunksize)
- ret = huge_palloc(size + extra, alignment, zero, dss_prec);
- else
- ret = huge_malloc(size + extra, zero, dss_prec);
-
- if (ret == NULL) {
- if (extra == 0)
- return (NULL);
- /* Try again, this time without extra. */
- if (alignment > chunksize)
- ret = huge_palloc(size, alignment, zero, dss_prec);
- else
- ret = huge_malloc(size, zero, dss_prec);
-
- if (ret == NULL)
- return (NULL);
+ if (huge_node_set(ret, node)) {
+ arena_chunk_dalloc_huge(arena, ret, size);
+ idalloctm(tsd, node, tcache, true);
+ return (NULL);
}
- /*
- * Copy at most size bytes (not size+extra), since the caller has no
- * expectation that the extra bytes will be reliably preserved.
- */
- copysize = (size < oldsize) ? size : oldsize;
+ /* Insert node into huge. */
+ malloc_mutex_lock(&arena->huge_mtx);
+ ql_elm_new(node, ql_link);
+ ql_tail_insert(&arena->huge, node, ql_link);
+ malloc_mutex_unlock(&arena->huge_mtx);
-#ifdef JEMALLOC_MREMAP
- /*
- * Use mremap(2) if this is a huge-->huge reallocation, and neither the
- * source nor the destination are in dss.
- */
- if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr)
- == false && chunk_in_dss(ret) == false))) {
- size_t newsize = huge_salloc(ret);
+ if (zero || (config_fill && unlikely(opt_zero))) {
+ if (!is_zeroed)
+ memset(ret, 0, size);
+ } else if (config_fill && unlikely(opt_junk_alloc))
+ memset(ret, 0xa5, size);
- /*
- * Remove ptr from the tree of huge allocations before
- * performing the remap operation, in order to avoid the
- * possibility of another thread acquiring that mapping before
- * this one removes it from the tree.
- */
- huge_dalloc(ptr, false);
- if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
- ret) == MAP_FAILED) {
- /*
- * Assuming no chunk management bugs in the allocator,
- * the only documented way an error can occur here is
- * if the application changed the map type for a
- * portion of the old allocation. This is firmly in
- * undefined behavior territory, so write a diagnostic
- * message, and optionally abort.
- */
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc>: Error in mremap(): %s\n",
- buf);
- if (opt_abort)
- abort();
- memcpy(ret, ptr, copysize);
- chunk_dealloc_mmap(ptr, oldsize);
- } else if (config_fill && zero == false && opt_junk && oldsize
- < newsize) {
- /*
- * mremap(2) clobbers the original mapping, so
- * junk/zero filling is not preserved. There is no
- * need to zero fill here, since any trailing
- * uninititialized memory is demand-zeroed by the
- * kernel, but junk filling must be redone.
- */
- memset(ret + oldsize, 0xa5, newsize - oldsize);
- }
- } else
-#endif
- {
- memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
- }
return (ret);
}
@@ -198,12 +110,12 @@ static void
huge_dalloc_junk(void *ptr, size_t usize)
{
- if (config_fill && config_dss && opt_junk) {
+ if (config_fill && have_dss && unlikely(opt_junk_free)) {
/*
* Only bother junk filling if the chunk isn't about to be
* unmapped.
*/
- if (config_munmap == false || (config_dss && chunk_in_dss(ptr)))
+ if (!config_munmap || (have_dss && chunk_in_dss(ptr)))
memset(ptr, 0x5a, usize);
}
}
@@ -213,135 +125,311 @@ huge_dalloc_junk(void *ptr, size_t usize)
huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl);
#endif
-void
-huge_dalloc(void *ptr, bool unmap)
+static void
+huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
+ size_t usize_max, bool zero)
{
- extent_node_t *node, key;
+ size_t usize, usize_next;
+ extent_node_t *node;
+ arena_t *arena;
+ chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
+ bool pre_zeroed, post_zeroed;
+
+ /* Increase usize to incorporate extra. */
+ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1))
+ <= oldsize; usize = usize_next)
+ ; /* Do nothing. */
+
+ if (oldsize == usize)
+ return;
+
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ pre_zeroed = extent_node_zeroed_get(node);
+
+ /* Fill if necessary (shrinking). */
+ if (oldsize > usize) {
+ size_t sdiff = oldsize - usize;
+ if (config_fill && unlikely(opt_junk_free)) {
+ memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff);
+ post_zeroed = false;
+ } else {
+ post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
+ ptr, CHUNK_CEILING(oldsize), usize, sdiff);
+ }
+ } else
+ post_zeroed = pre_zeroed;
+
+ malloc_mutex_lock(&arena->huge_mtx);
+ /* Update the size of the huge allocation. */
+ assert(extent_node_size_get(node) != usize);
+ extent_node_size_set(node, usize);
+ /* Update zeroed. */
+ extent_node_zeroed_set(node, post_zeroed);
+ malloc_mutex_unlock(&arena->huge_mtx);
+
+ arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize);
+
+ /* Fill if necessary (growing). */
+ if (oldsize < usize) {
+ if (zero || (config_fill && unlikely(opt_zero))) {
+ if (!pre_zeroed) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0,
+ usize - oldsize);
+ }
+ } else if (config_fill && unlikely(opt_junk_alloc)) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
+ oldsize);
+ }
+ }
+}
- malloc_mutex_lock(&huge_mtx);
+static bool
+huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize)
+{
+ extent_node_t *node;
+ arena_t *arena;
+ chunk_hooks_t chunk_hooks;
+ size_t cdiff;
+ bool pre_zeroed, post_zeroed;
+
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ pre_zeroed = extent_node_zeroed_get(node);
+ chunk_hooks = chunk_hooks_get(arena);
+
+ assert(oldsize > usize);
+
+ /* Split excess chunks. */
+ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize);
+ if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize),
+ CHUNK_CEILING(usize), cdiff, true, arena->ind))
+ return (true);
- /* Extract from tree of huge allocations. */
- key.addr = ptr;
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
- assert(node->addr == ptr);
- extent_tree_ad_remove(&huge, node);
+ if (oldsize > usize) {
+ size_t sdiff = oldsize - usize;
+ if (config_fill && unlikely(opt_junk_free)) {
+ huge_dalloc_junk((void *)((uintptr_t)ptr + usize),
+ sdiff);
+ post_zeroed = false;
+ } else {
+ post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
+ CHUNK_ADDR2BASE((uintptr_t)ptr + usize),
+ CHUNK_CEILING(oldsize),
+ CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff);
+ }
+ } else
+ post_zeroed = pre_zeroed;
- if (config_stats) {
- stats_cactive_sub(node->size);
- huge_ndalloc++;
- huge_allocated -= node->size;
- }
+ malloc_mutex_lock(&arena->huge_mtx);
+ /* Update the size of the huge allocation. */
+ extent_node_size_set(node, usize);
+ /* Update zeroed. */
+ extent_node_zeroed_set(node, post_zeroed);
+ malloc_mutex_unlock(&arena->huge_mtx);
+
+ /* Zap the excess chunks. */
+ arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize);
+
+ return (false);
+}
+
+static bool
+huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) {
+ extent_node_t *node;
+ arena_t *arena;
+ bool is_zeroed_subchunk, is_zeroed_chunk;
+
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ malloc_mutex_lock(&arena->huge_mtx);
+ is_zeroed_subchunk = extent_node_zeroed_get(node);
+ malloc_mutex_unlock(&arena->huge_mtx);
+
+ /*
+ * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so
+ * that it is possible to make correct junk/zero fill decisions below.
+ */
+ is_zeroed_chunk = zero;
- malloc_mutex_unlock(&huge_mtx);
+ if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize,
+ &is_zeroed_chunk))
+ return (true);
- if (unmap)
- huge_dalloc_junk(node->addr, node->size);
+ malloc_mutex_lock(&arena->huge_mtx);
+ /* Update the size of the huge allocation. */
+ extent_node_size_set(node, usize);
+ malloc_mutex_unlock(&arena->huge_mtx);
- chunk_dealloc(node->addr, node->size, unmap);
+ if (zero || (config_fill && unlikely(opt_zero))) {
+ if (!is_zeroed_subchunk) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0,
+ CHUNK_CEILING(oldsize) - oldsize);
+ }
+ if (!is_zeroed_chunk) {
+ memset((void *)((uintptr_t)ptr +
+ CHUNK_CEILING(oldsize)), 0, usize -
+ CHUNK_CEILING(oldsize));
+ }
+ } else if (config_fill && unlikely(opt_junk_alloc)) {
+ memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize -
+ oldsize);
+ }
- base_node_dealloc(node);
+ return (false);
}
-size_t
-huge_salloc(const void *ptr)
+bool
+huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min,
+ size_t usize_max, bool zero)
{
- size_t ret;
- extent_node_t *node, key;
- malloc_mutex_lock(&huge_mtx);
+ assert(s2u(oldsize) == oldsize);
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
+ /* Both allocations must be huge to avoid a move. */
+ if (oldsize < chunksize || usize_max < chunksize)
+ return (true);
- ret = node->size;
+ if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) {
+ /* Attempt to expand the allocation in-place. */
+ if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero))
+ return (false);
+ /* Try again, this time with usize_min. */
+ if (usize_min < usize_max && CHUNK_CEILING(usize_min) >
+ CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr,
+ oldsize, usize_min, zero))
+ return (false);
+ }
- malloc_mutex_unlock(&huge_mtx);
+ /*
+ * Avoid moving the allocation if the existing chunk size accommodates
+ * the new size.
+ */
+ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min)
+ && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) {
+ huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max,
+ zero);
+ return (false);
+ }
- return (ret);
+ /* Attempt to shrink the allocation in-place. */
+ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max))
+ return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max));
+ return (true);
}
-dss_prec_t
-huge_dss_prec_get(arena_t *arena)
+static void *
+huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache)
{
- return (arena_dss_prec_get(choose_arena(arena)));
+ if (alignment <= chunksize)
+ return (huge_malloc(tsd, arena, usize, zero, tcache));
+ return (huge_palloc(tsd, arena, usize, alignment, zero, tcache));
}
-prof_ctx_t *
-huge_prof_ctx_get(const void *ptr)
+void *
+huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache)
{
- prof_ctx_t *ret;
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
+ void *ret;
+ size_t copysize;
- ret = node->prof_ctx;
+ /* Try to avoid moving the allocation. */
+ if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero))
+ return (ptr);
- malloc_mutex_unlock(&huge_mtx);
+ /*
+ * usize and oldsize are different enough that we need to use a
+ * different size class. In that case, fall back to allocating new
+ * space and copying.
+ */
+ ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero,
+ tcache);
+ if (ret == NULL)
+ return (NULL);
+ copysize = (usize < oldsize) ? usize : oldsize;
+ memcpy(ret, ptr, copysize);
+ isqalloc(tsd, ptr, oldsize, tcache);
return (ret);
}
void
-huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
+ extent_node_t *node;
+ arena_t *arena;
+
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ huge_node_unset(ptr, node);
+ malloc_mutex_lock(&arena->huge_mtx);
+ ql_remove(&arena->huge, node, ql_link);
+ malloc_mutex_unlock(&arena->huge_mtx);
+
+ huge_dalloc_junk(extent_node_addr_get(node),
+ extent_node_size_get(node));
+ arena_chunk_dalloc_huge(extent_node_arena_get(node),
+ extent_node_addr_get(node), extent_node_size_get(node));
+ idalloctm(tsd, node, tcache, true);
+}
- node->prof_ctx = ctx;
+arena_t *
+huge_aalloc(const void *ptr)
+{
- malloc_mutex_unlock(&huge_mtx);
+ return (extent_node_arena_get(huge_node_get(ptr)));
}
-bool
-huge_boot(void)
+size_t
+huge_salloc(const void *ptr)
{
+ size_t size;
+ extent_node_t *node;
+ arena_t *arena;
- /* Initialize chunks data. */
- if (malloc_mutex_init(&huge_mtx))
- return (true);
- extent_tree_ad_new(&huge);
-
- if (config_stats) {
- huge_nmalloc = 0;
- huge_ndalloc = 0;
- huge_allocated = 0;
- }
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ malloc_mutex_lock(&arena->huge_mtx);
+ size = extent_node_size_get(node);
+ malloc_mutex_unlock(&arena->huge_mtx);
- return (false);
+ return (size);
}
-void
-huge_prefork(void)
+prof_tctx_t *
+huge_prof_tctx_get(const void *ptr)
{
+ prof_tctx_t *tctx;
+ extent_node_t *node;
+ arena_t *arena;
+
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ malloc_mutex_lock(&arena->huge_mtx);
+ tctx = extent_node_prof_tctx_get(node);
+ malloc_mutex_unlock(&arena->huge_mtx);
- malloc_mutex_prefork(&huge_mtx);
+ return (tctx);
}
void
-huge_postfork_parent(void)
+huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{
+ extent_node_t *node;
+ arena_t *arena;
- malloc_mutex_postfork_parent(&huge_mtx);
+ node = huge_node_get(ptr);
+ arena = extent_node_arena_get(node);
+ malloc_mutex_lock(&arena->huge_mtx);
+ extent_node_prof_tctx_set(node, tctx);
+ malloc_mutex_unlock(&arena->huge_mtx);
}
void
-huge_postfork_child(void)
+huge_prof_tctx_reset(const void *ptr)
{
- malloc_mutex_postfork_child(&huge_mtx);
+ huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}
diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c
index 204778bc8..fe77c2475 100644
--- a/deps/jemalloc/src/jemalloc.c
+++ b/deps/jemalloc/src/jemalloc.c
@@ -4,12 +4,8 @@
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, arenas, arena_t *, NULL)
-malloc_tsd_data(, thread_allocated, thread_allocated_t,
- THREAD_ALLOCATED_INITIALIZER)
-
/* Runtime configuration options. */
-const char *je_malloc_conf;
+const char *je_malloc_conf JEMALLOC_ATTR(weak);
bool opt_abort =
#ifdef JEMALLOC_DEBUG
true
@@ -17,30 +13,152 @@ bool opt_abort =
false
#endif
;
-bool opt_junk =
+const char *opt_junk =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+ "true"
+#else
+ "false"
+#endif
+ ;
+bool opt_junk_alloc =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+ true
+#else
+ false
+#endif
+ ;
+bool opt_junk_free =
#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
true
#else
false
#endif
;
+
size_t opt_quarantine = ZU(0);
bool opt_redzone = false;
bool opt_utrace = false;
-bool opt_valgrind = false;
bool opt_xmalloc = false;
bool opt_zero = false;
size_t opt_narenas = 0;
-unsigned ncpus;
+/* Initialized to true if the process is running inside Valgrind. */
+bool in_valgrind;
-malloc_mutex_t arenas_lock;
-arena_t **arenas;
-unsigned narenas_total;
-unsigned narenas_auto;
+unsigned ncpus;
-/* Set to true once the allocator has been initialized. */
-static bool malloc_initialized = false;
+/* Protects arenas initialization (arenas, narenas_total). */
+static malloc_mutex_t arenas_lock;
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ *
+ * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
+ * arenas. arenas[narenas_auto..narenas_total) are only used if the application
+ * takes some action to create them and allocate from them.
+ */
+static arena_t **arenas;
+static unsigned narenas_total;
+static arena_t *a0; /* arenas[0]; read-only after initialization. */
+static unsigned narenas_auto; /* Read-only after initialization. */
+
+typedef enum {
+ malloc_init_uninitialized = 3,
+ malloc_init_a0_initialized = 2,
+ malloc_init_recursible = 1,
+ malloc_init_initialized = 0 /* Common case --> jnz. */
+} malloc_init_t;
+static malloc_init_t malloc_init_state = malloc_init_uninitialized;
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t index2size_tab[NSIZES] = {
+#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+ ((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
+ SIZE_CLASSES
+#undef SC
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const uint8_t size2index_tab[] = {
+#if LG_TINY_MIN == 0
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_0(i) i,
+#elif LG_TINY_MIN == 1
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_1(i) i,
+#elif LG_TINY_MIN == 2
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_2(i) i,
+#elif LG_TINY_MIN == 3
+#define S2B_3(i) i,
+#elif LG_TINY_MIN == 4
+#define S2B_4(i) i,
+#elif LG_TINY_MIN == 5
+#define S2B_5(i) i,
+#elif LG_TINY_MIN == 6
+#define S2B_6(i) i,
+#elif LG_TINY_MIN == 7
+#define S2B_7(i) i,
+#elif LG_TINY_MIN == 8
+#define S2B_8(i) i,
+#elif LG_TINY_MIN == 9
+#define S2B_9(i) i,
+#elif LG_TINY_MIN == 10
+#define S2B_10(i) i,
+#elif LG_TINY_MIN == 11
+#define S2B_11(i) i,
+#else
+#error "Unsupported LG_TINY_MIN"
+#endif
+#if LG_TINY_MIN < 1
+#define S2B_1(i) S2B_0(i) S2B_0(i)
+#endif
+#if LG_TINY_MIN < 2
+#define S2B_2(i) S2B_1(i) S2B_1(i)
+#endif
+#if LG_TINY_MIN < 3
+#define S2B_3(i) S2B_2(i) S2B_2(i)
+#endif
+#if LG_TINY_MIN < 4
+#define S2B_4(i) S2B_3(i) S2B_3(i)
+#endif
+#if LG_TINY_MIN < 5
+#define S2B_5(i) S2B_4(i) S2B_4(i)
+#endif
+#if LG_TINY_MIN < 6
+#define S2B_6(i) S2B_5(i) S2B_5(i)
+#endif
+#if LG_TINY_MIN < 7
+#define S2B_7(i) S2B_6(i) S2B_6(i)
+#endif
+#if LG_TINY_MIN < 8
+#define S2B_8(i) S2B_7(i) S2B_7(i)
+#endif
+#if LG_TINY_MIN < 9
+#define S2B_9(i) S2B_8(i) S2B_8(i)
+#endif
+#if LG_TINY_MIN < 10
+#define S2B_10(i) S2B_9(i) S2B_9(i)
+#endif
+#if LG_TINY_MIN < 11
+#define S2B_11(i) S2B_10(i) S2B_10(i)
+#endif
+#define S2B_no(i)
+#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
+ S2B_##lg_delta_lookup(index)
+ SIZE_CLASSES
+#undef S2B_3
+#undef S2B_4
+#undef S2B_5
+#undef S2B_6
+#undef S2B_7
+#undef S2B_8
+#undef S2B_9
+#undef S2B_10
+#undef S2B_11
+#undef S2B_no
+#undef SC
+};
#ifdef JEMALLOC_THREADED_INIT
/* Used to let the initializing thread recursively allocate. */
@@ -57,14 +175,28 @@ static bool malloc_initializer = NO_INITIALIZER;
/* Used to avoid initialization races. */
#ifdef _WIN32
+#if _WIN32_WINNT >= 0x0600
+static malloc_mutex_t init_lock = SRWLOCK_INIT;
+#else
static malloc_mutex_t init_lock;
+static bool init_lock_initialized = false;
JEMALLOC_ATTR(constructor)
static void WINAPI
_init_init_lock(void)
{
- malloc_mutex_init(&init_lock);
+ /* If another constructor in the same binary is using mallctl to
+ * e.g. setup chunk hooks, it may end up running before this one,
+ * and malloc_init_hard will crash trying to lock the uninitialized
+ * lock. So we force an initialization of the lock in
+ * malloc_init_hard as well. We don't try to care about atomicity
+	 * of the accesses to the init_lock_initialized boolean, since it
+ * really only matters early in the process creation, before any
+ * separate thread normally starts doing anything. */
+ if (!init_lock_initialized)
+ malloc_mutex_init(&init_lock);
+ init_lock_initialized = true;
}
#ifdef _MSC_VER
@@ -72,7 +204,7 @@ _init_init_lock(void)
JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
#endif
-
+#endif
#else
static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#endif
@@ -85,7 +217,7 @@ typedef struct {
#ifdef JEMALLOC_UTRACE
# define UTRACE(a, b, c) do { \
- if (opt_utrace) { \
+ if (unlikely(opt_utrace)) { \
int utrace_serrno = errno; \
malloc_utrace_t ut; \
ut.p = (a); \
@@ -105,6 +237,7 @@ typedef struct {
* definition.
*/
+static bool malloc_init_hard_a0(void);
static bool malloc_init_hard(void);
/******************************************************************************/
@@ -112,35 +245,333 @@ static bool malloc_init_hard(void);
* Begin miscellaneous support functions.
*/
+JEMALLOC_ALWAYS_INLINE_C bool
+malloc_initialized(void)
+{
+
+ return (malloc_init_state == malloc_init_initialized);
+}
+
+JEMALLOC_ALWAYS_INLINE_C void
+malloc_thread_init(void)
+{
+
+ /*
+ * TSD initialization can't be safely done as a side effect of
+ * deallocation, because it is possible for a thread to do nothing but
+ * deallocate its TLS data via free(), in which case writing to TLS
+ * would cause write-after-free memory corruption. The quarantine
+ * facility *only* gets used as a side effect of deallocation, so make
+ * a best effort attempt at initializing its TSD by hooking all
+ * allocation events.
+ */
+ if (config_fill && unlikely(opt_quarantine))
+ quarantine_alloc_hook();
+}
+
+JEMALLOC_ALWAYS_INLINE_C bool
+malloc_init_a0(void)
+{
+
+ if (unlikely(malloc_init_state == malloc_init_uninitialized))
+ return (malloc_init_hard_a0());
+ return (false);
+}
+
+JEMALLOC_ALWAYS_INLINE_C bool
+malloc_init(void)
+{
+
+ if (unlikely(!malloc_initialized()) && malloc_init_hard())
+ return (true);
+ malloc_thread_init();
+
+ return (false);
+}
+
+/*
+ * The a0*() functions are used instead of i[mcd]alloc() in situations that
+ * cannot tolerate TLS variable access.
+ */
+
+arena_t *
+a0get(void)
+{
+
+ assert(a0 != NULL);
+ return (a0);
+}
+
+static void *
+a0ialloc(size_t size, bool zero, bool is_metadata)
+{
+
+ if (unlikely(malloc_init_a0()))
+ return (NULL);
+
+ return (iallocztm(NULL, size, zero, false, is_metadata, a0get()));
+}
+
+static void
+a0idalloc(void *ptr, bool is_metadata)
+{
+
+ idalloctm(NULL, ptr, false, is_metadata);
+}
+
+void *
+a0malloc(size_t size)
+{
+
+ return (a0ialloc(size, false, true));
+}
+
+void
+a0dalloc(void *ptr)
+{
+
+ a0idalloc(ptr, true);
+}
+
+/*
+ * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive
+ * situations that cannot tolerate TLS variable access (TLS allocation and very
+ * early internal data structure initialization).
+ */
+
+void *
+bootstrap_malloc(size_t size)
+{
+
+ if (unlikely(size == 0))
+ size = 1;
+
+ return (a0ialloc(size, false, false));
+}
+
+void *
+bootstrap_calloc(size_t num, size_t size)
+{
+ size_t num_size;
+
+ num_size = num * size;
+ if (unlikely(num_size == 0)) {
+ assert(num == 0 || size == 0);
+ num_size = 1;
+ }
+
+ return (a0ialloc(num_size, true, false));
+}
+
+void
+bootstrap_free(void *ptr)
+{
+
+ if (unlikely(ptr == NULL))
+ return;
+
+ a0idalloc(ptr, false);
+}
+
/* Create a new arena and insert it into the arenas array at index ind. */
+static arena_t *
+arena_init_locked(unsigned ind)
+{
+ arena_t *arena;
+
+ /* Expand arenas if necessary. */
+ assert(ind <= narenas_total);
+ if (ind > MALLOCX_ARENA_MAX)
+ return (NULL);
+ if (ind == narenas_total) {
+ unsigned narenas_new = narenas_total + 1;
+ arena_t **arenas_new =
+ (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new *
+ sizeof(arena_t *)));
+ if (arenas_new == NULL)
+ return (NULL);
+ memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *));
+ arenas_new[ind] = NULL;
+ /*
+ * Deallocate only if arenas came from a0malloc() (not
+ * base_alloc()).
+ */
+ if (narenas_total != narenas_auto)
+ a0dalloc(arenas);
+ arenas = arenas_new;
+ narenas_total = narenas_new;
+ }
+
+ /*
+ * Another thread may have already initialized arenas[ind] if it's an
+ * auto arena.
+ */
+ arena = arenas[ind];
+ if (arena != NULL) {
+ assert(ind < narenas_auto);
+ return (arena);
+ }
+
+ /* Actually initialize the arena. */
+ arena = arenas[ind] = arena_new(ind);
+ return (arena);
+}
+
arena_t *
-arenas_extend(unsigned ind)
+arena_init(unsigned ind)
{
- arena_t *ret;
+ arena_t *arena;
- ret = (arena_t *)base_alloc(sizeof(arena_t));
- if (ret != NULL && arena_new(ret, ind) == false) {
- arenas[ind] = ret;
- return (ret);
+ malloc_mutex_lock(&arenas_lock);
+ arena = arena_init_locked(ind);
+ malloc_mutex_unlock(&arenas_lock);
+ return (arena);
+}
+
+unsigned
+narenas_total_get(void)
+{
+ unsigned narenas;
+
+ malloc_mutex_lock(&arenas_lock);
+ narenas = narenas_total;
+ malloc_mutex_unlock(&arenas_lock);
+
+ return (narenas);
+}
+
+static void
+arena_bind_locked(tsd_t *tsd, unsigned ind)
+{
+ arena_t *arena;
+
+ arena = arenas[ind];
+ arena->nthreads++;
+
+ if (tsd_nominal(tsd))
+ tsd_arena_set(tsd, arena);
+}
+
+static void
+arena_bind(tsd_t *tsd, unsigned ind)
+{
+
+ malloc_mutex_lock(&arenas_lock);
+ arena_bind_locked(tsd, ind);
+ malloc_mutex_unlock(&arenas_lock);
+}
+
+void
+arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind)
+{
+ arena_t *oldarena, *newarena;
+
+ malloc_mutex_lock(&arenas_lock);
+ oldarena = arenas[oldind];
+ newarena = arenas[newind];
+ oldarena->nthreads--;
+ newarena->nthreads++;
+ malloc_mutex_unlock(&arenas_lock);
+ tsd_arena_set(tsd, newarena);
+}
+
+unsigned
+arena_nbound(unsigned ind)
+{
+ unsigned nthreads;
+
+ malloc_mutex_lock(&arenas_lock);
+ nthreads = arenas[ind]->nthreads;
+ malloc_mutex_unlock(&arenas_lock);
+ return (nthreads);
+}
+
+static void
+arena_unbind(tsd_t *tsd, unsigned ind)
+{
+ arena_t *arena;
+
+ malloc_mutex_lock(&arenas_lock);
+ arena = arenas[ind];
+ arena->nthreads--;
+ malloc_mutex_unlock(&arenas_lock);
+ tsd_arena_set(tsd, NULL);
+}
+
+arena_t *
+arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing)
+{
+ arena_t *arena;
+ arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+ unsigned narenas_cache = tsd_narenas_cache_get(tsd);
+ unsigned narenas_actual = narenas_total_get();
+
+ /* Deallocate old cache if it's too small. */
+ if (arenas_cache != NULL && narenas_cache < narenas_actual) {
+ a0dalloc(arenas_cache);
+ arenas_cache = NULL;
+ narenas_cache = 0;
+ tsd_arenas_cache_set(tsd, arenas_cache);
+ tsd_narenas_cache_set(tsd, narenas_cache);
+ }
+
+ /* Allocate cache if it's missing. */
+ if (arenas_cache == NULL) {
+ bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd);
+ assert(ind < narenas_actual || !init_if_missing);
+ narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1;
+
+ if (tsd_nominal(tsd) && !*arenas_cache_bypassp) {
+ *arenas_cache_bypassp = true;
+ arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) *
+ narenas_cache);
+ *arenas_cache_bypassp = false;
+ }
+ if (arenas_cache == NULL) {
+ /*
+ * This function must always tell the truth, even if
+ * it's slow, so don't let OOM, thread cleanup (note
+ * tsd_nominal check), nor recursive allocation
+ * avoidance (note arenas_cache_bypass check) get in the
+ * way.
+ */
+ if (ind >= narenas_actual)
+ return (NULL);
+ malloc_mutex_lock(&arenas_lock);
+ arena = arenas[ind];
+ malloc_mutex_unlock(&arenas_lock);
+ return (arena);
+ }
+ assert(tsd_nominal(tsd) && !*arenas_cache_bypassp);
+ tsd_arenas_cache_set(tsd, arenas_cache);
+ tsd_narenas_cache_set(tsd, narenas_cache);
}
- /* Only reached if there is an OOM error. */
/*
- * OOM here is quite inconvenient to propagate, since dealing with it
- * would require a check for failure in the fast path. Instead, punt
- * by using arenas[0]. In practice, this is an extremely unlikely
- * failure.
+ * Copy to cache. It's possible that the actual number of arenas has
+ * increased since narenas_total_get() was called above, but that causes
+ * no correctness issues unless two threads concurrently execute the
+ * arenas.extend mallctl, which we trust mallctl synchronization to
+ * prevent.
*/
- malloc_write("<jemalloc>: Error initializing arena\n");
- if (opt_abort)
- abort();
+ malloc_mutex_lock(&arenas_lock);
+ memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual);
+ malloc_mutex_unlock(&arenas_lock);
+ if (narenas_cache > narenas_actual) {
+ memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) *
+ (narenas_cache - narenas_actual));
+ }
- return (arenas[0]);
+ /* Read the refreshed cache, and init the arena if necessary. */
+ arena = arenas_cache[ind];
+ if (init_if_missing && arena == NULL)
+ arena = arenas_cache[ind] = arena_init(ind);
+ return (arena);
}
-/* Slow path, called only by choose_arena(). */
+/* Slow path, called only by arena_choose(). */
arena_t *
-choose_arena_hard(void)
+arena_choose_hard(tsd_t *tsd)
{
arena_t *ret;
@@ -150,7 +581,7 @@ choose_arena_hard(void)
choose = 0;
first_null = narenas_auto;
malloc_mutex_lock(&arenas_lock);
- assert(arenas[0] != NULL);
+ assert(a0get() != NULL);
for (i = 1; i < narenas_auto; i++) {
if (arenas[i] != NULL) {
/*
@@ -183,22 +614,73 @@ choose_arena_hard(void)
ret = arenas[choose];
} else {
/* Initialize a new arena. */
- ret = arenas_extend(first_null);
+ choose = first_null;
+ ret = arena_init_locked(choose);
+ if (ret == NULL) {
+ malloc_mutex_unlock(&arenas_lock);
+ return (NULL);
+ }
}
- ret->nthreads++;
+ arena_bind_locked(tsd, choose);
malloc_mutex_unlock(&arenas_lock);
} else {
- ret = arenas[0];
- malloc_mutex_lock(&arenas_lock);
- ret->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
+ ret = a0get();
+ arena_bind(tsd, 0);
}
- arenas_tsd_set(&ret);
-
return (ret);
}
+void
+thread_allocated_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
+void
+thread_deallocated_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
+void
+arena_cleanup(tsd_t *tsd)
+{
+ arena_t *arena;
+
+ arena = tsd_arena_get(tsd);
+ if (arena != NULL)
+ arena_unbind(tsd, arena->ind);
+}
+
+void
+arenas_cache_cleanup(tsd_t *tsd)
+{
+ arena_t **arenas_cache;
+
+ arenas_cache = tsd_arenas_cache_get(tsd);
+ if (arenas_cache != NULL) {
+ tsd_arenas_cache_set(tsd, NULL);
+ a0dalloc(arenas_cache);
+ }
+}
+
+void
+narenas_cache_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
+void
+arenas_cache_bypass_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
static void
stats_print_atexit(void)
{
@@ -243,6 +725,19 @@ stats_print_atexit(void)
* Begin initialization functions.
*/
+#ifndef JEMALLOC_HAVE_SECURE_GETENV
+static char *
+secure_getenv(const char *name)
+{
+
+# ifdef JEMALLOC_HAVE_ISSETUGID
+ if (issetugid() != 0)
+ return (NULL);
+# endif
+ return (getenv(name));
+}
+#endif
+
static unsigned
malloc_ncpus(void)
{
@@ -258,44 +753,6 @@ malloc_ncpus(void)
return ((result == -1) ? 1 : (unsigned)result);
}
-void
-arenas_cleanup(void *arg)
-{
- arena_t *arena = *(arena_t **)arg;
-
- malloc_mutex_lock(&arenas_lock);
- arena->nthreads--;
- malloc_mutex_unlock(&arenas_lock);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void
-malloc_thread_init(void)
-{
-
- /*
- * TSD initialization can't be safely done as a side effect of
- * deallocation, because it is possible for a thread to do nothing but
- * deallocate its TLS data via free(), in which case writing to TLS
- * would cause write-after-free memory corruption. The quarantine
- * facility *only* gets used as a side effect of deallocation, so make
- * a best effort attempt at initializing its TSD by hooking all
- * allocation events.
- */
- if (config_fill && opt_quarantine)
- quarantine_alloc_hook();
-}
-
-JEMALLOC_ALWAYS_INLINE_C bool
-malloc_init(void)
-{
-
- if (malloc_initialized == false && malloc_init_hard())
- return (true);
- malloc_thread_init();
-
- return (false);
-}
-
static bool
malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
char const **v_p, size_t *vlen_p)
@@ -305,7 +762,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
*k_p = opts;
- for (accept = false; accept == false;) {
+ for (accept = false; !accept;) {
switch (*opts) {
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
@@ -340,7 +797,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
}
}
- for (accept = false; accept == false;) {
+ for (accept = false; !accept;) {
switch (*opts) {
case ',':
opts++;
@@ -394,14 +851,16 @@ malloc_conf_init(void)
* valgrind option remains in jemalloc 3.x for compatibility reasons.
*/
if (config_valgrind) {
- opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false;
- if (config_fill && opt_valgrind) {
- opt_junk = false;
- assert(opt_zero == false);
+ in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false;
+ if (config_fill && unlikely(in_valgrind)) {
+ opt_junk = "false";
+ opt_junk_alloc = false;
+ opt_junk_free = false;
+ assert(!opt_zero);
opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT;
opt_redzone = true;
}
- if (config_tcache && opt_valgrind)
+ if (config_tcache && unlikely(in_valgrind))
opt_tcache = false;
}
@@ -441,7 +900,7 @@ malloc_conf_init(void)
if (linklen == -1) {
/* No configuration specified. */
linklen = 0;
- /* restore errno */
+ /* Restore errno. */
set_errno(saved_errno);
}
#endif
@@ -457,7 +916,7 @@ malloc_conf_init(void)
#endif
;
- if ((opts = getenv(envname)) != NULL) {
+ if ((opts = secure_getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
* the value of the MALLOC_CONF environment
@@ -475,27 +934,28 @@ malloc_conf_init(void)
opts = buf;
}
- while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
- &vlen) == false) {
-#define CONF_HANDLE_BOOL(o, n) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
- if (strncmp("true", v, vlen) == 0 && \
- vlen == sizeof("true")-1) \
+ while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v,
+ &vlen)) {
+#define CONF_MATCH(n) \
+ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0)
+#define CONF_MATCH_VALUE(n) \
+ (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0)
+#define CONF_HANDLE_BOOL(o, n, cont) \
+ if (CONF_MATCH(n)) { \
+ if (CONF_MATCH_VALUE("true")) \
o = true; \
- else if (strncmp("false", v, vlen) == \
- 0 && vlen == sizeof("false")-1) \
+ else if (CONF_MATCH_VALUE("false")) \
o = false; \
else { \
malloc_conf_error( \
"Invalid conf value", \
k, klen, v, vlen); \
} \
- continue; \
+ if (cont) \
+ continue; \
}
#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+ if (CONF_MATCH(n)) { \
uintmax_t um; \
char *end; \
\
@@ -507,15 +967,15 @@ malloc_conf_init(void)
"Invalid conf value", \
k, klen, v, vlen); \
} else if (clip) { \
- if (min != 0 && um < min) \
- o = min; \
- else if (um > max) \
- o = max; \
+ if ((min) != 0 && um < (min)) \
+ o = (min); \
+ else if (um > (max)) \
+ o = (max); \
else \
o = um; \
} else { \
- if ((min != 0 && um < min) || \
- um > max) { \
+ if (((min) != 0 && um < (min)) \
+ || um > (max)) { \
malloc_conf_error( \
"Out-of-range " \
"conf value", \
@@ -526,8 +986,7 @@ malloc_conf_init(void)
continue; \
}
#define CONF_HANDLE_SSIZE_T(o, n, min, max) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+ if (CONF_MATCH(n)) { \
long l; \
char *end; \
\
@@ -538,8 +997,8 @@ malloc_conf_init(void)
malloc_conf_error( \
"Invalid conf value", \
k, klen, v, vlen); \
- } else if (l < (ssize_t)min || l > \
- (ssize_t)max) { \
+ } else if (l < (ssize_t)(min) || l > \
+ (ssize_t)(max)) { \
malloc_conf_error( \
"Out-of-range conf value", \
k, klen, v, vlen); \
@@ -548,8 +1007,7 @@ malloc_conf_init(void)
continue; \
}
#define CONF_HANDLE_CHAR_P(o, n, d) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+ if (CONF_MATCH(n)) { \
size_t cpylen = (vlen <= \
sizeof(o)-1) ? vlen : \
sizeof(o)-1; \
@@ -558,17 +1016,18 @@ malloc_conf_init(void)
continue; \
}
- CONF_HANDLE_BOOL(opt_abort, "abort")
+ CONF_HANDLE_BOOL(opt_abort, "abort", true)
/*
- * Chunks always require at least one header page, plus
- * one data page in the absence of redzones, or three
- * pages in the presence of redzones. In order to
- * simplify options processing, fix the limit based on
- * config_fill.
+ * Chunks always require at least one header page,
+ * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and
+ * possibly an additional page in the presence of
+ * redzones. In order to simplify options processing,
+ * use a conservative bound that accommodates all these
+ * constraints.
*/
CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
- (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1,
- true)
+ LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1),
+ (sizeof(size_t) << 3) - 1, true)
if (strncmp("dss", k, klen) == 0) {
int i;
bool match = false;
@@ -587,7 +1046,7 @@ malloc_conf_init(void)
}
}
}
- if (match == false) {
+ if (!match) {
malloc_conf_error("Invalid conf value",
k, klen, v, vlen);
}
@@ -597,47 +1056,87 @@ malloc_conf_init(void)
SIZE_T_MAX, false)
CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
-1, (sizeof(size_t) << 3) - 1)
- CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
+ CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true)
if (config_fill) {
- CONF_HANDLE_BOOL(opt_junk, "junk")
+ if (CONF_MATCH("junk")) {
+ if (CONF_MATCH_VALUE("true")) {
+ opt_junk = "true";
+ opt_junk_alloc = opt_junk_free =
+ true;
+ } else if (CONF_MATCH_VALUE("false")) {
+ opt_junk = "false";
+ opt_junk_alloc = opt_junk_free =
+ false;
+ } else if (CONF_MATCH_VALUE("alloc")) {
+ opt_junk = "alloc";
+ opt_junk_alloc = true;
+ opt_junk_free = false;
+ } else if (CONF_MATCH_VALUE("free")) {
+ opt_junk = "free";
+ opt_junk_alloc = false;
+ opt_junk_free = true;
+ } else {
+ malloc_conf_error(
+ "Invalid conf value", k,
+ klen, v, vlen);
+ }
+ continue;
+ }
CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
0, SIZE_T_MAX, false)
- CONF_HANDLE_BOOL(opt_redzone, "redzone")
- CONF_HANDLE_BOOL(opt_zero, "zero")
+ CONF_HANDLE_BOOL(opt_redzone, "redzone", true)
+ CONF_HANDLE_BOOL(opt_zero, "zero", true)
}
if (config_utrace) {
- CONF_HANDLE_BOOL(opt_utrace, "utrace")
- }
- if (config_valgrind) {
- CONF_HANDLE_BOOL(opt_valgrind, "valgrind")
+ CONF_HANDLE_BOOL(opt_utrace, "utrace", true)
}
if (config_xmalloc) {
- CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc")
+ CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true)
}
if (config_tcache) {
- CONF_HANDLE_BOOL(opt_tcache, "tcache")
+ CONF_HANDLE_BOOL(opt_tcache, "tcache",
+ !config_valgrind || !in_valgrind)
+ if (CONF_MATCH("tcache")) {
+ assert(config_valgrind && in_valgrind);
+ if (opt_tcache) {
+ opt_tcache = false;
+ malloc_conf_error(
+ "tcache cannot be enabled "
+ "while running inside Valgrind",
+ k, klen, v, vlen);
+ }
+ continue;
+ }
CONF_HANDLE_SSIZE_T(opt_lg_tcache_max,
"lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1)
}
if (config_prof) {
- CONF_HANDLE_BOOL(opt_prof, "prof")
+ CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix,
"prof_prefix", "jeprof")
- CONF_HANDLE_BOOL(opt_prof_active, "prof_active")
- CONF_HANDLE_SSIZE_T(opt_lg_prof_sample,
+ CONF_HANDLE_BOOL(opt_prof_active, "prof_active",
+ true)
+ CONF_HANDLE_BOOL(opt_prof_thread_active_init,
+ "prof_thread_active_init", true)
+ CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
"lg_prof_sample", 0,
- (sizeof(uint64_t) << 3) - 1)
- CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
+ (sizeof(uint64_t) << 3) - 1, true)
+ CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum",
+ true)
CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
"lg_prof_interval", -1,
(sizeof(uint64_t) << 3) - 1)
- CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump")
- CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
- CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
+ CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump",
+ true)
+ CONF_HANDLE_BOOL(opt_prof_final, "prof_final",
+ true)
+ CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak",
+ true)
}
malloc_conf_error("Invalid conf pair", k, klen, v,
vlen);
+#undef CONF_MATCH
#undef CONF_HANDLE_BOOL
#undef CONF_HANDLE_SIZE_T
#undef CONF_HANDLE_SSIZE_T
@@ -646,41 +1145,44 @@ malloc_conf_init(void)
}
}
+/* init_lock must be held. */
static bool
-malloc_init_hard(void)
+malloc_init_hard_needed(void)
{
- arena_t *init_arenas[1];
- malloc_mutex_lock(&init_lock);
- if (malloc_initialized || IS_INITIALIZER) {
+ if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
+ malloc_init_recursible)) {
/*
* Another thread initialized the allocator before this one
* acquired init_lock, or this thread is the initializing
* thread, and it is recursively allocating.
*/
- malloc_mutex_unlock(&init_lock);
return (false);
}
#ifdef JEMALLOC_THREADED_INIT
- if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) {
+ if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
/* Busy-wait until the initializing thread completes. */
do {
malloc_mutex_unlock(&init_lock);
CPU_SPINWAIT;
malloc_mutex_lock(&init_lock);
- } while (malloc_initialized == false);
- malloc_mutex_unlock(&init_lock);
+ } while (!malloc_initialized());
return (false);
}
#endif
+ return (true);
+}
+
+/* init_lock must be held. */
+static bool
+malloc_init_hard_a0_locked(void)
+{
+
malloc_initializer = INITIALIZER;
- malloc_tsd_boot();
if (config_prof)
prof_boot0();
-
malloc_conf_init();
-
if (opt_stats_print) {
/* Print statistics at exit. */
if (atexit(stats_print_atexit) != 0) {
@@ -689,94 +1191,64 @@ malloc_init_hard(void)
abort();
}
}
-
- if (base_boot()) {
- malloc_mutex_unlock(&init_lock);
+ if (base_boot())
return (true);
- }
-
- if (chunk_boot()) {
- malloc_mutex_unlock(&init_lock);
+ if (chunk_boot())
return (true);
- }
-
- if (ctl_boot()) {
- malloc_mutex_unlock(&init_lock);
+ if (ctl_boot())
return (true);
- }
-
if (config_prof)
prof_boot1();
-
- arena_boot();
-
- if (config_tcache && tcache_boot0()) {
- malloc_mutex_unlock(&init_lock);
+ if (arena_boot())
return (true);
- }
-
- if (huge_boot()) {
- malloc_mutex_unlock(&init_lock);
+ if (config_tcache && tcache_boot())
return (true);
- }
-
- if (malloc_mutex_init(&arenas_lock)) {
- malloc_mutex_unlock(&init_lock);
+ if (malloc_mutex_init(&arenas_lock))
return (true);
- }
-
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
*/
narenas_total = narenas_auto = 1;
- arenas = init_arenas;
+ arenas = &a0;
memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
-
/*
* Initialize one arena here. The rest are lazily created in
- * choose_arena_hard().
+ * arena_choose_hard().
*/
- arenas_extend(0);
- if (arenas[0] == NULL) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
-
- /* Initialize allocation counters before any allocations can occur. */
- if (config_stats && thread_allocated_tsd_boot()) {
- malloc_mutex_unlock(&init_lock);
+ if (arena_init(0) == NULL)
return (true);
- }
-
- if (arenas_tsd_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+ malloc_init_state = malloc_init_a0_initialized;
+ return (false);
+}
- if (config_tcache && tcache_boot1()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+static bool
+malloc_init_hard_a0(void)
+{
+ bool ret;
- if (config_fill && quarantine_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+ malloc_mutex_lock(&init_lock);
+ ret = malloc_init_hard_a0_locked();
+ malloc_mutex_unlock(&init_lock);
+ return (ret);
+}
- if (config_prof && prof_boot2()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+/*
+ * Initialize data structures which may trigger recursive allocation.
+ *
+ * init_lock must be held.
+ */
+static void
+malloc_init_hard_recursible(void)
+{
+ malloc_init_state = malloc_init_recursible;
malloc_mutex_unlock(&init_lock);
- /**********************************************************************/
- /* Recursive allocation may follow. */
ncpus = malloc_ncpus();
#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \
- && !defined(_WIN32))
+ && !defined(_WIN32) && !defined(__native_client__))
/* LinuxThreads's pthread_atfork() allocates. */
if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
jemalloc_postfork_child) != 0) {
@@ -785,15 +1257,16 @@ malloc_init_hard(void)
abort();
}
#endif
-
- /* Done recursively allocating. */
- /**********************************************************************/
malloc_mutex_lock(&init_lock);
+}
- if (mutex_boot()) {
- malloc_mutex_unlock(&init_lock);
+/* init_lock must be held. */
+static bool
+malloc_init_hard_finish(void)
+{
+
+ if (mutex_boot())
return (true);
- }
if (opt_narenas == 0) {
/*
@@ -820,21 +1293,56 @@ malloc_init_hard(void)
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total);
- if (arenas == NULL) {
- malloc_mutex_unlock(&init_lock);
+ if (arenas == NULL)
return (true);
- }
/*
* Zero the array. In practice, this should always be pre-zeroed,
* since it was just mmap()ed, but let's be sure.
*/
memset(arenas, 0, sizeof(arena_t *) * narenas_total);
/* Copy the pointer to the one arena that was already initialized. */
- arenas[0] = init_arenas[0];
+ arenas[0] = a0;
- malloc_initialized = true;
- malloc_mutex_unlock(&init_lock);
+ malloc_init_state = malloc_init_initialized;
+ return (false);
+}
+
+static bool
+malloc_init_hard(void)
+{
+#if defined(_WIN32) && _WIN32_WINNT < 0x0600
+ _init_init_lock();
+#endif
+ malloc_mutex_lock(&init_lock);
+ if (!malloc_init_hard_needed()) {
+ malloc_mutex_unlock(&init_lock);
+ return (false);
+ }
+
+ if (malloc_init_state != malloc_init_a0_initialized &&
+ malloc_init_hard_a0_locked()) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+ if (malloc_tsd_boot0()) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+ if (config_prof && prof_boot2()) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+
+ malloc_init_hard_recursible();
+
+ if (malloc_init_hard_finish()) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+
+ malloc_mutex_unlock(&init_lock);
+ malloc_tsd_boot1();
return (false);
}
@@ -847,98 +1355,87 @@ malloc_init_hard(void)
*/
static void *
-imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
+imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
{
void *p;
- if (cnt == NULL)
+ if (tctx == NULL)
return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = imalloc(SMALL_MAXCLASS+1);
+ if (usize <= SMALL_MAXCLASS) {
+ p = imalloc(tsd, LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = imalloc(usize);
+ p = imalloc(tsd, usize);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
+imalloc_prof(tsd_t *tsd, size_t usize)
{
void *p;
+ prof_tctx_t *tctx;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = imalloc_prof_sample(usize, cnt);
+ tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
+ p = imalloc_prof_sample(tsd, usize, tctx);
else
- p = imalloc(usize);
- if (p == NULL)
+ p = imalloc(tsd, usize);
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
- prof_malloc(p, usize, cnt);
+ }
+ prof_malloc(p, usize, tctx);
return (p);
}
-/*
- * MALLOC_BODY() is a macro rather than a function because its contents are in
- * the fast path, but inlining would cause reliability issues when determining
- * how many frames to discard from heap profiling backtraces.
- */
-#define MALLOC_BODY(ret, size, usize) do { \
- if (malloc_init()) \
- ret = NULL; \
- else { \
- if (config_prof && opt_prof) { \
- prof_thr_cnt_t *cnt; \
- \
- usize = s2u(size); \
- /* \
- * Call PROF_ALLOC_PREP() here rather than in \
- * imalloc_prof() so that imalloc_prof() can be \
- * inlined without introducing uncertainty \
- * about the number of backtrace frames to \
- * ignore. imalloc_prof() is in the fast path \
- * when heap profiling is enabled, so inlining \
- * is critical to performance. (For \
- * consistency all callers of PROF_ALLOC_PREP() \
- * are structured similarly, even though e.g. \
- * realloc() isn't called enough for inlining \
- * to be critical.) \
- */ \
- PROF_ALLOC_PREP(1, usize, cnt); \
- ret = imalloc_prof(usize, cnt); \
- } else { \
- if (config_stats || (config_valgrind && \
- opt_valgrind)) \
- usize = s2u(size); \
- ret = imalloc(size); \
- } \
- } \
-} while (0)
+JEMALLOC_ALWAYS_INLINE_C void *
+imalloc_body(size_t size, tsd_t **tsd, size_t *usize)
+{
-void *
+ if (unlikely(malloc_init()))
+ return (NULL);
+ *tsd = tsd_fetch();
+
+ if (config_prof && opt_prof) {
+ *usize = s2u(size);
+ if (unlikely(*usize == 0))
+ return (NULL);
+ return (imalloc_prof(*tsd, *usize));
+ }
+
+ if (config_stats || (config_valgrind && unlikely(in_valgrind)))
+ *usize = s2u(size);
+ return (imalloc(*tsd, size));
+}
+
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
je_malloc(size_t size)
{
void *ret;
+ tsd_t *tsd;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
if (size == 0)
size = 1;
- MALLOC_BODY(ret, size, usize);
-
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
+ ret = imalloc_body(size, &tsd, &usize);
+ if (unlikely(ret == NULL)) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in malloc(): "
"out of memory\n");
abort();
}
set_errno(ENOMEM);
}
- if (config_stats && ret != NULL) {
+ if (config_stats && likely(ret != NULL)) {
assert(usize == isalloc(ret, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
}
UTRACE(0, size, ret);
JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
@@ -946,107 +1443,103 @@ je_malloc(size_t size)
}
static void *
-imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
+imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize,
+ prof_tctx_t *tctx)
{
void *p;
- if (cnt == NULL)
+ if (tctx == NULL)
return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
- p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
- false);
+ if (usize <= SMALL_MAXCLASS) {
+ assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS);
+ p = ipalloc(tsd, LARGE_MINCLASS, alignment, false);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = ipalloc(usize, alignment, false);
+ p = ipalloc(tsd, usize, alignment, false);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
+imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize)
{
void *p;
+ prof_tctx_t *tctx;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = imemalign_prof_sample(alignment, usize, cnt);
+ tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
+ p = imemalign_prof_sample(tsd, alignment, usize, tctx);
else
- p = ipalloc(usize, alignment, false);
- if (p == NULL)
+ p = ipalloc(tsd, usize, alignment, false);
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
- prof_malloc(p, usize, cnt);
+ }
+ prof_malloc(p, usize, tctx);
return (p);
}
JEMALLOC_ATTR(nonnull(1))
-#ifdef JEMALLOC_PROF
-/*
- * Avoid any uncertainty as to how many backtrace frames to ignore in
- * PROF_ALLOC_PREP().
- */
-JEMALLOC_NOINLINE
-#endif
static int
imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
{
int ret;
+ tsd_t *tsd;
size_t usize;
void *result;
assert(min_alignment != 0);
- if (malloc_init()) {
+ if (unlikely(malloc_init())) {
result = NULL;
goto label_oom;
- } else {
- if (size == 0)
- size = 1;
-
- /* Make sure that alignment is a large enough power of 2. */
- if (((alignment - 1) & alignment) != 0
- || (alignment < min_alignment)) {
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error allocating "
- "aligned memory: invalid alignment\n");
- abort();
- }
- result = NULL;
- ret = EINVAL;
- goto label_return;
- }
+ }
+ tsd = tsd_fetch();
+ if (size == 0)
+ size = 1;
- usize = sa2u(size, alignment);
- if (usize == 0) {
- result = NULL;
- goto label_oom;
+ /* Make sure that alignment is a large enough power of 2. */
+ if (unlikely(((alignment - 1) & alignment) != 0
+ || (alignment < min_alignment))) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
+ malloc_write("<jemalloc>: Error allocating "
+ "aligned memory: invalid alignment\n");
+ abort();
}
+ result = NULL;
+ ret = EINVAL;
+ goto label_return;
+ }
- if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
- PROF_ALLOC_PREP(2, usize, cnt);
- result = imemalign_prof(alignment, usize, cnt);
- } else
- result = ipalloc(usize, alignment, false);
- if (result == NULL)
- goto label_oom;
+ usize = sa2u(size, alignment);
+ if (unlikely(usize == 0)) {
+ result = NULL;
+ goto label_oom;
}
+ if (config_prof && opt_prof)
+ result = imemalign_prof(tsd, alignment, usize);
+ else
+ result = ipalloc(tsd, usize, alignment, false);
+ if (unlikely(result == NULL))
+ goto label_oom;
+ assert(((uintptr_t)result & (alignment - 1)) == ZU(0));
+
*memptr = result;
ret = 0;
label_return:
- if (config_stats && result != NULL) {
+ if (config_stats && likely(result != NULL)) {
assert(usize == isalloc(result, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
}
UTRACE(0, size, result);
return (ret);
label_oom:
assert(result == NULL);
- if (config_xmalloc && opt_xmalloc) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error allocating aligned memory: "
"out of memory\n");
abort();
@@ -1055,7 +1548,8 @@ label_oom:
goto label_return;
}
-int
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+JEMALLOC_ATTR(nonnull(1))
je_posix_memalign(void **memptr, size_t alignment, size_t size)
{
int ret = imemalign(memptr, alignment, size, sizeof(void *));
@@ -1064,13 +1558,15 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size)
return (ret);
}
-void *
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2)
je_aligned_alloc(size_t alignment, size_t size)
{
void *ret;
int err;
- if ((err = imemalign(&ret, alignment, size, 1)) != 0) {
+ if (unlikely((err = imemalign(&ret, alignment, size, 1)) != 0)) {
ret = NULL;
set_errno(err);
}
@@ -1080,54 +1576,62 @@ je_aligned_alloc(size_t alignment, size_t size)
}
static void *
-icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
+icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
{
void *p;
- if (cnt == NULL)
+ if (tctx == NULL)
return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = icalloc(SMALL_MAXCLASS+1);
+ if (usize <= SMALL_MAXCLASS) {
+ p = icalloc(tsd, LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = icalloc(usize);
+ p = icalloc(tsd, usize);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
+icalloc_prof(tsd_t *tsd, size_t usize)
{
void *p;
+ prof_tctx_t *tctx;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = icalloc_prof_sample(usize, cnt);
+ tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
+ p = icalloc_prof_sample(tsd, usize, tctx);
else
- p = icalloc(usize);
- if (p == NULL)
+ p = icalloc(tsd, usize);
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
- prof_malloc(p, usize, cnt);
+ }
+ prof_malloc(p, usize, tctx);
return (p);
}
-void *
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
je_calloc(size_t num, size_t size)
{
void *ret;
+ tsd_t *tsd;
size_t num_size;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
- if (malloc_init()) {
+ if (unlikely(malloc_init())) {
num_size = 0;
ret = NULL;
goto label_return;
}
+ tsd = tsd_fetch();
num_size = num * size;
- if (num_size == 0) {
+ if (unlikely(num_size == 0)) {
if (num == 0 || size == 0)
num_size = 1;
else {
@@ -1139,37 +1643,38 @@ je_calloc(size_t num, size_t size)
* overflow during multiplication if neither operand uses any of the
* most significant half of the bits in a size_t.
*/
- } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
- && (num_size / size != num)) {
+ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) <<
+ 2))) && (num_size / size != num))) {
/* size_t overflow. */
ret = NULL;
goto label_return;
}
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
usize = s2u(num_size);
- PROF_ALLOC_PREP(1, usize, cnt);
- ret = icalloc_prof(usize, cnt);
+ if (unlikely(usize == 0)) {
+ ret = NULL;
+ goto label_return;
+ }
+ ret = icalloc_prof(tsd, usize);
} else {
- if (config_stats || (config_valgrind && opt_valgrind))
+ if (config_stats || (config_valgrind && unlikely(in_valgrind)))
usize = s2u(num_size);
- ret = icalloc(num_size);
+ ret = icalloc(tsd, num_size);
}
label_return:
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
+ if (unlikely(ret == NULL)) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in calloc(): out of "
"memory\n");
abort();
}
set_errno(ENOMEM);
}
- if (config_stats && ret != NULL) {
+ if (config_stats && likely(ret != NULL)) {
assert(usize == isalloc(ret, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
}
UTRACE(0, num_size, ret);
JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true);
@@ -1177,135 +1682,162 @@ label_return:
}
static void *
-irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt)
+irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
+ prof_tctx_t *tctx)
{
void *p;
- if (cnt == NULL)
+ if (tctx == NULL)
return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
+ if (usize <= SMALL_MAXCLASS) {
+ p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = iralloc(oldptr, usize, 0, 0, false);
+ p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt)
+irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize)
{
void *p;
- prof_ctx_t *old_ctx;
-
- old_ctx = prof_ctx_get(oldptr);
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = irealloc_prof_sample(oldptr, usize, cnt);
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
+
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(old_ptr);
+ tctx = prof_alloc_prep(tsd, usize, prof_active, true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
+ p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx);
else
- p = iralloc(oldptr, usize, 0, 0, false);
- if (p == NULL)
+ p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
- prof_realloc(p, usize, cnt, old_usize, old_ctx);
+ }
+ prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize,
+ old_tctx);
return (p);
}
JEMALLOC_INLINE_C void
-ifree(void *ptr)
+ifree(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
size_t usize;
UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
assert(ptr != NULL);
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
if (config_prof && opt_prof) {
usize = isalloc(ptr, config_prof);
- prof_free(ptr, usize);
+ prof_free(tsd, ptr, usize);
} else if (config_stats || config_valgrind)
usize = isalloc(ptr, config_prof);
if (config_stats)
- thread_allocated_tsd_get()->deallocated += usize;
- if (config_valgrind && opt_valgrind)
+ *tsd_thread_deallocatedp_get(tsd) += usize;
+ if (config_valgrind && unlikely(in_valgrind))
rzsize = p2rz(ptr);
- iqalloc(ptr);
+ iqalloc(tsd, ptr, tcache);
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
}
-void *
+JEMALLOC_INLINE_C void
+isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache)
+{
+ UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
+
+ assert(ptr != NULL);
+ assert(malloc_initialized() || IS_INITIALIZER);
+
+ if (config_prof && opt_prof)
+ prof_free(tsd, ptr, usize);
+ if (config_stats)
+ *tsd_thread_deallocatedp_get(tsd) += usize;
+ if (config_valgrind && unlikely(in_valgrind))
+ rzsize = p2rz(ptr);
+ isqalloc(tsd, ptr, usize, tcache);
+ JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+}
+
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2)
je_realloc(void *ptr, size_t size)
{
void *ret;
+ tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL);
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
size_t old_usize = 0;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- if (size == 0) {
+ if (unlikely(size == 0)) {
if (ptr != NULL) {
/* realloc(ptr, 0) is equivalent to free(ptr). */
UTRACE(ptr, 0, 0);
- ifree(ptr);
+ tsd = tsd_fetch();
+ ifree(tsd, ptr, tcache_get(tsd, false));
return (NULL);
}
size = 1;
}
- if (ptr != NULL) {
- assert(malloc_initialized || IS_INITIALIZER);
+ if (likely(ptr != NULL)) {
+ assert(malloc_initialized() || IS_INITIALIZER);
malloc_thread_init();
+ tsd = tsd_fetch();
- if ((config_prof && opt_prof) || config_stats ||
- (config_valgrind && opt_valgrind))
- old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
+ old_usize = isalloc(ptr, config_prof);
+ if (config_valgrind && unlikely(in_valgrind))
old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
usize = s2u(size);
- PROF_ALLOC_PREP(1, usize, cnt);
- ret = irealloc_prof(ptr, old_usize, usize, cnt);
+ ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd,
+ ptr, old_usize, usize);
} else {
- if (config_stats || (config_valgrind && opt_valgrind))
+ if (config_stats || (config_valgrind &&
+ unlikely(in_valgrind)))
usize = s2u(size);
- ret = iralloc(ptr, size, 0, 0, false);
+ ret = iralloc(tsd, ptr, old_usize, size, 0, false);
}
} else {
/* realloc(NULL, size) is equivalent to malloc(size). */
- MALLOC_BODY(ret, size, usize);
+ ret = imalloc_body(size, &tsd, &usize);
}
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
+ if (unlikely(ret == NULL)) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in realloc(): "
"out of memory\n");
abort();
}
set_errno(ENOMEM);
}
- if (config_stats && ret != NULL) {
- thread_allocated_t *ta;
+ if (config_stats && likely(ret != NULL)) {
assert(usize == isalloc(ret, config_prof));
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
UTRACE(ptr, size, ret);
- JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize,
- false);
+ JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize,
+ old_rzsize, true, false);
return (ret);
}
-void
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
je_free(void *ptr)
{
UTRACE(ptr, 0, 0);
- if (ptr != NULL)
- ifree(ptr);
+ if (likely(ptr != NULL)) {
+ tsd_t *tsd = tsd_fetch();
+ ifree(tsd, ptr, tcache_get(tsd, false));
+ }
}
/*
@@ -1317,22 +1849,28 @@ je_free(void *ptr)
*/
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
-void *
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc)
je_memalign(size_t alignment, size_t size)
{
void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
- imemalign(&ret, alignment, size, 1);
+ if (unlikely(imemalign(&ret, alignment, size, 1) != 0))
+ ret = NULL;
JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
return (ret);
}
#endif
#ifdef JEMALLOC_OVERRIDE_VALLOC
-void *
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc)
je_valloc(size_t size)
{
void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
- imemalign(&ret, PAGE, size, 1);
+ if (unlikely(imemalign(&ret, PAGE, size, 1) != 0))
+ ret = NULL;
JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
return (ret);
}
@@ -1346,7 +1884,7 @@ je_valloc(size_t size)
#define is_malloc_(a) malloc_is_ ## a
#define is_malloc(a) is_malloc_(a)
-#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__))
+#if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK))
/*
* glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
* to inconsistently reference libc's malloc(3)-compatible functions
@@ -1356,11 +1894,13 @@ je_valloc(size_t size)
* passed an extra argument for the caller return address, which will be
* ignored.
*/
-JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free;
-JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc;
-JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc;
-JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) =
+JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free;
+JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc;
+JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
+# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
+JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
je_memalign;
+# endif
#endif
/*
@@ -1371,111 +1911,173 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) =
* Begin non-standard functions.
*/
-JEMALLOC_ALWAYS_INLINE_C void *
-imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena)
+JEMALLOC_ALWAYS_INLINE_C bool
+imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
+ size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
{
- assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
- alignment)));
+ if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) {
+ *alignment = 0;
+ *usize = s2u(size);
+ } else {
+ *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+ *usize = sa2u(size, *alignment);
+ }
+ assert(*usize != 0);
+ *zero = MALLOCX_ZERO_GET(flags);
+ if ((flags & MALLOCX_TCACHE_MASK) != 0) {
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
+ *tcache = NULL;
+ else
+ *tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ } else
+ *tcache = tcache_get(tsd, true);
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ *arena = arena_get(tsd, arena_ind, true, true);
+ if (unlikely(*arena == NULL))
+ return (true);
+ } else
+ *arena = NULL;
+ return (false);
+}
- if (alignment != 0)
- return (ipalloct(usize, alignment, zero, try_tcache, arena));
- else if (zero)
- return (icalloct(usize, try_tcache, arena));
- else
- return (imalloct(usize, try_tcache, arena));
+JEMALLOC_ALWAYS_INLINE_C bool
+imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
+ size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
+{
+
+ if (likely(flags == 0)) {
+ *usize = s2u(size);
+ assert(*usize != 0);
+ *alignment = 0;
+ *zero = false;
+ *tcache = tcache_get(tsd, true);
+ *arena = NULL;
+ return (false);
+ } else {
+ return (imallocx_flags_decode_hard(tsd, size, flags, usize,
+ alignment, zero, tcache, arena));
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena)
+{
+
+ if (unlikely(alignment != 0))
+ return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
+ if (unlikely(zero))
+ return (icalloct(tsd, usize, tcache, arena));
+ return (imalloct(tsd, usize, tcache, arena));
}
static void *
-imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_thr_cnt_t *cnt)
+imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena)
{
void *p;
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- size_t usize_promoted = (alignment == 0) ?
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
- assert(usize_promoted != 0);
- p = imallocx(usize_promoted, alignment, zero, try_tcache,
+ if (usize <= SMALL_MAXCLASS) {
+ assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
+ sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
+ p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache,
arena);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
+ p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_thr_cnt_t *cnt)
+imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
{
void *p;
+ size_t alignment;
+ bool zero;
+ tcache_t *tcache;
+ arena_t *arena;
+ prof_tctx_t *tctx;
- if ((uintptr_t)cnt != (uintptr_t)1U) {
- p = imallocx_prof_sample(usize, alignment, zero, try_tcache,
- arena, cnt);
+ if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
+ &zero, &tcache, &arena)))
+ return (NULL);
+ tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true);
+ if (likely((uintptr_t)tctx == (uintptr_t)1U))
+ p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
+ else if ((uintptr_t)tctx > (uintptr_t)1U) {
+ p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache,
+ arena);
} else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
- if (p == NULL)
+ p = NULL;
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
- prof_malloc(p, usize, cnt);
+ }
+ prof_malloc(p, *usize, tctx);
+ assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
return (p);
}
-void *
-je_mallocx(size_t size, int flags)
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
{
void *p;
- size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
- bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
+ size_t alignment;
+ bool zero;
+ tcache_t *tcache;
arena_t *arena;
- bool try_tcache;
- assert(size != 0);
+ if (likely(flags == 0)) {
+ if (config_stats || (config_valgrind && unlikely(in_valgrind)))
+ *usize = s2u(size);
+ return (imalloc(tsd, size));
+ }
- if (malloc_init())
- goto label_oom;
+ if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
+ &alignment, &zero, &tcache, &arena)))
+ return (NULL);
+ p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
+ assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+ return (p);
+}
- if (arena_ind != UINT_MAX) {
- arena = arenas[arena_ind];
- try_tcache = false;
- } else {
- arena = NULL;
- try_tcache = true;
- }
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
+je_mallocx(size_t size, int flags)
+{
+ tsd_t *tsd;
+ void *p;
+ size_t usize;
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
+ assert(size != 0);
- if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
+ if (unlikely(malloc_init()))
+ goto label_oom;
+ tsd = tsd_fetch();
- PROF_ALLOC_PREP(1, usize, cnt);
- p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
- cnt);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
- if (p == NULL)
+ if (config_prof && opt_prof)
+ p = imallocx_prof(tsd, size, flags, &usize);
+ else
+ p = imallocx_no_prof(tsd, size, flags, &usize);
+ if (unlikely(p == NULL))
goto label_oom;
if (config_stats) {
assert(usize == isalloc(p, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
}
UTRACE(0, size, p);
- JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero);
+ JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags));
return (p);
label_oom:
- if (config_xmalloc && opt_xmalloc) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in mallocx(): out of memory\n");
abort();
}
@@ -1484,49 +2086,53 @@ label_oom:
}
static void *
-irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
- bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena,
- prof_thr_cnt_t *cnt)
+irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize,
+ size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+ prof_tctx_t *tctx)
{
void *p;
- if (cnt == NULL)
+ if (tctx == NULL)
return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
- size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ if (usize <= SMALL_MAXCLASS) {
+ p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment,
+ zero, tcache, arena);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero,
+ tcache, arena);
}
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
- size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena, prof_thr_cnt_t *cnt)
+irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
+ size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
+ arena_t *arena)
{
void *p;
- prof_ctx_t *old_ctx;
-
- old_ctx = prof_ctx_get(oldptr);
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
- try_tcache_alloc, try_tcache_dalloc, arena, cnt);
- else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
+
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(old_ptr);
+ tctx = prof_alloc_prep(tsd, *usize, prof_active, true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+ p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize,
+ alignment, zero, tcache, arena, tctx);
+ } else {
+ p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero,
+ tcache, arena);
}
- if (p == NULL)
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
return (NULL);
+ }
- if (p == oldptr && alignment != 0) {
+ if (p == old_ptr && alignment != 0) {
/*
* The allocation did not move, so it is possible that the size
* class is smaller than would guarantee the requested
@@ -1537,78 +2143,80 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
*/
*usize = isalloc(p, config_prof);
}
- prof_realloc(p, *usize, cnt, old_usize, old_ctx);
+ prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr,
+ old_usize, old_tctx);
return (p);
}
-void *
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2)
je_rallocx(void *ptr, size_t size, int flags)
{
void *p;
- size_t usize, old_usize;
+ tsd_t *tsd;
+ size_t usize;
+ size_t old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- bool try_tcache_alloc, try_tcache_dalloc;
arena_t *arena;
+ tcache_t *tcache;
assert(ptr != NULL);
assert(size != 0);
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
malloc_thread_init();
+ tsd = tsd_fetch();
- if (arena_ind != UINT_MAX) {
- arena_chunk_t *chunk;
- try_tcache_alloc = false;
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache_dalloc = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
- arena = arenas[arena_ind];
- } else {
- try_tcache_alloc = true;
- try_tcache_dalloc = true;
+ if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ arena = arena_get(tsd, arena_ind, true, true);
+ if (unlikely(arena == NULL))
+ goto label_oom;
+ } else
arena = NULL;
- }
- if ((config_prof && opt_prof) || config_stats ||
- (config_valgrind && opt_valgrind))
- old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
+ tcache = NULL;
+ else
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ } else
+ tcache = tcache_get(tsd, true);
+
+ old_usize = isalloc(ptr, config_prof);
+ if (config_valgrind && unlikely(in_valgrind))
old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
assert(usize != 0);
- PROF_ALLOC_PREP(1, usize, cnt);
- p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
- try_tcache_alloc, try_tcache_dalloc, arena, cnt);
- if (p == NULL)
+ p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
+ zero, tcache, arena);
+ if (unlikely(p == NULL))
goto label_oom;
} else {
- p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc,
- try_tcache_dalloc, arena);
- if (p == NULL)
+ p = iralloct(tsd, ptr, old_usize, size, alignment, zero,
+ tcache, arena);
+ if (unlikely(p == NULL))
goto label_oom;
- if (config_stats || (config_valgrind && opt_valgrind))
+ if (config_stats || (config_valgrind && unlikely(in_valgrind)))
usize = isalloc(p, config_prof);
}
+ assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
if (config_stats) {
- thread_allocated_t *ta;
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
UTRACE(ptr, size, p);
- JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero);
+ JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize,
+ old_rzsize, false, zero);
return (p);
label_oom:
- if (config_xmalloc && opt_xmalloc) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in rallocx(): out of memory\n");
abort();
}
@@ -1618,11 +2226,11 @@ label_oom:
JEMALLOC_ALWAYS_INLINE_C size_t
ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, bool zero, arena_t *arena)
+ size_t alignment, bool zero)
{
size_t usize;
- if (ixalloc(ptr, size, extra, alignment, zero))
+ if (ixalloc(ptr, old_usize, size, extra, alignment, zero))
return (old_usize);
usize = isalloc(ptr, config_prof);
@@ -1631,215 +2239,229 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra,
static size_t
ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, size_t max_usize, bool zero, arena_t *arena,
- prof_thr_cnt_t *cnt)
+ size_t alignment, bool zero, prof_tctx_t *tctx)
{
size_t usize;
- if (cnt == NULL)
+ if (tctx == NULL)
return (old_usize);
- /* Use minimum usize to determine whether promotion may happen. */
- if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size,
- alignment)) <= SMALL_MAXCLASS) {
- if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
- size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
- alignment, zero))
- return (old_usize);
- usize = isalloc(ptr, config_prof);
- if (max_usize < PAGE)
- arena_prof_promoted(ptr, usize);
- } else {
- usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
- }
+ usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero);
return (usize);
}
JEMALLOC_ALWAYS_INLINE_C size_t
-ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, size_t max_usize, bool zero, arena_t *arena,
- prof_thr_cnt_t *cnt)
+ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ size_t extra, size_t alignment, bool zero)
{
- size_t usize;
- prof_ctx_t *old_ctx;
+ size_t usize_max, usize;
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
- old_ctx = prof_ctx_get(ptr);
- if ((uintptr_t)cnt != (uintptr_t)1U) {
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(ptr);
+ /*
+ * usize isn't knowable before ixalloc() returns when extra is non-zero.
+ * Therefore, compute its maximum possible value and use that in
+ * prof_alloc_prep() to decide whether to capture a backtrace.
+ * prof_realloc() will use the actual usize to decide whether to sample.
+ */
+ usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra,
+ alignment);
+ assert(usize_max != 0);
+ tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
usize = ixallocx_prof_sample(ptr, old_usize, size, extra,
- alignment, zero, max_usize, arena, cnt);
+ alignment, zero, tctx);
} else {
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
+ zero);
}
- if (usize == old_usize)
+ if (usize == old_usize) {
+ prof_alloc_rollback(tsd, tctx, false);
return (usize);
- prof_realloc(ptr, usize, cnt, old_usize, old_ctx);
+ }
+ prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
+ old_tctx);
return (usize);
}
-size_t
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
je_xallocx(void *ptr, size_t size, size_t extra, int flags)
{
+ tsd_t *tsd;
size_t usize, old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- arena_t *arena;
assert(ptr != NULL);
assert(size != 0);
assert(SIZE_T_MAX - size >= extra);
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
malloc_thread_init();
-
- if (arena_ind != UINT_MAX)
- arena = arenas[arena_ind];
- else
- arena = NULL;
+ tsd = tsd_fetch();
old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
+
+ /* Clamp extra if necessary to avoid (size + extra) overflow. */
+ if (unlikely(size + extra > HUGE_MAXCLASS)) {
+ /* Check for size overflow. */
+ if (unlikely(size > HUGE_MAXCLASS)) {
+ usize = old_usize;
+ goto label_not_resized;
+ }
+ extra = HUGE_MAXCLASS - size;
+ }
+
+ if (config_valgrind && unlikely(in_valgrind))
old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
- /*
- * usize isn't knowable before ixalloc() returns when extra is
- * non-zero. Therefore, compute its maximum possible value and
- * use that in PROF_ALLOC_PREP() to decide whether to capture a
- * backtrace. prof_realloc() will use the actual usize to
- * decide whether to sample.
- */
- size_t max_usize = (alignment == 0) ? s2u(size+extra) :
- sa2u(size+extra, alignment);
- PROF_ALLOC_PREP(1, max_usize, cnt);
- usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
- max_usize, zero, arena, cnt);
+ usize = ixallocx_prof(tsd, ptr, old_usize, size, extra,
+ alignment, zero);
} else {
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
+ zero);
}
- if (usize == old_usize)
+ if (unlikely(usize == old_usize))
goto label_not_resized;
if (config_stats) {
- thread_allocated_t *ta;
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
- JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero);
+ JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize,
+ old_rzsize, false, zero);
label_not_resized:
UTRACE(ptr, size, ptr);
return (usize);
}
-size_t
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+JEMALLOC_ATTR(pure)
je_sallocx(const void *ptr, int flags)
{
size_t usize;
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
malloc_thread_init();
if (config_ivsalloc)
usize = ivsalloc(ptr, config_prof);
- else {
- assert(ptr != NULL);
+ else
usize = isalloc(ptr, config_prof);
- }
return (usize);
}
-void
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
je_dallocx(void *ptr, int flags)
{
- size_t usize;
- UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- bool try_tcache;
+ tsd_t *tsd;
+ tcache_t *tcache;
assert(ptr != NULL);
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
- if (arena_ind != UINT_MAX) {
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
+ tsd = tsd_fetch();
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
+ tcache = NULL;
+ else
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
} else
- try_tcache = true;
+ tcache = tcache_get(tsd, false);
UTRACE(ptr, 0, 0);
- if (config_stats || config_valgrind)
- usize = isalloc(ptr, config_prof);
- if (config_prof && opt_prof) {
- if (config_stats == false && config_valgrind == false)
- usize = isalloc(ptr, config_prof);
- prof_free(ptr, usize);
- }
- if (config_stats)
- thread_allocated_tsd_get()->deallocated += usize;
- if (config_valgrind && opt_valgrind)
- rzsize = p2rz(ptr);
- iqalloct(ptr, try_tcache);
- JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+ ifree(tsd_fetch(), ptr, tcache);
}
-size_t
-je_nallocx(size_t size, int flags)
+JEMALLOC_ALWAYS_INLINE_C size_t
+inallocx(size_t size, int flags)
{
size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+
+ if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0))
+ usize = s2u(size);
+ else
+ usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
+ assert(usize != 0);
+ return (usize);
+}
+
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_sdallocx(void *ptr, size_t size, int flags)
+{
+ tsd_t *tsd;
+ tcache_t *tcache;
+ size_t usize;
+
+ assert(ptr != NULL);
+ assert(malloc_initialized() || IS_INITIALIZER);
+ usize = inallocx(size, flags);
+ assert(usize == isalloc(ptr, config_prof));
+
+ tsd = tsd_fetch();
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE)
+ tcache = NULL;
+ else
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ } else
+ tcache = tcache_get(tsd, false);
+
+ UTRACE(ptr, 0, 0);
+ isfree(tsd, ptr, usize, tcache);
+}
+
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+JEMALLOC_ATTR(pure)
+je_nallocx(size_t size, int flags)
+{
assert(size != 0);
- if (malloc_init())
+ if (unlikely(malloc_init()))
return (0);
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
- return (usize);
+ return (inallocx(size, flags));
}
-int
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
size_t newlen)
{
- if (malloc_init())
+ if (unlikely(malloc_init()))
return (EAGAIN);
return (ctl_byname(name, oldp, oldlenp, newp, newlen));
}
-int
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
{
- if (malloc_init())
+ if (unlikely(malloc_init()))
return (EAGAIN);
return (ctl_nametomib(name, mibp, miblenp));
}
-int
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
- if (malloc_init())
+ if (unlikely(malloc_init()))
return (EAGAIN);
return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
}
-void
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts)
{
@@ -1847,18 +2469,18 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
stats_print(write_cb, cbopaque, opts);
}
-size_t
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
{
size_t ret;
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
malloc_thread_init();
if (config_ivsalloc)
ret = ivsalloc(ptr, config_prof);
else
- ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0;
+ ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof);
return (ret);
}
@@ -1868,91 +2490,6 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
*/
/******************************************************************************/
/*
- * Begin experimental functions.
- */
-#ifdef JEMALLOC_EXPERIMENTAL
-
-int
-je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
-{
- void *p;
-
- assert(ptr != NULL);
-
- p = je_mallocx(size, flags);
- if (p == NULL)
- return (ALLOCM_ERR_OOM);
- if (rsize != NULL)
- *rsize = isalloc(p, config_prof);
- *ptr = p;
- return (ALLOCM_SUCCESS);
-}
-
-int
-je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
-{
- int ret;
- bool no_move = flags & ALLOCM_NO_MOVE;
-
- assert(ptr != NULL);
- assert(*ptr != NULL);
- assert(size != 0);
- assert(SIZE_T_MAX - size >= extra);
-
- if (no_move) {
- size_t usize = je_xallocx(*ptr, size, extra, flags);
- ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED;
- if (rsize != NULL)
- *rsize = usize;
- } else {
- void *p = je_rallocx(*ptr, size+extra, flags);
- if (p != NULL) {
- *ptr = p;
- ret = ALLOCM_SUCCESS;
- } else
- ret = ALLOCM_ERR_OOM;
- if (rsize != NULL)
- *rsize = isalloc(*ptr, config_prof);
- }
- return (ret);
-}
-
-int
-je_sallocm(const void *ptr, size_t *rsize, int flags)
-{
-
- assert(rsize != NULL);
- *rsize = je_sallocx(ptr, flags);
- return (ALLOCM_SUCCESS);
-}
-
-int
-je_dallocm(void *ptr, int flags)
-{
-
- je_dallocx(ptr, flags);
- return (ALLOCM_SUCCESS);
-}
-
-int
-je_nallocm(size_t *rsize, size_t size, int flags)
-{
- size_t usize;
-
- usize = je_nallocx(size, flags);
- if (usize == 0)
- return (ALLOCM_ERR_OOM);
- if (rsize != NULL)
- *rsize = usize;
- return (ALLOCM_SUCCESS);
-}
-
-#endif
-/*
- * End experimental functions.
- */
-/******************************************************************************/
-/*
* The following functions are used by threading libraries for protection of
* malloc during fork().
*/
@@ -1966,9 +2503,9 @@ je_nallocm(size_t *rsize, size_t size, int flags)
* fork/malloc races via the following functions it registers during
* initialization using pthread_atfork(), but of course that does no good if
* the allocator isn't fully initialized at fork time. The following library
- * constructor is a partial solution to this problem. It may still possible to
- * trigger the deadlock described above, but doing so would involve forking via
- * a library constructor that runs before jemalloc's runs.
+ * constructor is a partial solution to this problem. It may still be possible
+ * to trigger the deadlock described above, but doing so would involve forking
+ * via a library constructor that runs before jemalloc's runs.
*/
JEMALLOC_ATTR(constructor)
static void
@@ -1989,10 +2526,10 @@ _malloc_prefork(void)
unsigned i;
#ifdef JEMALLOC_MUTEX_INIT_CB
- if (malloc_initialized == false)
+ if (!malloc_initialized())
return;
#endif
- assert(malloc_initialized);
+ assert(malloc_initialized());
/* Acquire all mutexes in a safe order. */
ctl_prefork();
@@ -2004,7 +2541,6 @@ _malloc_prefork(void)
}
chunk_prefork();
base_prefork();
- huge_prefork();
}
#ifndef JEMALLOC_MUTEX_INIT_CB
@@ -2018,13 +2554,12 @@ _malloc_postfork(void)
unsigned i;
#ifdef JEMALLOC_MUTEX_INIT_CB
- if (malloc_initialized == false)
+ if (!malloc_initialized())
return;
#endif
- assert(malloc_initialized);
+ assert(malloc_initialized());
/* Release all mutexes, now that fork() has completed. */
- huge_postfork_parent();
base_postfork_parent();
chunk_postfork_parent();
for (i = 0; i < narenas_total; i++) {
@@ -2041,10 +2576,9 @@ jemalloc_postfork_child(void)
{
unsigned i;
- assert(malloc_initialized);
+ assert(malloc_initialized());
/* Release all mutexes, now that fork() has completed. */
- huge_postfork_child();
base_postfork_child();
chunk_postfork_child();
for (i = 0; i < narenas_total; i++) {
@@ -2057,55 +2591,35 @@ jemalloc_postfork_child(void)
}
/******************************************************************************/
-/*
- * The following functions are used for TLS allocation/deallocation in static
- * binaries on FreeBSD. The primary difference between these and i[mcd]alloc()
- * is that these avoid accessing TLS variables.
- */
-static void *
-a0alloc(size_t size, bool zero)
-{
-
- if (malloc_init())
- return (NULL);
-
- if (size == 0)
- size = 1;
-
- if (size <= arena_maxclass)
- return (arena_malloc(arenas[0], size, zero, false));
- else
- return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0])));
+/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation.
+ * returns 0 if the allocation is in the currently active run,
+ * or when it is not causing any frag issue (large or huge bin)
+ * returns the bin utilization and run utilization both in fixed point 16:16.
+ * If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. */
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+je_get_defrag_hint(void* ptr, int *bin_util, int *run_util) {
+ int defrag = 0;
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) { /* indication that this is not a HUGE alloc */
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ size_t mapbits = arena_mapbits_get(chunk, pageind);
+ if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* indication that this is not a LARGE alloc */
+ arena_t *arena = extent_node_arena_get(&chunk->node);
+ size_t rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind);
+ arena_run_t *run = &arena_miscelm_get(chunk, rpages_ind)->run;
+ arena_bin_t *bin = &arena->bins[run->binind];
+ malloc_mutex_lock(&bin->lock);
+ /* runs that are in the same chunk as the current chunk, are likely to be the next runcur */
+ if (chunk != (arena_chunk_t *)CHUNK_ADDR2BASE(bin->runcur)) {
+ arena_bin_info_t *bin_info = &arena_bin_info[run->binind];
+ size_t availregs = bin_info->nregs * bin->stats.curruns;
+ *bin_util = (bin->stats.curregs<<16) / availregs;
+ *run_util = ((bin_info->nregs - run->nfree)<<16) / bin_info->nregs;
+ defrag = 1;
+ }
+ malloc_mutex_unlock(&bin->lock);
+ }
+ }
+ return defrag;
}
-
-void *
-a0malloc(size_t size)
-{
-
- return (a0alloc(size, false));
-}
-
-void *
-a0calloc(size_t num, size_t size)
-{
-
- return (a0alloc(num * size, true));
-}
-
-void
-a0free(void *ptr)
-{
- arena_chunk_t *chunk;
-
- if (ptr == NULL)
- return;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(chunk->arena, chunk, ptr, false);
- else
- huge_dalloc(ptr, true);
-}
-
-/******************************************************************************/
diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c
index 788eca387..2d47af976 100644
--- a/deps/jemalloc/src/mutex.c
+++ b/deps/jemalloc/src/mutex.c
@@ -73,9 +73,13 @@ malloc_mutex_init(malloc_mutex_t *mutex)
{
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ InitializeSRWLock(&mutex->lock);
+# else
if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
_CRT_SPINCOUNT))
return (true);
+# endif
#elif (defined(JEMALLOC_OSSPIN))
mutex->lock = 0;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@@ -83,8 +87,8 @@ malloc_mutex_init(malloc_mutex_t *mutex)
mutex->postponed_next = postponed_mutexes;
postponed_mutexes = mutex;
} else {
- if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
- 0)
+ if (_pthread_mutex_init_calloc_cb(&mutex->lock,
+ bootstrap_calloc) != 0)
return (true);
}
#else
@@ -140,7 +144,7 @@ mutex_boot(void)
postpone_init = false;
while (postponed_mutexes != NULL) {
if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
- base_calloc) != 0)
+ bootstrap_calloc) != 0)
return (true);
postponed_mutexes = postponed_mutexes->postponed_next;
}
diff --git a/deps/jemalloc/src/pages.c b/deps/jemalloc/src/pages.c
new file mode 100644
index 000000000..83a167f67
--- /dev/null
+++ b/deps/jemalloc/src/pages.c
@@ -0,0 +1,173 @@
+#define JEMALLOC_PAGES_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+
+void *
+pages_map(void *addr, size_t size)
+{
+ void *ret;
+
+ assert(size != 0);
+
+#ifdef _WIN32
+ /*
+ * If VirtualAlloc can't allocate at the given address when one is
+ * given, it fails and returns NULL.
+ */
+ ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
+ PAGE_READWRITE);
+#else
+ /*
+ * We don't use MAP_FIXED here, because it can cause the *replacement*
+ * of existing mappings, and we only want to create new mappings.
+ */
+ ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ -1, 0);
+ assert(ret != NULL);
+
+ if (ret == MAP_FAILED)
+ ret = NULL;
+ else if (addr != NULL && ret != addr) {
+ /*
+ * We succeeded in mapping memory, but not in the right place.
+ */
+ pages_unmap(ret, size);
+ ret = NULL;
+ }
+#endif
+ assert(ret == NULL || (addr == NULL && ret != addr)
+ || (addr != NULL && ret == addr));
+ return (ret);
+}
+
+void
+pages_unmap(void *addr, size_t size)
+{
+
+#ifdef _WIN32
+ if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
+#else
+ if (munmap(addr, size) == -1)
+#endif
+ {
+ char buf[BUFERROR_BUF];
+
+ buferror(get_errno(), buf, sizeof(buf));
+ malloc_printf("<jemalloc>: Error in "
+#ifdef _WIN32
+ "VirtualFree"
+#else
+ "munmap"
+#endif
+ "(): %s\n", buf);
+ if (opt_abort)
+ abort();
+ }
+}
+
+void *
+pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
+{
+ void *ret = (void *)((uintptr_t)addr + leadsize);
+
+ assert(alloc_size >= leadsize + size);
+#ifdef _WIN32
+ {
+ void *new_addr;
+
+ pages_unmap(addr, alloc_size);
+ new_addr = pages_map(ret, size);
+ if (new_addr == ret)
+ return (ret);
+ if (new_addr)
+ pages_unmap(new_addr, size);
+ return (NULL);
+ }
+#else
+ {
+ size_t trailsize = alloc_size - leadsize - size;
+
+ if (leadsize != 0)
+ pages_unmap(addr, leadsize);
+ if (trailsize != 0)
+ pages_unmap((void *)((uintptr_t)ret + size), trailsize);
+ return (ret);
+ }
+#endif
+}
+
+static bool
+pages_commit_impl(void *addr, size_t size, bool commit)
+{
+
+#ifndef _WIN32
+ /*
+ * The following decommit/commit implementation is functional, but
+ * always disabled because it doesn't add value beyond improved
+ * debugging (at the cost of extra system calls) on systems that
+ * overcommit.
+ */
+ if (false) {
+ int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE;
+ void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON |
+ MAP_FIXED, -1, 0);
+ if (result == MAP_FAILED)
+ return (true);
+ if (result != addr) {
+ /*
+ * We succeeded in mapping memory, but not in the right
+ * place.
+ */
+ pages_unmap(result, size);
+ return (true);
+ }
+ return (false);
+ }
+#endif
+ return (true);
+}
+
+bool
+pages_commit(void *addr, size_t size)
+{
+
+ return (pages_commit_impl(addr, size, true));
+}
+
+bool
+pages_decommit(void *addr, size_t size)
+{
+
+ return (pages_commit_impl(addr, size, false));
+}
+
+bool
+pages_purge(void *addr, size_t size)
+{
+ bool unzeroed;
+
+#ifdef _WIN32
+ VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
+ unzeroed = true;
+#elif defined(JEMALLOC_HAVE_MADVISE)
+# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+# define JEMALLOC_MADV_PURGE MADV_DONTNEED
+# define JEMALLOC_MADV_ZEROS true
+# elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+# define JEMALLOC_MADV_PURGE MADV_FREE
+# define JEMALLOC_MADV_ZEROS false
+# else
+# error "No madvise(2) flag defined for purging unused dirty pages."
+# endif
+ int err = madvise(addr, size, JEMALLOC_MADV_PURGE);
+ unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0);
+# undef JEMALLOC_MADV_PURGE
+# undef JEMALLOC_MADV_ZEROS
+#else
+ /* Last resort no-op. */
+ unzeroed = true;
+#endif
+ return (unzeroed);
+}
+
diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c
index 7722b7b43..5d2b9598f 100644
--- a/deps/jemalloc/src/prof.c
+++ b/deps/jemalloc/src/prof.c
@@ -14,14 +14,13 @@
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
-
bool opt_prof = false;
bool opt_prof_active = true;
+bool opt_prof_thread_active_init = true;
size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool opt_prof_gdump = false;
-bool opt_prof_final = true;
+bool opt_prof_final = false;
bool opt_prof_leak = false;
bool opt_prof_accum = false;
char opt_prof_prefix[
@@ -31,25 +30,65 @@ char opt_prof_prefix[
#endif
1];
+/*
+ * Initialized as opt_prof_active, and accessed via
+ * prof_active_[gs]et{_unlocked,}().
+ */
+bool prof_active;
+static malloc_mutex_t prof_active_mtx;
+
+/*
+ * Initialized as opt_prof_thread_active_init, and accessed via
+ * prof_thread_active_init_[gs]et().
+ */
+static bool prof_thread_active_init;
+static malloc_mutex_t prof_thread_active_init_mtx;
+
+/*
+ * Initialized as opt_prof_gdump, and accessed via
+ * prof_gdump_[gs]et{_unlocked,}().
+ */
+bool prof_gdump_val;
+static malloc_mutex_t prof_gdump_mtx;
+
uint64_t prof_interval = 0;
-bool prof_promote;
+
+size_t lg_prof_sample;
/*
- * Table of mutexes that are shared among ctx's. These are leaf locks, so
- * there is no problem with using them for more than one ctx at the same time.
- * The primary motivation for this sharing though is that ctx's are ephemeral,
+ * Table of mutexes that are shared among gctx's. These are leaf locks, so
+ * there is no problem with using them for more than one gctx at the same time.
+ * The primary motivation for this sharing though is that gctx's are ephemeral,
* and destroying mutexes causes complications for systems that allocate when
* creating/destroying mutexes.
*/
-static malloc_mutex_t *ctx_locks;
-static unsigned cum_ctxs; /* Atomic counter. */
+static malloc_mutex_t *gctx_locks;
+static unsigned cum_gctxs; /* Atomic counter. */
+
+/*
+ * Table of mutexes that are shared among tdata's. No operations require
+ * holding multiple tdata locks, so there is no problem with using them for more
+ * than one tdata at the same time, even though a gctx lock may be acquired
+ * while holding a tdata lock.
+ */
+static malloc_mutex_t *tdata_locks;
/*
- * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
+ * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data
* structure that knows about all backtraces currently captured.
*/
-static ckh_t bt2ctx;
-static malloc_mutex_t bt2ctx_mtx;
+static ckh_t bt2gctx;
+static malloc_mutex_t bt2gctx_mtx;
+
+/*
+ * Tree of all extant prof_tdata_t structures, regardless of state,
+ * {attached,detached,expired}.
+ */
+static prof_tdata_tree_t tdatas;
+static malloc_mutex_t tdatas_mtx;
+
+static uint64_t next_thr_uid;
+static malloc_mutex_t next_thr_uid_mtx;
static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
@@ -77,120 +116,210 @@ static int prof_dump_fd;
static bool prof_booted = false;
/******************************************************************************/
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
+
+static bool prof_tctx_should_destroy(prof_tctx_t *tctx);
+static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
+static bool prof_tdata_should_destroy(prof_tdata_t *tdata,
+ bool even_if_attached);
+static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
+ bool even_if_attached);
+static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name);
+
+/******************************************************************************/
+/* Red-black trees. */
+
+JEMALLOC_INLINE_C int
+prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b)
+{
+ uint64_t a_thr_uid = a->thr_uid;
+ uint64_t b_thr_uid = b->thr_uid;
+ int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
+ if (ret == 0) {
+ uint64_t a_thr_discrim = a->thr_discrim;
+ uint64_t b_thr_discrim = b->thr_discrim;
+ ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
+ b_thr_discrim);
+ if (ret == 0) {
+ uint64_t a_tctx_uid = a->tctx_uid;
+ uint64_t b_tctx_uid = b->tctx_uid;
+ ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
+ b_tctx_uid);
+ }
+ }
+ return (ret);
+}
+
+rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
+ tctx_link, prof_tctx_comp)
+
+JEMALLOC_INLINE_C int
+prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b)
+{
+ unsigned a_len = a->bt.len;
+ unsigned b_len = b->bt.len;
+ unsigned comp_len = (a_len < b_len) ? a_len : b_len;
+ int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
+ if (ret == 0)
+ ret = (a_len > b_len) - (a_len < b_len);
+ return (ret);
+}
+
+rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
+ prof_gctx_comp)
+
+JEMALLOC_INLINE_C int
+prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b)
+{
+ int ret;
+ uint64_t a_uid = a->thr_uid;
+ uint64_t b_uid = b->thr_uid;
+
+ ret = ((a_uid > b_uid) - (a_uid < b_uid));
+ if (ret == 0) {
+ uint64_t a_discrim = a->thr_discrim;
+ uint64_t b_discrim = b->thr_discrim;
+
+ ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
+ }
+ return (ret);
+}
+
+rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
+ prof_tdata_comp)
+
+/******************************************************************************/
void
-bt_init(prof_bt_t *bt, void **vec)
+prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated)
{
+ prof_tdata_t *tdata;
cassert(config_prof);
- bt->vec = vec;
- bt->len = 0;
+ if (updated) {
+ /*
+ * Compute a new sample threshold. This isn't very important in
+ * practice, because this function is rarely executed, so the
+ * potential for sample bias is minimal except in contrived
+ * programs.
+ */
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata != NULL)
+ prof_sample_threshold_update(tdata);
+ }
+
+ if ((uintptr_t)tctx > (uintptr_t)1U) {
+ malloc_mutex_lock(tctx->tdata->lock);
+ tctx->prepared = false;
+ if (prof_tctx_should_destroy(tctx))
+ prof_tctx_destroy(tsd, tctx);
+ else
+ malloc_mutex_unlock(tctx->tdata->lock);
+ }
}
-static void
-bt_destroy(prof_bt_t *bt)
+void
+prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
- cassert(config_prof);
+ prof_tctx_set(ptr, usize, tctx);
- idalloc(bt);
+ malloc_mutex_lock(tctx->tdata->lock);
+ tctx->cnts.curobjs++;
+ tctx->cnts.curbytes += usize;
+ if (opt_prof_accum) {
+ tctx->cnts.accumobjs++;
+ tctx->cnts.accumbytes += usize;
+ }
+ tctx->prepared = false;
+ malloc_mutex_unlock(tctx->tdata->lock);
}
-static prof_bt_t *
-bt_dup(prof_bt_t *bt)
+void
+prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
{
- prof_bt_t *ret;
- cassert(config_prof);
+ malloc_mutex_lock(tctx->tdata->lock);
+ assert(tctx->cnts.curobjs > 0);
+ assert(tctx->cnts.curbytes >= usize);
+ tctx->cnts.curobjs--;
+ tctx->cnts.curbytes -= usize;
- /*
- * Create a single allocation that has space for vec immediately
- * following the prof_bt_t structure. The backtraces that get
- * stored in the backtrace caches are copied from stack-allocated
- * temporary variables, so size is known at creation time. Making this
- * a contiguous object improves cache locality.
- */
- ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
- (bt->len * sizeof(void *)));
- if (ret == NULL)
- return (NULL);
- ret->vec = (void **)((uintptr_t)ret +
- QUANTUM_CEILING(sizeof(prof_bt_t)));
- memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
- ret->len = bt->len;
+ if (prof_tctx_should_destroy(tctx))
+ prof_tctx_destroy(tsd, tctx);
+ else
+ malloc_mutex_unlock(tctx->tdata->lock);
+}
- return (ret);
+void
+bt_init(prof_bt_t *bt, void **vec)
+{
+
+ cassert(config_prof);
+
+ bt->vec = vec;
+ bt->len = 0;
}
-static inline void
-prof_enter(prof_tdata_t *prof_tdata)
+JEMALLOC_INLINE_C void
+prof_enter(tsd_t *tsd, prof_tdata_t *tdata)
{
cassert(config_prof);
+ assert(tdata == prof_tdata_get(tsd, false));
- assert(prof_tdata->enq == false);
- prof_tdata->enq = true;
+ if (tdata != NULL) {
+ assert(!tdata->enq);
+ tdata->enq = true;
+ }
- malloc_mutex_lock(&bt2ctx_mtx);
+ malloc_mutex_lock(&bt2gctx_mtx);
}
-static inline void
-prof_leave(prof_tdata_t *prof_tdata)
+JEMALLOC_INLINE_C void
+prof_leave(tsd_t *tsd, prof_tdata_t *tdata)
{
- bool idump, gdump;
cassert(config_prof);
+ assert(tdata == prof_tdata_get(tsd, false));
+
+ malloc_mutex_unlock(&bt2gctx_mtx);
- malloc_mutex_unlock(&bt2ctx_mtx);
+ if (tdata != NULL) {
+ bool idump, gdump;
- assert(prof_tdata->enq);
- prof_tdata->enq = false;
- idump = prof_tdata->enq_idump;
- prof_tdata->enq_idump = false;
- gdump = prof_tdata->enq_gdump;
- prof_tdata->enq_gdump = false;
+ assert(tdata->enq);
+ tdata->enq = false;
+ idump = tdata->enq_idump;
+ tdata->enq_idump = false;
+ gdump = tdata->enq_gdump;
+ tdata->enq_gdump = false;
- if (idump)
- prof_idump();
- if (gdump)
- prof_gdump();
+ if (idump)
+ prof_idump();
+ if (gdump)
+ prof_gdump();
+ }
}
#ifdef JEMALLOC_PROF_LIBUNWIND
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
{
- unw_context_t uc;
- unw_cursor_t cursor;
- unsigned i;
- int err;
+ int nframes;
cassert(config_prof);
assert(bt->len == 0);
assert(bt->vec != NULL);
- unw_getcontext(&uc);
- unw_init_local(&cursor, &uc);
-
- /* Throw away (nignore+1) stack frames, if that many exist. */
- for (i = 0; i < nignore + 1; i++) {
- err = unw_step(&cursor);
- if (err <= 0)
- return;
- }
-
- /*
- * Iterate over stack frames until there are no more, or until no space
- * remains in bt.
- */
- for (i = 0; i < PROF_BT_MAX; i++) {
- unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
- bt->len++;
- err = unw_step(&cursor);
- if (err <= 0)
- break;
- }
+ nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
+ if (nframes <= 0)
+ return;
+ bt->len = nframes;
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
@@ -206,25 +335,25 @@ static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+ void *ip;
cassert(config_prof);
- if (data->nignore > 0)
- data->nignore--;
- else {
- data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
- data->bt->len++;
- if (data->bt->len == data->max)
- return (_URC_END_OF_STACK);
- }
+ ip = (void *)_Unwind_GetIP(context);
+ if (ip == NULL)
+ return (_URC_END_OF_STACK);
+ data->bt->vec[data->bt->len] = ip;
+ data->bt->len++;
+ if (data->bt->len == data->max)
+ return (_URC_END_OF_STACK);
return (_URC_NO_REASON);
}
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
{
- prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
+ prof_unwind_data_t data = {bt, PROF_BT_MAX};
cassert(config_prof);
@@ -232,25 +361,22 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
}
#elif (defined(JEMALLOC_PROF_GCC))
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
{
#define BT_FRAME(i) \
- if ((i) < nignore + PROF_BT_MAX) { \
+ if ((i) < PROF_BT_MAX) { \
void *p; \
if (__builtin_frame_address(i) == 0) \
return; \
p = __builtin_return_address(i); \
if (p == NULL) \
return; \
- if (i >= nignore) { \
- bt->vec[(i) - nignore] = p; \
- bt->len = (i) - nignore + 1; \
- } \
+ bt->vec[(i)] = p; \
+ bt->len = (i) + 1; \
} else \
return;
cassert(config_prof);
- assert(nignore <= 3);
BT_FRAME(0)
BT_FRAME(1)
@@ -392,16 +518,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
BT_FRAME(125)
BT_FRAME(126)
BT_FRAME(127)
-
- /* Extras to compensate for nignore. */
- BT_FRAME(128)
- BT_FRAME(129)
- BT_FRAME(130)
#undef BT_FRAME
}
#else
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
{
cassert(config_prof);
@@ -410,256 +531,394 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
#endif
static malloc_mutex_t *
-prof_ctx_mutex_choose(void)
+prof_gctx_mutex_choose(void)
{
- unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
+ unsigned ngctxs = atomic_add_u(&cum_gctxs, 1);
- return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
+ return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]);
}
-static void
-prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
+static malloc_mutex_t *
+prof_tdata_mutex_choose(uint64_t thr_uid)
{
- ctx->bt = bt;
- ctx->lock = prof_ctx_mutex_choose();
+ return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]);
+}
+
+static prof_gctx_t *
+prof_gctx_create(tsd_t *tsd, prof_bt_t *bt)
+{
+ /*
+ * Create a single allocation that has space for vec of length bt->len.
+ */
+ prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t,
+ vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true),
+ true, NULL);
+ if (gctx == NULL)
+ return (NULL);
+ gctx->lock = prof_gctx_mutex_choose();
/*
* Set nlimbo to 1, in order to avoid a race condition with
- * prof_ctx_merge()/prof_ctx_destroy().
+ * prof_tctx_destroy()/prof_gctx_try_destroy().
*/
- ctx->nlimbo = 1;
- ql_elm_new(ctx, dump_link);
- memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
- ql_new(&ctx->cnts_ql);
+ gctx->nlimbo = 1;
+ tctx_tree_new(&gctx->tctxs);
+ /* Duplicate bt. */
+ memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
+ gctx->bt.vec = gctx->vec;
+ gctx->bt.len = bt->len;
+ return (gctx);
}
static void
-prof_ctx_destroy(prof_ctx_t *ctx)
+prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
+ prof_tdata_t *tdata)
{
- prof_tdata_t *prof_tdata;
cassert(config_prof);
/*
- * Check that ctx is still unused by any thread cache before destroying
- * it. prof_lookup() increments ctx->nlimbo in order to avoid a race
- * condition with this function, as does prof_ctx_merge() in order to
- * avoid a race between the main body of prof_ctx_merge() and entry
+ * Check that gctx is still unused by any thread cache before destroying
+ * it. prof_lookup() increments gctx->nlimbo in order to avoid a race
+ * condition with this function, as does prof_tctx_destroy() in order to
+ * avoid a race between the main body of prof_tctx_destroy() and entry
* into this function.
*/
- prof_tdata = prof_tdata_get(false);
- assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
- prof_enter(prof_tdata);
- malloc_mutex_lock(ctx->lock);
- if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
- ctx->nlimbo == 1) {
- assert(ctx->cnt_merged.curbytes == 0);
- assert(ctx->cnt_merged.accumobjs == 0);
- assert(ctx->cnt_merged.accumbytes == 0);
- /* Remove ctx from bt2ctx. */
- if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+ prof_enter(tsd, tdata_self);
+ malloc_mutex_lock(gctx->lock);
+ assert(gctx->nlimbo != 0);
+ if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
+ /* Remove gctx from bt2gctx. */
+ if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL))
not_reached();
- prof_leave(prof_tdata);
- /* Destroy ctx. */
- malloc_mutex_unlock(ctx->lock);
- bt_destroy(ctx->bt);
- idalloc(ctx);
+ prof_leave(tsd, tdata_self);
+ /* Destroy gctx. */
+ malloc_mutex_unlock(gctx->lock);
+ idalloctm(tsd, gctx, tcache_get(tsd, false), true);
} else {
/*
- * Compensate for increment in prof_ctx_merge() or
+ * Compensate for increment in prof_tctx_destroy() or
* prof_lookup().
*/
- ctx->nlimbo--;
- malloc_mutex_unlock(ctx->lock);
- prof_leave(prof_tdata);
+ gctx->nlimbo--;
+ malloc_mutex_unlock(gctx->lock);
+ prof_leave(tsd, tdata_self);
}
}
-static void
-prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
+/* tctx->tdata->lock must be held. */
+static bool
+prof_tctx_should_destroy(prof_tctx_t *tctx)
{
- bool destroy;
- cassert(config_prof);
+ if (opt_prof_accum)
+ return (false);
+ if (tctx->cnts.curobjs != 0)
+ return (false);
+ if (tctx->prepared)
+ return (false);
+ return (true);
+}
+
+static bool
+prof_gctx_should_destroy(prof_gctx_t *gctx)
+{
+
+ if (opt_prof_accum)
+ return (false);
+ if (!tctx_tree_empty(&gctx->tctxs))
+ return (false);
+ if (gctx->nlimbo != 0)
+ return (false);
+ return (true);
+}
- /* Merge cnt stats and detach from ctx. */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
- ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
- ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
- ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
- ql_remove(&ctx->cnts_ql, cnt, cnts_link);
- if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
- ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
+/* tctx->tdata->lock is held upon entry, and released before return. */
+static void
+prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
+{
+ prof_tdata_t *tdata = tctx->tdata;
+ prof_gctx_t *gctx = tctx->gctx;
+ bool destroy_tdata, destroy_tctx, destroy_gctx;
+
+ assert(tctx->cnts.curobjs == 0);
+ assert(tctx->cnts.curbytes == 0);
+ assert(!opt_prof_accum);
+ assert(tctx->cnts.accumobjs == 0);
+ assert(tctx->cnts.accumbytes == 0);
+
+ ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
+ destroy_tdata = prof_tdata_should_destroy(tdata, false);
+ malloc_mutex_unlock(tdata->lock);
+
+ malloc_mutex_lock(gctx->lock);
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ tctx_tree_remove(&gctx->tctxs, tctx);
+ destroy_tctx = true;
+ if (prof_gctx_should_destroy(gctx)) {
+ /*
+ * Increment gctx->nlimbo in order to keep another
+ * thread from winning the race to destroy gctx while
+ * this one has gctx->lock dropped. Without this, it
+ * would be possible for another thread to:
+ *
+ * 1) Sample an allocation associated with gctx.
+ * 2) Deallocate the sampled object.
+ * 3) Successfully prof_gctx_try_destroy(gctx).
+ *
+ * The result would be that gctx no longer exists by the
+ * time this thread accesses it in
+ * prof_gctx_try_destroy().
+ */
+ gctx->nlimbo++;
+ destroy_gctx = true;
+ } else
+ destroy_gctx = false;
+ break;
+ case prof_tctx_state_dumping:
/*
- * Increment ctx->nlimbo in order to keep another thread from
- * winning the race to destroy ctx while this one has ctx->lock
- * dropped. Without this, it would be possible for another
- * thread to:
- *
- * 1) Sample an allocation associated with ctx.
- * 2) Deallocate the sampled object.
- * 3) Successfully prof_ctx_destroy(ctx).
- *
- * The result would be that ctx no longer exists by the time
- * this thread accesses it in prof_ctx_destroy().
+ * A dumping thread needs tctx to remain valid until dumping
+ * has finished. Change state such that the dumping thread will
+ * complete destruction during a late dump iteration phase.
*/
- ctx->nlimbo++;
- destroy = true;
- } else
- destroy = false;
- malloc_mutex_unlock(ctx->lock);
- if (destroy)
- prof_ctx_destroy(ctx);
+ tctx->state = prof_tctx_state_purgatory;
+ destroy_tctx = false;
+ destroy_gctx = false;
+ break;
+ default:
+ not_reached();
+ destroy_tctx = false;
+ destroy_gctx = false;
+ }
+ malloc_mutex_unlock(gctx->lock);
+ if (destroy_gctx) {
+ prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
+ tdata);
+ }
+
+ if (destroy_tdata)
+ prof_tdata_destroy(tsd, tdata, false);
+
+ if (destroy_tctx)
+ idalloctm(tsd, tctx, tcache_get(tsd, false), true);
}
static bool
-prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
- prof_ctx_t **p_ctx, bool *p_new_ctx)
+prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
+ void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx)
{
union {
- prof_ctx_t *p;
+ prof_gctx_t *p;
void *v;
- } ctx;
+ } gctx;
union {
prof_bt_t *p;
void *v;
} btkey;
- bool new_ctx;
+ bool new_gctx;
- prof_enter(prof_tdata);
- if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
+ prof_enter(tsd, tdata);
+ if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
/* bt has never been seen before. Insert it. */
- ctx.v = imalloc(sizeof(prof_ctx_t));
- if (ctx.v == NULL) {
- prof_leave(prof_tdata);
+ gctx.p = prof_gctx_create(tsd, bt);
+ if (gctx.v == NULL) {
+ prof_leave(tsd, tdata);
return (true);
}
- btkey.p = bt_dup(bt);
- if (btkey.v == NULL) {
- prof_leave(prof_tdata);
- idalloc(ctx.v);
- return (true);
- }
- prof_ctx_init(ctx.p, btkey.p);
- if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
+ btkey.p = &gctx.p->bt;
+ if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
/* OOM. */
- prof_leave(prof_tdata);
- idalloc(btkey.v);
- idalloc(ctx.v);
+ prof_leave(tsd, tdata);
+ idalloctm(tsd, gctx.v, tcache_get(tsd, false), true);
return (true);
}
- new_ctx = true;
+ new_gctx = true;
} else {
/*
* Increment nlimbo, in order to avoid a race condition with
- * prof_ctx_merge()/prof_ctx_destroy().
+ * prof_tctx_destroy()/prof_gctx_try_destroy().
*/
- malloc_mutex_lock(ctx.p->lock);
- ctx.p->nlimbo++;
- malloc_mutex_unlock(ctx.p->lock);
- new_ctx = false;
+ malloc_mutex_lock(gctx.p->lock);
+ gctx.p->nlimbo++;
+ malloc_mutex_unlock(gctx.p->lock);
+ new_gctx = false;
}
- prof_leave(prof_tdata);
+ prof_leave(tsd, tdata);
*p_btkey = btkey.v;
- *p_ctx = ctx.p;
- *p_new_ctx = new_ctx;
+ *p_gctx = gctx.p;
+ *p_new_gctx = new_gctx;
return (false);
}
-prof_thr_cnt_t *
-prof_lookup(prof_bt_t *bt)
+prof_tctx_t *
+prof_lookup(tsd_t *tsd, prof_bt_t *bt)
{
union {
- prof_thr_cnt_t *p;
+ prof_tctx_t *p;
void *v;
} ret;
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
+ bool not_found;
cassert(config_prof);
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL)
return (NULL);
- if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
+ malloc_mutex_lock(tdata->lock);
+ not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
+ if (!not_found) /* Note double negative! */
+ ret.p->prepared = true;
+ malloc_mutex_unlock(tdata->lock);
+ if (not_found) {
+ tcache_t *tcache;
void *btkey;
- prof_ctx_t *ctx;
- bool new_ctx;
+ prof_gctx_t *gctx;
+ bool new_gctx, error;
/*
* This thread's cache lacks bt. Look for it in the global
* cache.
*/
- if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
+ if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
+ &new_gctx))
return (NULL);
- /* Link a prof_thd_cnt_t into ctx for this thread. */
- if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
- assert(ckh_count(&prof_tdata->bt2cnt) > 0);
- /*
- * Flush the least recently used cnt in order to keep
- * bt2cnt from becoming too large.
- */
- ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
- assert(ret.v != NULL);
- if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
- NULL, NULL))
- not_reached();
- ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
- prof_ctx_merge(ret.p->ctx, ret.p);
- /* ret can now be re-used. */
- } else {
- assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
- /* Allocate and partially initialize a new cnt. */
- ret.v = imalloc(sizeof(prof_thr_cnt_t));
- if (ret.p == NULL) {
- if (new_ctx)
- prof_ctx_destroy(ctx);
- return (NULL);
- }
- ql_elm_new(ret.p, cnts_link);
- ql_elm_new(ret.p, lru_link);
+ /* Link a prof_tctx_t into gctx for this thread. */
+ tcache = tcache_get(tsd, true);
+ ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true,
+ NULL);
+ if (ret.p == NULL) {
+ if (new_gctx)
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
+ return (NULL);
}
- /* Finish initializing ret. */
- ret.p->ctx = ctx;
- ret.p->epoch = 0;
+ ret.p->tdata = tdata;
+ ret.p->thr_uid = tdata->thr_uid;
+ ret.p->thr_discrim = tdata->thr_discrim;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
- if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
- if (new_ctx)
- prof_ctx_destroy(ctx);
- idalloc(ret.v);
+ ret.p->gctx = gctx;
+ ret.p->tctx_uid = tdata->tctx_uid_next++;
+ ret.p->prepared = true;
+ ret.p->state = prof_tctx_state_initializing;
+ malloc_mutex_lock(tdata->lock);
+ error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
+ malloc_mutex_unlock(tdata->lock);
+ if (error) {
+ if (new_gctx)
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
+ idalloctm(tsd, ret.v, tcache, true);
return (NULL);
}
- ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
- malloc_mutex_lock(ctx->lock);
- ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
- ctx->nlimbo--;
- malloc_mutex_unlock(ctx->lock);
- } else {
- /* Move ret to the front of the LRU. */
- ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
- ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+ malloc_mutex_lock(gctx->lock);
+ ret.p->state = prof_tctx_state_nominal;
+ tctx_tree_insert(&gctx->tctxs, ret.p);
+ gctx->nlimbo--;
+ malloc_mutex_unlock(gctx->lock);
}
return (ret.p);
}
+void
+prof_sample_threshold_update(prof_tdata_t *tdata)
+{
+ /*
+ * The body of this function is compiled out unless heap profiling is
+ * enabled, so that it is possible to compile jemalloc with floating
+ * point support completely disabled. Avoiding floating point code is
+ * important on memory-constrained systems, but it also enables a
+ * workaround for versions of glibc that don't properly save/restore
+ * floating point registers during dynamic lazy symbol loading (which
+ * internally calls into whatever malloc implementation happens to be
+ * integrated into the application). Note that some compilers (e.g.
+ * gcc 4.8) may use floating point registers for fast memory moves, so
+ * jemalloc must be compiled with such optimizations disabled (e.g.
+ * -mno-sse) in order for the workaround to be complete.
+ */
+#ifdef JEMALLOC_PROF
+ uint64_t r;
+ double u;
+
+ if (!config_prof)
+ return;
+
+ if (lg_prof_sample == 0) {
+ tdata->bytes_until_sample = 0;
+ return;
+ }
+
+ /*
+ * Compute sample interval as a geometrically distributed random
+ * variable with mean (2^lg_prof_sample).
+ *
+ * __ __
+ * | log(u) | 1
+ * tdata->bytes_until_sample = | -------- |, where p = ---------------
+ * | log(1-p) | lg_prof_sample
+ * 2
+ *
+ * For more information on the math, see:
+ *
+ * Non-Uniform Random Variate Generation
+ * Luc Devroye
+ * Springer-Verlag, New York, 1986
+ * pp 500
+ * (http://luc.devroye.org/rnbookindex.html)
+ */
+ prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005),
+ UINT64_C(1442695040888963407));
+ u = (double)r * (1.0/9007199254740992.0L);
+ tdata->bytes_until_sample = (uint64_t)(log(u) /
+ log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+ + (uint64_t)1U;
+#endif
+}
+
+#ifdef JEMALLOC_JET
+static prof_tdata_t *
+prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
+{
+ size_t *tdata_count = (size_t *)arg;
+
+ (*tdata_count)++;
+
+ return (NULL);
+}
+
+size_t
+prof_tdata_count(void)
+{
+ size_t tdata_count = 0;
+
+ malloc_mutex_lock(&tdatas_mtx);
+ tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
+ (void *)&tdata_count);
+ malloc_mutex_unlock(&tdatas_mtx);
+
+ return (tdata_count);
+}
+#endif
+
#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
{
size_t bt_count;
- prof_tdata_t *prof_tdata;
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL)
return (0);
- prof_enter(prof_tdata);
- bt_count = ckh_count(&bt2ctx);
- prof_leave(prof_tdata);
+ malloc_mutex_lock(&bt2gctx_mtx);
+ bt_count = ckh_count(&bt2gctx);
+ malloc_mutex_unlock(&bt2gctx_mtx);
return (bt_count);
}
@@ -675,7 +934,7 @@ prof_dump_open(bool propagate_err, const char *filename)
int fd;
fd = creat(filename, 0644);
- if (fd == -1 && propagate_err == false) {
+ if (fd == -1 && !propagate_err) {
malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n",
filename);
if (opt_abort)
@@ -700,7 +959,7 @@ prof_dump_flush(bool propagate_err)
err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
if (err == -1) {
- if (propagate_err == false) {
+ if (!propagate_err) {
malloc_write("<jemalloc>: write() failed during heap "
"profile flush\n");
if (opt_abort)
@@ -756,7 +1015,7 @@ prof_dump_write(bool propagate_err, const char *s)
return (false);
}
-JEMALLOC_ATTR(format(printf, 2, 3))
+JEMALLOC_FORMAT_PRINTF(2, 3)
static bool
prof_dump_printf(bool propagate_err, const char *format, ...)
{
@@ -772,176 +1031,367 @@ prof_dump_printf(bool propagate_err, const char *format, ...)
return (ret);
}
+/* tctx->tdata->lock is held. */
static void
-prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
- prof_ctx_list_t *ctx_ql)
+prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata)
{
- prof_thr_cnt_t *thr_cnt;
- prof_cnt_t tcnt;
-
- cassert(config_prof);
-
- malloc_mutex_lock(ctx->lock);
-
- /*
- * Increment nlimbo so that ctx won't go away before dump.
- * Additionally, link ctx into the dump list so that it is included in
- * prof_dump()'s second pass.
- */
- ctx->nlimbo++;
- ql_tail_insert(ctx_ql, ctx, dump_link);
- memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
- ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
- volatile unsigned *epoch = &thr_cnt->epoch;
+ malloc_mutex_lock(tctx->gctx->lock);
- while (true) {
- unsigned epoch0 = *epoch;
-
- /* Make sure epoch is even. */
- if (epoch0 & 1U)
- continue;
-
- memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
+ switch (tctx->state) {
+ case prof_tctx_state_initializing:
+ malloc_mutex_unlock(tctx->gctx->lock);
+ return;
+ case prof_tctx_state_nominal:
+ tctx->state = prof_tctx_state_dumping;
+ malloc_mutex_unlock(tctx->gctx->lock);
- /* Terminate if epoch didn't change while reading. */
- if (*epoch == epoch0)
- break;
- }
+ memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
- ctx->cnt_summed.curobjs += tcnt.curobjs;
- ctx->cnt_summed.curbytes += tcnt.curbytes;
+ tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
+ tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
if (opt_prof_accum) {
- ctx->cnt_summed.accumobjs += tcnt.accumobjs;
- ctx->cnt_summed.accumbytes += tcnt.accumbytes;
+ tdata->cnt_summed.accumobjs +=
+ tctx->dump_cnts.accumobjs;
+ tdata->cnt_summed.accumbytes +=
+ tctx->dump_cnts.accumbytes;
}
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
+ not_reached();
}
+}
- if (ctx->cnt_summed.curobjs != 0)
- (*leak_nctx)++;
+/* gctx->lock is held. */
+static void
+prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx)
+{
- /* Add to cnt_all. */
- cnt_all->curobjs += ctx->cnt_summed.curobjs;
- cnt_all->curbytes += ctx->cnt_summed.curbytes;
+ gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
+ gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
if (opt_prof_accum) {
- cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
- cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
+ gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
+ gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
}
-
- malloc_mutex_unlock(ctx->lock);
}
-static bool
-prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
+/* tctx->gctx is held. */
+static prof_tctx_t *
+prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
{
- if (opt_lg_prof_sample == 0) {
- if (prof_dump_printf(propagate_err,
- "heap profile: %"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
- cnt_all->curobjs, cnt_all->curbytes,
- cnt_all->accumobjs, cnt_all->accumbytes))
- return (true);
- } else {
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ /* New since dumping started; ignore. */
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
+ prof_tctx_merge_gctx(tctx, tctx->gctx);
+ break;
+ default:
+ not_reached();
+ }
+
+ return (NULL);
+}
+
+/* gctx->lock is held. */
+static prof_tctx_t *
+prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
+{
+ bool propagate_err = *(bool *)arg;
+
+ switch (tctx->state) {
+ case prof_tctx_state_initializing:
+ case prof_tctx_state_nominal:
+ /* Not captured by this dump. */
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
if (prof_dump_printf(propagate_err,
- "heap profile: %"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
- cnt_all->curobjs, cnt_all->curbytes,
- cnt_all->accumobjs, cnt_all->accumbytes,
- ((uint64_t)1U << opt_lg_prof_sample)))
- return (true);
+ " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
+ "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
+ tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
+ tctx->dump_cnts.accumbytes))
+ return (tctx);
+ break;
+ default:
+ not_reached();
}
+ return (NULL);
+}
- return (false);
+/* tctx->gctx is held. */
+static prof_tctx_t *
+prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
+{
+ prof_tctx_t *ret;
+
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ /* New since dumping started; ignore. */
+ break;
+ case prof_tctx_state_dumping:
+ tctx->state = prof_tctx_state_nominal;
+ break;
+ case prof_tctx_state_purgatory:
+ ret = tctx;
+ goto label_return;
+ default:
+ not_reached();
+ }
+
+ ret = NULL;
+label_return:
+ return (ret);
}
static void
-prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
+prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
{
- ctx->nlimbo--;
- ql_remove(ctx_ql, ctx, dump_link);
+ cassert(config_prof);
+
+ malloc_mutex_lock(gctx->lock);
+
+ /*
+ * Increment nlimbo so that gctx won't go away before dump.
+ * Additionally, link gctx into the dump list so that it is included in
+ * prof_dump()'s second pass.
+ */
+ gctx->nlimbo++;
+ gctx_tree_insert(gctxs, gctx);
+
+ memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
+
+ malloc_mutex_unlock(gctx->lock);
+}
+
+static prof_gctx_t *
+prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
+{
+ size_t *leak_ngctx = (size_t *)arg;
+
+ malloc_mutex_lock(gctx->lock);
+ tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL);
+ if (gctx->cnt_summed.curobjs != 0)
+ (*leak_ngctx)++;
+ malloc_mutex_unlock(gctx->lock);
+
+ return (NULL);
}
static void
-prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
+prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
{
+ prof_tdata_t *tdata = prof_tdata_get(tsd, false);
+ prof_gctx_t *gctx;
- malloc_mutex_lock(ctx->lock);
- prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
- malloc_mutex_unlock(ctx->lock);
+ /*
+ * Standard tree iteration won't work here, because as soon as we
+ * decrement gctx->nlimbo and unlock gctx, another thread can
+ * concurrently destroy it, which will corrupt the tree. Therefore,
+ * tear down the tree one node at a time during iteration.
+ */
+ while ((gctx = gctx_tree_first(gctxs)) != NULL) {
+ gctx_tree_remove(gctxs, gctx);
+ malloc_mutex_lock(gctx->lock);
+ {
+ prof_tctx_t *next;
+
+ next = NULL;
+ do {
+ prof_tctx_t *to_destroy =
+ tctx_tree_iter(&gctx->tctxs, next,
+ prof_tctx_finish_iter, NULL);
+ if (to_destroy != NULL) {
+ next = tctx_tree_next(&gctx->tctxs,
+ to_destroy);
+ tctx_tree_remove(&gctx->tctxs,
+ to_destroy);
+ idalloctm(tsd, to_destroy,
+ tcache_get(tsd, false), true);
+ } else
+ next = NULL;
+ } while (next != NULL);
+ }
+ gctx->nlimbo--;
+ if (prof_gctx_should_destroy(gctx)) {
+ gctx->nlimbo++;
+ malloc_mutex_unlock(gctx->lock);
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
+ } else
+ malloc_mutex_unlock(gctx->lock);
+ }
}
+static prof_tdata_t *
+prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
+{
+ prof_cnt_t *cnt_all = (prof_cnt_t *)arg;
+
+ malloc_mutex_lock(tdata->lock);
+ if (!tdata->expired) {
+ size_t tabind;
+ union {
+ prof_tctx_t *p;
+ void *v;
+ } tctx;
+
+ tdata->dumping = true;
+ memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
+ for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
+ &tctx.v);)
+ prof_tctx_merge_tdata(tctx.p, tdata);
+
+ cnt_all->curobjs += tdata->cnt_summed.curobjs;
+ cnt_all->curbytes += tdata->cnt_summed.curbytes;
+ if (opt_prof_accum) {
+ cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
+ cnt_all->accumbytes += tdata->cnt_summed.accumbytes;
+ }
+ } else
+ tdata->dumping = false;
+ malloc_mutex_unlock(tdata->lock);
+
+ return (NULL);
+}
+
+static prof_tdata_t *
+prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
+{
+ bool propagate_err = *(bool *)arg;
+
+ if (!tdata->dumping)
+ return (NULL);
+
+ if (prof_dump_printf(propagate_err,
+ " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
+ tdata->thr_uid, tdata->cnt_summed.curobjs,
+ tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
+ tdata->cnt_summed.accumbytes,
+ (tdata->thread_name != NULL) ? " " : "",
+ (tdata->thread_name != NULL) ? tdata->thread_name : ""))
+ return (tdata);
+ return (NULL);
+}
+
+#ifdef JEMALLOC_JET
+#undef prof_dump_header
+#define prof_dump_header JEMALLOC_N(prof_dump_header_impl)
+#endif
+static bool
+prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
+{
+ bool ret;
+
+ if (prof_dump_printf(propagate_err,
+ "heap_v2/%"FMTu64"\n"
+ " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
+ ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
+ cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes))
+ return (true);
+
+ malloc_mutex_lock(&tdatas_mtx);
+ ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
+ (void *)&propagate_err) != NULL);
+ malloc_mutex_unlock(&tdatas_mtx);
+ return (ret);
+}
+#ifdef JEMALLOC_JET
+#undef prof_dump_header
+#define prof_dump_header JEMALLOC_N(prof_dump_header)
+prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl);
+#endif
+
+/* gctx->lock is held. */
static bool
-prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
- prof_ctx_list_t *ctx_ql)
+prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt,
+ prof_gctx_tree_t *gctxs)
{
bool ret;
unsigned i;
cassert(config_prof);
- /*
- * Current statistics can sum to 0 as a result of unmerged per thread
- * statistics. Additionally, interval- and growth-triggered dumps can
- * occur between the time a ctx is created and when its statistics are
- * filled in. Avoid dumping any ctx that is an artifact of either
- * implementation detail.
- */
- malloc_mutex_lock(ctx->lock);
- if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
- (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
- assert(ctx->cnt_summed.curobjs == 0);
- assert(ctx->cnt_summed.curbytes == 0);
- assert(ctx->cnt_summed.accumobjs == 0);
- assert(ctx->cnt_summed.accumbytes == 0);
+ /* Avoid dumping such gctx's that have no useful data. */
+ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
+ (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
+ assert(gctx->cnt_summed.curobjs == 0);
+ assert(gctx->cnt_summed.curbytes == 0);
+ assert(gctx->cnt_summed.accumobjs == 0);
+ assert(gctx->cnt_summed.accumbytes == 0);
ret = false;
goto label_return;
}
- if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @",
- ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
- ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
+ if (prof_dump_printf(propagate_err, "@")) {
ret = true;
goto label_return;
}
-
for (i = 0; i < bt->len; i++) {
- if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
+ if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
(uintptr_t)bt->vec[i])) {
ret = true;
goto label_return;
}
}
- if (prof_dump_write(propagate_err, "\n")) {
+ if (prof_dump_printf(propagate_err,
+ "\n"
+ " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
+ gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
+ gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
+ ret = true;
+ goto label_return;
+ }
+
+ if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
+ (void *)&propagate_err) != NULL) {
ret = true;
goto label_return;
}
ret = false;
label_return:
- prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
- malloc_mutex_unlock(ctx->lock);
return (ret);
}
+JEMALLOC_FORMAT_PRINTF(1, 2)
+static int
+prof_open_maps(const char *format, ...)
+{
+ int mfd;
+ va_list ap;
+ char filename[PATH_MAX + 1];
+
+ va_start(ap, format);
+ malloc_vsnprintf(filename, sizeof(filename), format, ap);
+ va_end(ap);
+ mfd = open(filename, O_RDONLY);
+
+ return (mfd);
+}
+
static bool
prof_dump_maps(bool propagate_err)
{
bool ret;
int mfd;
- char filename[PATH_MAX + 1];
cassert(config_prof);
#ifdef __FreeBSD__
- malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
+ mfd = prof_open_maps("/proc/curproc/map");
#else
- malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
- (int)getpid());
+ {
+ int pid = getpid();
+
+ mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
+ if (mfd == -1)
+ mfd = prof_open_maps("/proc/%d/maps", pid);
+ }
#endif
- mfd = open(filename, O_RDONLY);
if (mfd != -1) {
ssize_t nread;
@@ -977,51 +1427,85 @@ label_return:
}
static void
-prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
+prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
const char *filename)
{
if (cnt_all->curbytes != 0) {
- malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
- PRId64" object%s, %zu context%s\n",
+ malloc_printf("<jemalloc>: Leak summary: %"FMTu64" byte%s, %"
+ FMTu64" object%s, %zu context%s\n",
cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
- leak_nctx, (leak_nctx != 1) ? "s" : "");
+ leak_ngctx, (leak_ngctx != 1) ? "s" : "");
malloc_printf(
- "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
+ "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
filename);
}
}
+static prof_gctx_t *
+prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg)
+{
+ prof_gctx_t *ret;
+ bool propagate_err = *(bool *)arg;
+
+ malloc_mutex_lock(gctx->lock);
+
+ if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) {
+ ret = gctx;
+ goto label_return;
+ }
+
+ ret = NULL;
+label_return:
+ malloc_mutex_unlock(gctx->lock);
+ return (ret);
+}
+
static bool
-prof_dump(bool propagate_err, const char *filename, bool leakcheck)
+prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
prof_cnt_t cnt_all;
size_t tabind;
union {
- prof_ctx_t *p;
+ prof_gctx_t *p;
void *v;
- } ctx;
- size_t leak_nctx;
- prof_ctx_list_t ctx_ql;
+ } gctx;
+ size_t leak_ngctx;
+ prof_gctx_tree_t gctxs;
cassert(config_prof);
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL)
return (true);
malloc_mutex_lock(&prof_dump_mtx);
+ prof_enter(tsd, tdata);
- /* Merge per thread profile stats, and sum them in cnt_all. */
+ /*
+ * Put gctx's in limbo and clear their counters in preparation for
+ * summing.
+ */
+ gctx_tree_new(&gctxs);
+ for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);)
+ prof_dump_gctx_prep(gctx.p, &gctxs);
+
+ /*
+ * Iterate over tdatas, and for the non-expired ones snapshot their tctx
+ * stats and merge them into the associated gctx's.
+ */
memset(&cnt_all, 0, sizeof(prof_cnt_t));
- leak_nctx = 0;
- ql_new(&ctx_ql);
- prof_enter(prof_tdata);
- for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
- prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
- prof_leave(prof_tdata);
+ malloc_mutex_lock(&tdatas_mtx);
+ tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all);
+ malloc_mutex_unlock(&tdatas_mtx);
+
+ /* Merge tctx stats into gctx's. */
+ leak_ngctx = 0;
+ gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx);
+
+ prof_leave(tsd, tdata);
/* Create dump file. */
if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
@@ -1031,11 +1515,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
if (prof_dump_header(propagate_err, &cnt_all))
goto label_write_error;
- /* Dump per ctx profile stats. */
- while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
- if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
- goto label_write_error;
- }
+ /* Dump per gctx profile stats. */
+ if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter,
+ (void *)&propagate_err) != NULL)
+ goto label_write_error;
/* Dump /proc/<pid>/maps if possible. */
if (prof_dump_maps(propagate_err))
@@ -1044,17 +1527,17 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
if (prof_dump_close(propagate_err))
goto label_open_close_error;
+ prof_gctx_finish(tsd, &gctxs);
malloc_mutex_unlock(&prof_dump_mtx);
if (leakcheck)
- prof_leakcheck(&cnt_all, leak_nctx, filename);
+ prof_leakcheck(&cnt_all, leak_ngctx, filename);
return (false);
label_write_error:
prof_dump_close(propagate_err);
label_open_close_error:
- while ((ctx.p = ql_first(&ctx_ql)) != NULL)
- prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
+ prof_gctx_finish(tsd, &gctxs);
malloc_mutex_unlock(&prof_dump_mtx);
return (true);
}
@@ -1062,7 +1545,7 @@ label_open_close_error:
#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
#define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
static void
-prof_dump_filename(char *filename, char v, int64_t vseq)
+prof_dump_filename(char *filename, char v, uint64_t vseq)
{
cassert(config_prof);
@@ -1070,12 +1553,12 @@ prof_dump_filename(char *filename, char v, int64_t vseq)
if (vseq != VSEQ_INVALID) {
/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
- "%s.%d.%"PRIu64".%c%"PRId64".heap",
+ "%s.%d.%"FMTu64".%c%"FMTu64".heap",
opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
} else {
/* "<prefix>.<pid>.<seq>.<v>.heap" */
malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
- "%s.%d.%"PRIu64".%c.heap",
+ "%s.%d.%"FMTu64".%c.heap",
opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
}
prof_dump_seq++;
@@ -1084,57 +1567,63 @@ prof_dump_filename(char *filename, char v, int64_t vseq)
static void
prof_fdump(void)
{
+ tsd_t *tsd;
char filename[DUMP_FILENAME_BUFSIZE];
cassert(config_prof);
+ assert(opt_prof_final);
+ assert(opt_prof_prefix[0] != '\0');
- if (prof_booted == false)
+ if (!prof_booted)
return;
+ tsd = tsd_fetch();
- if (opt_prof_final && opt_prof_prefix[0] != '\0') {
- malloc_mutex_lock(&prof_dump_seq_mtx);
- prof_dump_filename(filename, 'f', VSEQ_INVALID);
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, opt_prof_leak);
- }
+ malloc_mutex_lock(&prof_dump_seq_mtx);
+ prof_dump_filename(filename, 'f', VSEQ_INVALID);
+ malloc_mutex_unlock(&prof_dump_seq_mtx);
+ prof_dump(tsd, false, filename, opt_prof_leak);
}
void
prof_idump(void)
{
- prof_tdata_t *prof_tdata;
- char filename[PATH_MAX + 1];
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
cassert(config_prof);
- if (prof_booted == false)
+ if (!prof_booted)
return;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL)
return;
- if (prof_tdata->enq) {
- prof_tdata->enq_idump = true;
+ if (tdata->enq) {
+ tdata->enq_idump = true;
return;
}
if (opt_prof_prefix[0] != '\0') {
+ char filename[PATH_MAX + 1];
malloc_mutex_lock(&prof_dump_seq_mtx);
prof_dump_filename(filename, 'i', prof_dump_iseq);
prof_dump_iseq++;
malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, false);
+ prof_dump(tsd, false, filename, false);
}
}
bool
prof_mdump(const char *filename)
{
+ tsd_t *tsd;
char filename_buf[DUMP_FILENAME_BUFSIZE];
cassert(config_prof);
- if (opt_prof == false || prof_booted == false)
+ if (!opt_prof || !prof_booted)
return (true);
+ tsd = tsd_fetch();
if (filename == NULL) {
/* No filename specified, so automatically generate one. */
@@ -1146,33 +1635,35 @@ prof_mdump(const char *filename)
malloc_mutex_unlock(&prof_dump_seq_mtx);
filename = filename_buf;
}
- return (prof_dump(true, filename, false));
+ return (prof_dump(tsd, true, filename, false));
}
void
prof_gdump(void)
{
- prof_tdata_t *prof_tdata;
- char filename[DUMP_FILENAME_BUFSIZE];
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
cassert(config_prof);
- if (prof_booted == false)
+ if (!prof_booted)
return;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL)
return;
- if (prof_tdata->enq) {
- prof_tdata->enq_gdump = true;
+ if (tdata->enq) {
+ tdata->enq_gdump = true;
return;
}
if (opt_prof_prefix[0] != '\0') {
+ char filename[DUMP_FILENAME_BUFSIZE];
malloc_mutex_lock(&prof_dump_seq_mtx);
prof_dump_filename(filename, 'u', prof_dump_useq);
prof_dump_useq++;
malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, false);
+ prof_dump(tsd, false, filename, false);
}
}
@@ -1199,88 +1690,375 @@ prof_bt_keycomp(const void *k1, const void *k2)
return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
-prof_tdata_t *
-prof_tdata_init(void)
+JEMALLOC_INLINE_C uint64_t
+prof_thr_uid_alloc(void)
+{
+ uint64_t thr_uid;
+
+ malloc_mutex_lock(&next_thr_uid_mtx);
+ thr_uid = next_thr_uid;
+ next_thr_uid++;
+ malloc_mutex_unlock(&next_thr_uid_mtx);
+
+ return (thr_uid);
+}
+
+static prof_tdata_t *
+prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
+ char *thread_name, bool active)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
+ tcache_t *tcache;
cassert(config_prof);
/* Initialize an empty cache for this thread. */
- prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
- if (prof_tdata == NULL)
+ tcache = tcache_get(tsd, true);
+ tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false,
+ tcache, true, NULL);
+ if (tdata == NULL)
return (NULL);
- if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
+ tdata->lock = prof_tdata_mutex_choose(thr_uid);
+ tdata->thr_uid = thr_uid;
+ tdata->thr_discrim = thr_discrim;
+ tdata->thread_name = thread_name;
+ tdata->attached = true;
+ tdata->expired = false;
+ tdata->tctx_uid_next = 0;
+
+ if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS,
prof_bt_hash, prof_bt_keycomp)) {
- idalloc(prof_tdata);
+ idalloctm(tsd, tdata, tcache, true);
return (NULL);
}
- ql_new(&prof_tdata->lru_ql);
- prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
- if (prof_tdata->vec == NULL) {
- ckh_delete(&prof_tdata->bt2cnt);
- idalloc(prof_tdata);
- return (NULL);
- }
+ tdata->prng_state = (uint64_t)(uintptr_t)tdata;
+ prof_sample_threshold_update(tdata);
- prof_tdata->prng_state = 0;
- prof_tdata->threshold = 0;
- prof_tdata->accum = 0;
+ tdata->enq = false;
+ tdata->enq_idump = false;
+ tdata->enq_gdump = false;
- prof_tdata->enq = false;
- prof_tdata->enq_idump = false;
- prof_tdata->enq_gdump = false;
+ tdata->dumping = false;
+ tdata->active = active;
- prof_tdata_tsd_set(&prof_tdata);
+ malloc_mutex_lock(&tdatas_mtx);
+ tdata_tree_insert(&tdatas, tdata);
+ malloc_mutex_unlock(&tdatas_mtx);
- return (prof_tdata);
+ return (tdata);
}
-void
-prof_tdata_cleanup(void *arg)
+prof_tdata_t *
+prof_tdata_init(tsd_t *tsd)
{
- prof_thr_cnt_t *cnt;
- prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
- cassert(config_prof);
+ return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL,
+ prof_thread_active_init_get()));
+}
- if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
- /*
- * Another destructor deallocated memory after this destructor
- * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
- * in order to receive another callback.
- */
- prof_tdata = PROF_TDATA_STATE_PURGATORY;
- prof_tdata_tsd_set(&prof_tdata);
- } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to PROF_TDATA_STATE_PURGATORY so that other destructors
- * wouldn't cause re-creation of the prof_tdata. This time, do
- * nothing, so that the destructor will not be called again.
- */
- } else if (prof_tdata != NULL) {
- /*
- * Delete the hash table. All of its contents can still be
- * iterated over via the LRU.
- */
- ckh_delete(&prof_tdata->bt2cnt);
+/* tdata->lock must be held. */
+static bool
+prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached)
+{
+
+ if (tdata->attached && !even_if_attached)
+ return (false);
+ if (ckh_count(&tdata->bt2tctx) != 0)
+ return (false);
+ return (true);
+}
+
+/* tdatas_mtx must be held. */
+static void
+prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
+ bool even_if_attached)
+{
+ tcache_t *tcache;
+
+ assert(prof_tdata_should_destroy(tdata, even_if_attached));
+ assert(tsd_prof_tdata_get(tsd) != tdata);
+
+ tdata_tree_remove(&tdatas, tdata);
+
+ tcache = tcache_get(tsd, false);
+ if (tdata->thread_name != NULL)
+ idalloctm(tsd, tdata->thread_name, tcache, true);
+ ckh_delete(tsd, &tdata->bt2tctx);
+ idalloctm(tsd, tdata, tcache, true);
+}
+
+static void
+prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached)
+{
+
+ malloc_mutex_lock(&tdatas_mtx);
+ prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
+ malloc_mutex_unlock(&tdatas_mtx);
+}
+
+static void
+prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata)
+{
+ bool destroy_tdata;
+
+ malloc_mutex_lock(tdata->lock);
+ if (tdata->attached) {
+ destroy_tdata = prof_tdata_should_destroy(tdata, true);
/*
- * Iteratively merge cnt's into the global stats and delete
- * them.
+ * Only detach if !destroy_tdata, because detaching would allow
+ * another thread to win the race to destroy tdata.
*/
- while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
- ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
- prof_ctx_merge(cnt->ctx, cnt);
- idalloc(cnt);
- }
- idalloc(prof_tdata->vec);
- idalloc(prof_tdata);
- prof_tdata = PROF_TDATA_STATE_PURGATORY;
- prof_tdata_tsd_set(&prof_tdata);
+ if (!destroy_tdata)
+ tdata->attached = false;
+ tsd_prof_tdata_set(tsd, NULL);
+ } else
+ destroy_tdata = false;
+ malloc_mutex_unlock(tdata->lock);
+ if (destroy_tdata)
+ prof_tdata_destroy(tsd, tdata, true);
+}
+
+prof_tdata_t *
+prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata)
+{
+ uint64_t thr_uid = tdata->thr_uid;
+ uint64_t thr_discrim = tdata->thr_discrim + 1;
+ char *thread_name = (tdata->thread_name != NULL) ?
+ prof_thread_name_alloc(tsd, tdata->thread_name) : NULL;
+ bool active = tdata->active;
+
+ prof_tdata_detach(tsd, tdata);
+ return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
+ active));
+}
+
+static bool
+prof_tdata_expire(prof_tdata_t *tdata)
+{
+ bool destroy_tdata;
+
+ malloc_mutex_lock(tdata->lock);
+ if (!tdata->expired) {
+ tdata->expired = true;
+ destroy_tdata = tdata->attached ? false :
+ prof_tdata_should_destroy(tdata, false);
+ } else
+ destroy_tdata = false;
+ malloc_mutex_unlock(tdata->lock);
+
+ return (destroy_tdata);
+}
+
+static prof_tdata_t *
+prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
+{
+
+ return (prof_tdata_expire(tdata) ? tdata : NULL);
+}
+
+void
+prof_reset(tsd_t *tsd, size_t lg_sample)
+{
+ prof_tdata_t *next;
+
+ assert(lg_sample < (sizeof(uint64_t) << 3));
+
+ malloc_mutex_lock(&prof_dump_mtx);
+ malloc_mutex_lock(&tdatas_mtx);
+
+ lg_prof_sample = lg_sample;
+
+ next = NULL;
+ do {
+ prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
+ prof_tdata_reset_iter, NULL);
+ if (to_destroy != NULL) {
+ next = tdata_tree_next(&tdatas, to_destroy);
+ prof_tdata_destroy_locked(tsd, to_destroy, false);
+ } else
+ next = NULL;
+ } while (next != NULL);
+
+ malloc_mutex_unlock(&tdatas_mtx);
+ malloc_mutex_unlock(&prof_dump_mtx);
+}
+
+void
+prof_tdata_cleanup(tsd_t *tsd)
+{
+ prof_tdata_t *tdata;
+
+ if (!config_prof)
+ return;
+
+ tdata = tsd_prof_tdata_get(tsd);
+ if (tdata != NULL)
+ prof_tdata_detach(tsd, tdata);
+}
+
+bool
+prof_active_get(void)
+{
+ bool prof_active_current;
+
+ malloc_mutex_lock(&prof_active_mtx);
+ prof_active_current = prof_active;
+ malloc_mutex_unlock(&prof_active_mtx);
+ return (prof_active_current);
+}
+
+bool
+prof_active_set(bool active)
+{
+ bool prof_active_old;
+
+ malloc_mutex_lock(&prof_active_mtx);
+ prof_active_old = prof_active;
+ prof_active = active;
+ malloc_mutex_unlock(&prof_active_mtx);
+ return (prof_active_old);
+}
+
+const char *
+prof_thread_name_get(void)
+{
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
+
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL)
+ return ("");
+ return (tdata->thread_name != NULL ? tdata->thread_name : "");
+}
+
+static char *
+prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
+{
+ char *ret;
+ size_t size;
+
+ if (thread_name == NULL)
+ return (NULL);
+
+ size = strlen(thread_name) + 1;
+ if (size == 1)
+ return ("");
+
+ ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL);
+ if (ret == NULL)
+ return (NULL);
+ memcpy(ret, thread_name, size);
+ return (ret);
+}
+
+int
+prof_thread_name_set(tsd_t *tsd, const char *thread_name)
+{
+ prof_tdata_t *tdata;
+ unsigned i;
+ char *s;
+
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL)
+ return (EAGAIN);
+
+ /* Validate input. */
+ if (thread_name == NULL)
+ return (EFAULT);
+ for (i = 0; thread_name[i] != '\0'; i++) {
+ char c = thread_name[i];
+ if (!isgraph(c) && !isblank(c))
+ return (EFAULT);
}
+
+ s = prof_thread_name_alloc(tsd, thread_name);
+ if (s == NULL)
+ return (EAGAIN);
+
+ if (tdata->thread_name != NULL) {
+ idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
+ true);
+ tdata->thread_name = NULL;
+ }
+ if (strlen(s) > 0)
+ tdata->thread_name = s;
+ return (0);
+}
+
+bool
+prof_thread_active_get(void)
+{
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
+
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL)
+ return (false);
+ return (tdata->active);
+}
+
+bool
+prof_thread_active_set(bool active)
+{
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
+
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL)
+ return (true);
+ tdata->active = active;
+ return (false);
+}
+
+bool
+prof_thread_active_init_get(void)
+{
+ bool active_init;
+
+ malloc_mutex_lock(&prof_thread_active_init_mtx);
+ active_init = prof_thread_active_init;
+ malloc_mutex_unlock(&prof_thread_active_init_mtx);
+ return (active_init);
+}
+
+bool
+prof_thread_active_init_set(bool active_init)
+{
+ bool active_init_old;
+
+ malloc_mutex_lock(&prof_thread_active_init_mtx);
+ active_init_old = prof_thread_active_init;
+ prof_thread_active_init = active_init;
+ malloc_mutex_unlock(&prof_thread_active_init_mtx);
+ return (active_init_old);
+}
+
+bool
+prof_gdump_get(void)
+{
+ bool prof_gdump_current;
+
+ malloc_mutex_lock(&prof_gdump_mtx);
+ prof_gdump_current = prof_gdump_val;
+ malloc_mutex_unlock(&prof_gdump_mtx);
+ return (prof_gdump_current);
+}
+
+bool
+prof_gdump_set(bool gdump)
+{
+ bool prof_gdump_old;
+
+ malloc_mutex_lock(&prof_gdump_mtx);
+ prof_gdump_old = prof_gdump_val;
+ prof_gdump_val = gdump;
+ malloc_mutex_unlock(&prof_gdump_mtx);
+ return (prof_gdump_old);
}
void
@@ -1300,11 +2078,11 @@ prof_boot1(void)
cassert(config_prof);
/*
- * opt_prof and prof_promote must be in their final state before any
- * arenas are initialized, so this function must be executed early.
+ * opt_prof must be in its final state before any arenas are
+ * initialized, so this function must be executed early.
*/
- if (opt_prof_leak && opt_prof == false) {
+ if (opt_prof_leak && !opt_prof) {
/*
* Enable opt_prof, but in such a way that profiles are never
* automatically dumped.
@@ -1317,8 +2095,6 @@ prof_boot1(void)
opt_lg_prof_interval);
}
}
-
- prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}
bool
@@ -1328,36 +2104,65 @@ prof_boot2(void)
cassert(config_prof);
if (opt_prof) {
+ tsd_t *tsd;
unsigned i;
- if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
+ lg_prof_sample = opt_lg_prof_sample;
+
+ prof_active = opt_prof_active;
+ if (malloc_mutex_init(&prof_active_mtx))
+ return (true);
+
+ prof_gdump_val = opt_prof_gdump;
+ if (malloc_mutex_init(&prof_gdump_mtx))
+ return (true);
+
+ prof_thread_active_init = opt_prof_thread_active_init;
+ if (malloc_mutex_init(&prof_thread_active_init_mtx))
+ return (true);
+
+ tsd = tsd_fetch();
+ if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
prof_bt_keycomp))
return (true);
- if (malloc_mutex_init(&bt2ctx_mtx))
+ if (malloc_mutex_init(&bt2gctx_mtx))
+ return (true);
+
+ tdata_tree_new(&tdatas);
+ if (malloc_mutex_init(&tdatas_mtx))
+ return (true);
+
+ next_thr_uid = 0;
+ if (malloc_mutex_init(&next_thr_uid_mtx))
return (true);
- if (prof_tdata_tsd_boot()) {
- malloc_write(
- "<jemalloc>: Error in pthread_key_create()\n");
- abort();
- }
if (malloc_mutex_init(&prof_dump_seq_mtx))
return (true);
if (malloc_mutex_init(&prof_dump_mtx))
return (true);
- if (atexit(prof_fdump) != 0) {
+ if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
+ atexit(prof_fdump) != 0) {
malloc_write("<jemalloc>: Error in atexit()\n");
if (opt_abort)
abort();
}
- ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
+ gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
sizeof(malloc_mutex_t));
- if (ctx_locks == NULL)
+ if (gctx_locks == NULL)
return (true);
for (i = 0; i < PROF_NCTX_LOCKS; i++) {
- if (malloc_mutex_init(&ctx_locks[i]))
+ if (malloc_mutex_init(&gctx_locks[i]))
+ return (true);
+ }
+
+ tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS *
+ sizeof(malloc_mutex_t));
+ if (tdata_locks == NULL)
+ return (true);
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
+ if (malloc_mutex_init(&tdata_locks[i]))
return (true);
}
}
@@ -1382,10 +2187,14 @@ prof_prefork(void)
if (opt_prof) {
unsigned i;
- malloc_mutex_prefork(&bt2ctx_mtx);
+ malloc_mutex_prefork(&tdatas_mtx);
+ malloc_mutex_prefork(&bt2gctx_mtx);
+ malloc_mutex_prefork(&next_thr_uid_mtx);
malloc_mutex_prefork(&prof_dump_seq_mtx);
for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_prefork(&ctx_locks[i]);
+ malloc_mutex_prefork(&gctx_locks[i]);
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++)
+ malloc_mutex_prefork(&tdata_locks[i]);
}
}
@@ -1396,10 +2205,14 @@ prof_postfork_parent(void)
if (opt_prof) {
unsigned i;
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++)
+ malloc_mutex_postfork_parent(&tdata_locks[i]);
for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_postfork_parent(&ctx_locks[i]);
+ malloc_mutex_postfork_parent(&gctx_locks[i]);
malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
- malloc_mutex_postfork_parent(&bt2ctx_mtx);
+ malloc_mutex_postfork_parent(&next_thr_uid_mtx);
+ malloc_mutex_postfork_parent(&bt2gctx_mtx);
+ malloc_mutex_postfork_parent(&tdatas_mtx);
}
}
@@ -1410,10 +2223,14 @@ prof_postfork_child(void)
if (opt_prof) {
unsigned i;
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++)
+ malloc_mutex_postfork_child(&tdata_locks[i]);
for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_postfork_child(&ctx_locks[i]);
+ malloc_mutex_postfork_child(&gctx_locks[i]);
malloc_mutex_postfork_child(&prof_dump_seq_mtx);
- malloc_mutex_postfork_child(&bt2ctx_mtx);
+ malloc_mutex_postfork_child(&next_thr_uid_mtx);
+ malloc_mutex_postfork_child(&bt2gctx_mtx);
+ malloc_mutex_postfork_child(&tdatas_mtx);
}
}
diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c
index 543151164..6c43dfcaa 100644
--- a/deps/jemalloc/src/quarantine.c
+++ b/deps/jemalloc/src/quarantine.c
@@ -2,7 +2,7 @@
#include "jemalloc/internal/jemalloc_internal.h"
/*
- * quarantine pointers close to NULL are used to encode state information that
+ * Quarantine pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1)
@@ -10,26 +10,25 @@
#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY
/******************************************************************************/
-/* Data. */
-
-malloc_tsd_data(, quarantine, quarantine_t *, NULL)
-
-/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-static quarantine_t *quarantine_grow(quarantine_t *quarantine);
-static void quarantine_drain_one(quarantine_t *quarantine);
-static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound);
+static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine);
+static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine);
+static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine,
+ size_t upper_bound);
/******************************************************************************/
-quarantine_t *
-quarantine_init(size_t lg_maxobjs)
+static quarantine_t *
+quarantine_init(tsd_t *tsd, size_t lg_maxobjs)
{
quarantine_t *quarantine;
- quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) +
- ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)));
+ assert(tsd_nominal(tsd));
+
+ quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs)
+ + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false,
+ tcache_get(tsd, true), true, NULL);
if (quarantine == NULL)
return (NULL);
quarantine->curbytes = 0;
@@ -37,19 +36,36 @@ quarantine_init(size_t lg_maxobjs)
quarantine->first = 0;
quarantine->lg_maxobjs = lg_maxobjs;
- quarantine_tsd_set(&quarantine);
-
return (quarantine);
}
+void
+quarantine_alloc_hook_work(tsd_t *tsd)
+{
+ quarantine_t *quarantine;
+
+ if (!tsd_nominal(tsd))
+ return;
+
+ quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT);
+ /*
+ * Check again whether quarantine has been initialized, because
+ * quarantine_init() may have triggered recursive initialization.
+ */
+ if (tsd_quarantine_get(tsd) == NULL)
+ tsd_quarantine_set(tsd, quarantine);
+ else
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+}
+
static quarantine_t *
-quarantine_grow(quarantine_t *quarantine)
+quarantine_grow(tsd_t *tsd, quarantine_t *quarantine)
{
quarantine_t *ret;
- ret = quarantine_init(quarantine->lg_maxobjs + 1);
+ ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1);
if (ret == NULL) {
- quarantine_drain_one(quarantine);
+ quarantine_drain_one(tsd, quarantine);
return (quarantine);
}
@@ -71,17 +87,18 @@ quarantine_grow(quarantine_t *quarantine)
memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
sizeof(quarantine_obj_t));
}
- idalloc(quarantine);
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+ tsd_quarantine_set(tsd, ret);
return (ret);
}
static void
-quarantine_drain_one(quarantine_t *quarantine)
+quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine)
{
quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
assert(obj->usize == isalloc(obj->ptr, config_prof));
- idalloc(obj->ptr);
+ idalloctm(tsd, obj->ptr, NULL, false);
quarantine->curbytes -= obj->usize;
quarantine->curobjs--;
quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
@@ -89,15 +106,15 @@ quarantine_drain_one(quarantine_t *quarantine)
}
static void
-quarantine_drain(quarantine_t *quarantine, size_t upper_bound)
+quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound)
{
while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0)
- quarantine_drain_one(quarantine);
+ quarantine_drain_one(tsd, quarantine);
}
void
-quarantine(void *ptr)
+quarantine(tsd_t *tsd, void *ptr)
{
quarantine_t *quarantine;
size_t usize = isalloc(ptr, config_prof);
@@ -105,17 +122,8 @@ quarantine(void *ptr)
cassert(config_fill);
assert(opt_quarantine);
- quarantine = *quarantine_tsd_get();
- if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) {
- if (quarantine == QUARANTINE_STATE_PURGATORY) {
- /*
- * Make a note that quarantine() was called after
- * quarantine_cleanup() was called.
- */
- quarantine = QUARANTINE_STATE_REINCARNATED;
- quarantine_tsd_set(&quarantine);
- }
- idalloc(ptr);
+ if ((quarantine = tsd_quarantine_get(tsd)) == NULL) {
+ idalloctm(tsd, ptr, NULL, false);
return;
}
/*
@@ -125,11 +133,11 @@ quarantine(void *ptr)
if (quarantine->curbytes + usize > opt_quarantine) {
size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
- usize : 0;
- quarantine_drain(quarantine, upper_bound);
+ quarantine_drain(tsd, quarantine, upper_bound);
}
/* Grow the quarantine ring buffer if it's full. */
if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
- quarantine = quarantine_grow(quarantine);
+ quarantine = quarantine_grow(tsd, quarantine);
/* quarantine_grow() must free a slot if it fails to grow. */
assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs));
/* Append ptr if its size doesn't exceed the quarantine size. */
@@ -141,12 +149,12 @@ quarantine(void *ptr)
obj->usize = usize;
quarantine->curbytes += usize;
quarantine->curobjs++;
- if (config_fill && opt_junk) {
+ if (config_fill && unlikely(opt_junk_free)) {
/*
* Only do redzone validation if Valgrind isn't in
* operation.
*/
- if ((config_valgrind == false || opt_valgrind == false)
+ if ((!config_valgrind || likely(!in_valgrind))
&& usize <= SMALL_MAXCLASS)
arena_quarantine_junk_small(ptr, usize);
else
@@ -154,46 +162,22 @@ quarantine(void *ptr)
}
} else {
assert(quarantine->curbytes == 0);
- idalloc(ptr);
+ idalloctm(tsd, ptr, NULL, false);
}
}
void
-quarantine_cleanup(void *arg)
-{
- quarantine_t *quarantine = *(quarantine_t **)arg;
-
- if (quarantine == QUARANTINE_STATE_REINCARNATED) {
- /*
- * Another destructor deallocated memory after this destructor
- * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY
- * in order to receive another callback.
- */
- quarantine = QUARANTINE_STATE_PURGATORY;
- quarantine_tsd_set(&quarantine);
- } else if (quarantine == QUARANTINE_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to QUARANTINE_STATE_PURGATORY so that other destructors
- * wouldn't cause re-creation of the quarantine. This time, do
- * nothing, so that the destructor will not be called again.
- */
- } else if (quarantine != NULL) {
- quarantine_drain(quarantine, 0);
- idalloc(quarantine);
- quarantine = QUARANTINE_STATE_PURGATORY;
- quarantine_tsd_set(&quarantine);
- }
-}
-
-bool
-quarantine_boot(void)
+quarantine_cleanup(tsd_t *tsd)
{
+ quarantine_t *quarantine;
- cassert(config_fill);
-
- if (quarantine_tsd_boot())
- return (true);
+ if (!config_fill)
+ return;
- return (false);
+ quarantine = tsd_quarantine_get(tsd);
+ if (quarantine != NULL) {
+ quarantine_drain(tsd, quarantine, 0);
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+ tsd_quarantine_set(tsd, NULL);
+ }
}
diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c
index 205957ac4..af0d97e75 100644
--- a/deps/jemalloc/src/rtree.c
+++ b/deps/jemalloc/src/rtree.c
@@ -1,73 +1,74 @@
#define JEMALLOC_RTREE_C_
#include "jemalloc/internal/jemalloc_internal.h"
-rtree_t *
-rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
+static unsigned
+hmin(unsigned ha, unsigned hb)
{
- rtree_t *ret;
- unsigned bits_per_level, bits_in_leaf, height, i;
+
+ return (ha < hb ? ha : hb);
+}
+
+/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */
+bool
+rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+ rtree_node_dalloc_t *dalloc)
+{
+ unsigned bits_in_leaf, height, i;
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
- bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
- bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1;
+ bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
+ : (bits % RTREE_BITS_PER_LEVEL);
if (bits > bits_in_leaf) {
- height = 1 + (bits - bits_in_leaf) / bits_per_level;
- if ((height-1) * bits_per_level + bits_in_leaf != bits)
+ height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL;
+ if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits)
height++;
- } else {
+ } else
height = 1;
+ assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits);
+
+ rtree->alloc = alloc;
+ rtree->dalloc = dalloc;
+ rtree->height = height;
+
+ /* Root level. */
+ rtree->levels[0].subtree = NULL;
+ rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL :
+ bits_in_leaf;
+ rtree->levels[0].cumbits = rtree->levels[0].bits;
+ /* Interior levels. */
+ for (i = 1; i < height-1; i++) {
+ rtree->levels[i].subtree = NULL;
+ rtree->levels[i].bits = RTREE_BITS_PER_LEVEL;
+ rtree->levels[i].cumbits = rtree->levels[i-1].cumbits +
+ RTREE_BITS_PER_LEVEL;
}
- assert((height-1) * bits_per_level + bits_in_leaf >= bits);
-
- ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
- (sizeof(unsigned) * height));
- if (ret == NULL)
- return (NULL);
- memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
- height));
-
- ret->alloc = alloc;
- ret->dalloc = dalloc;
- if (malloc_mutex_init(&ret->mutex)) {
- if (dalloc != NULL)
- dalloc(ret);
- return (NULL);
- }
- ret->height = height;
+ /* Leaf level. */
if (height > 1) {
- if ((height-1) * bits_per_level + bits_in_leaf > bits) {
- ret->level2bits[0] = (bits - bits_in_leaf) %
- bits_per_level;
- } else
- ret->level2bits[0] = bits_per_level;
- for (i = 1; i < height-1; i++)
- ret->level2bits[i] = bits_per_level;
- ret->level2bits[height-1] = bits_in_leaf;
- } else
- ret->level2bits[0] = bits;
+ rtree->levels[height-1].subtree = NULL;
+ rtree->levels[height-1].bits = bits_in_leaf;
+ rtree->levels[height-1].cumbits = bits;
+ }
- ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
- if (ret->root == NULL) {
- if (dalloc != NULL)
- dalloc(ret);
- return (NULL);
+ /* Compute lookup table to be used by rtree_start_level(). */
+ for (i = 0; i < RTREE_HEIGHT_MAX; i++) {
+ rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height -
+ 1);
}
- memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
- return (ret);
+ return (false);
}
static void
-rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
+rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level)
{
- if (level < rtree->height - 1) {
+ if (level + 1 < rtree->height) {
size_t nchildren, i;
- nchildren = ZU(1) << rtree->level2bits[level];
+ nchildren = ZU(1) << rtree->levels[level].bits;
for (i = 0; i < nchildren; i++) {
- void **child = (void **)node[i];
+ rtree_node_elm_t *child = node[i].child;
if (child != NULL)
rtree_delete_subtree(rtree, child, level + 1);
}
@@ -78,28 +79,49 @@ rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
void
rtree_delete(rtree_t *rtree)
{
+ unsigned i;
- rtree_delete_subtree(rtree, rtree->root, 0);
- rtree->dalloc(rtree);
+ for (i = 0; i < rtree->height; i++) {
+ rtree_node_elm_t *subtree = rtree->levels[i].subtree;
+ if (subtree != NULL)
+ rtree_delete_subtree(rtree, subtree, i);
+ }
}
-void
-rtree_prefork(rtree_t *rtree)
+static rtree_node_elm_t *
+rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp)
{
+ rtree_node_elm_t *node;
+
+ if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
+ /*
+ * Another thread is already in the process of initializing.
+ * Spin-wait until initialization is complete.
+ */
+ do {
+ CPU_SPINWAIT;
+ node = atomic_read_p((void **)elmp);
+ } while (node == RTREE_NODE_INITIALIZING);
+ } else {
+ node = rtree->alloc(ZU(1) << rtree->levels[level].bits);
+ if (node == NULL)
+ return (NULL);
+ atomic_write_p((void **)elmp, node);
+ }
- malloc_mutex_prefork(&rtree->mutex);
+ return (node);
}
-void
-rtree_postfork_parent(rtree_t *rtree)
+rtree_node_elm_t *
+rtree_subtree_read_hard(rtree_t *rtree, unsigned level)
{
- malloc_mutex_postfork_parent(&rtree->mutex);
+ return (rtree_node_init(rtree, level, &rtree->levels[level].subtree));
}
-void
-rtree_postfork_child(rtree_t *rtree)
+rtree_node_elm_t *
+rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
{
- malloc_mutex_postfork_child(&rtree->mutex);
+ return (rtree_node_init(rtree, level, &elm->child));
}
diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c
index bef2ab33c..154c3e74c 100644
--- a/deps/jemalloc/src/stats.c
+++ b/deps/jemalloc/src/stats.c
@@ -6,31 +6,22 @@
xmallctl(n, v, &sz, NULL, 0); \
} while (0)
-#define CTL_I_GET(n, v, t) do { \
+#define CTL_M2_GET(n, i, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
- mib[2] = i; \
+ mib[2] = (i); \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
-#define CTL_J_GET(n, v, t) do { \
+#define CTL_M2_M4_GET(n, i, j, v, t) do { \
size_t mib[6]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
- mib[2] = j; \
- xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
-} while (0)
-
-#define CTL_IJ_GET(n, v, t) do { \
- size_t mib[6]; \
- size_t miblen = sizeof(mib) / sizeof(size_t); \
- size_t sz = sizeof(t); \
- xmallctlnametomib(n, mib, &miblen); \
- mib[2] = i; \
- mib[4] = j; \
+ mib[2] = (i); \
+ mib[4] = (j); \
xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
} while (0)
@@ -48,8 +39,10 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
static void stats_arena_lruns_print(void (*write_cb)(void *, const char *),
void *cbopaque, unsigned i);
+static void stats_arena_hchunks_print(
+ void (*write_cb)(void *, const char *), void *cbopaque, unsigned i);
static void stats_arena_print(void (*write_cb)(void *, const char *),
- void *cbopaque, unsigned i, bool bins, bool large);
+ void *cbopaque, unsigned i, bool bins, bool large, bool huge);
/******************************************************************************/
@@ -58,100 +51,109 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
size_t page;
- bool config_tcache;
- unsigned nbins, j, gap_start;
+ bool config_tcache, in_gap;
+ unsigned nbins, j;
CTL_GET("arenas.page", &page, size_t);
CTL_GET("config.tcache", &config_tcache, bool);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
- "bins: bin size regs pgs allocated nmalloc"
- " ndalloc nrequests nfills nflushes"
- " newruns reruns curruns\n");
+ "bins: size ind allocated nmalloc"
+ " ndalloc nrequests curregs curruns regs"
+ " pgs util nfills nflushes newruns"
+ " reruns\n");
} else {
malloc_cprintf(write_cb, cbopaque,
- "bins: bin size regs pgs allocated nmalloc"
- " ndalloc newruns reruns curruns\n");
+ "bins: size ind allocated nmalloc"
+ " ndalloc nrequests curregs curruns regs"
+ " pgs util newruns reruns\n");
}
CTL_GET("arenas.nbins", &nbins, unsigned);
- for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
+ for (j = 0, in_gap = false; j < nbins; j++) {
uint64_t nruns;
- CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
- if (nruns == 0) {
- if (gap_start == UINT_MAX)
- gap_start = j;
- } else {
- size_t reg_size, run_size, allocated;
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns,
+ uint64_t);
+ if (nruns == 0)
+ in_gap = true;
+ else {
+ size_t reg_size, run_size, curregs, availregs, milli;
+ size_t curruns;
uint32_t nregs;
uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
uint64_t reruns;
- size_t curruns;
+ char util[6]; /* "x.yyy". */
- if (gap_start != UINT_MAX) {
- if (j > gap_start + 1) {
- /* Gap of more than one size class. */
- malloc_cprintf(write_cb, cbopaque,
- "[%u..%u]\n", gap_start,
- j - 1);
- } else {
- /* Gap of one size class. */
- malloc_cprintf(write_cb, cbopaque,
- "[%u]\n", gap_start);
- }
- gap_start = UINT_MAX;
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
+ in_gap = false;
}
- CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
- CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
- CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
- &allocated, size_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
+ CTL_M2_GET("arenas.bin.0.size", j, &reg_size, size_t);
+ CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t);
+ CTL_M2_GET("arenas.bin.0.run_size", j, &run_size,
+ size_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j,
&nmalloc, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j,
&ndalloc, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j,
+ &curregs, size_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j,
+ &nrequests, uint64_t);
if (config_tcache) {
- CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
- &nrequests, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
- &nfills, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
- &nflushes, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i,
+ j, &nfills, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes",
+ i, j, &nflushes, uint64_t);
}
- CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
- uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
- size_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j,
+ &reruns, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j,
+ &curruns, size_t);
+
+ availregs = nregs * curruns;
+ milli = (availregs != 0) ? (1000 * curregs) / availregs
+ : 1000;
+ assert(milli <= 1000);
+ if (milli < 10) {
+ malloc_snprintf(util, sizeof(util),
+ "0.00%zu", milli);
+ } else if (milli < 100) {
+ malloc_snprintf(util, sizeof(util), "0.0%zu",
+ milli);
+ } else if (milli < 1000) {
+ malloc_snprintf(util, sizeof(util), "0.%zu",
+ milli);
+ } else
+ malloc_snprintf(util, sizeof(util), "1");
+
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
- "%13u %5zu %4u %3zu %12zu %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- j, reg_size, nregs, run_size / page,
- allocated, nmalloc, ndalloc, nrequests,
- nfills, nflushes, nruns, reruns, curruns);
+ "%20zu %3u %12zu %12"FMTu64
+ " %12"FMTu64" %12"FMTu64" %12zu"
+ " %12zu %4u %3zu %-5s %12"FMTu64
+ " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n",
+ reg_size, j, curregs * reg_size, nmalloc,
+ ndalloc, nrequests, curregs, curruns, nregs,
+ run_size / page, util, nfills, nflushes,
+ nruns, reruns);
} else {
malloc_cprintf(write_cb, cbopaque,
- "%13u %5zu %4u %3zu %12zu %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- j, reg_size, nregs, run_size / page,
- allocated, nmalloc, ndalloc, nruns, reruns,
- curruns);
+ "%20zu %3u %12zu %12"FMTu64
+ " %12"FMTu64" %12"FMTu64" %12zu"
+ " %12zu %4u %3zu %-5s %12"FMTu64
+ " %12"FMTu64"\n",
+ reg_size, j, curregs * reg_size, nmalloc,
+ ndalloc, nrequests, curregs, curruns, nregs,
+ run_size / page, util, nruns, reruns);
}
}
}
- if (gap_start != UINT_MAX) {
- if (j > gap_start + 1) {
- /* Gap of more than one size class. */
- malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
- gap_start, j - 1);
- } else {
- /* Gap of one size class. */
- malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
- }
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
}
}
@@ -159,110 +161,199 @@ static void
stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
unsigned i)
{
- size_t page, nlruns, j;
- ssize_t gap_start;
-
- CTL_GET("arenas.page", &page, size_t);
+ unsigned nbins, nlruns, j;
+ bool in_gap;
malloc_cprintf(write_cb, cbopaque,
- "large: size pages nmalloc ndalloc nrequests"
- " curruns\n");
- CTL_GET("arenas.nlruns", &nlruns, size_t);
- for (j = 0, gap_start = -1; j < nlruns; j++) {
+ "large: size ind allocated nmalloc ndalloc"
+ " nrequests curruns\n");
+ CTL_GET("arenas.nbins", &nbins, unsigned);
+ CTL_GET("arenas.nlruns", &nlruns, unsigned);
+ for (j = 0, in_gap = false; j < nlruns; j++) {
uint64_t nmalloc, ndalloc, nrequests;
size_t run_size, curruns;
- CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
+ CTL_M2_M4_GET("stats.arenas.0.lruns.0.nmalloc", i, j, &nmalloc,
uint64_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
+ CTL_M2_M4_GET("stats.arenas.0.lruns.0.ndalloc", i, j, &ndalloc,
uint64_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
- uint64_t);
- if (nrequests == 0) {
- if (gap_start == -1)
- gap_start = j;
- } else {
- CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
+ CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j,
+ &nrequests, uint64_t);
+ if (nrequests == 0)
+ in_gap = true;
+ else {
+ CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t);
+ CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j,
+ &curruns, size_t);
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
+ in_gap = false;
+ }
+ malloc_cprintf(write_cb, cbopaque,
+ "%20zu %3u %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64" %12zu\n",
+ run_size, nbins + j, curruns * run_size, nmalloc,
+ ndalloc, nrequests, curruns);
+ }
+ }
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
+ }
+}
+
+static void
+stats_arena_hchunks_print(void (*write_cb)(void *, const char *),
+ void *cbopaque, unsigned i)
+{
+ unsigned nbins, nlruns, nhchunks, j;
+ bool in_gap;
+
+ malloc_cprintf(write_cb, cbopaque,
+ "huge: size ind allocated nmalloc ndalloc"
+ " nrequests curhchunks\n");
+ CTL_GET("arenas.nbins", &nbins, unsigned);
+ CTL_GET("arenas.nlruns", &nlruns, unsigned);
+ CTL_GET("arenas.nhchunks", &nhchunks, unsigned);
+ for (j = 0, in_gap = false; j < nhchunks; j++) {
+ uint64_t nmalloc, ndalloc, nrequests;
+ size_t hchunk_size, curhchunks;
+
+ CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nmalloc", i, j,
+ &nmalloc, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.hchunks.0.ndalloc", i, j,
+ &ndalloc, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j,
+ &nrequests, uint64_t);
+ if (nrequests == 0)
+ in_gap = true;
+ else {
+ CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size,
size_t);
- if (gap_start != -1) {
- malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
- j - gap_start);
- gap_start = -1;
+ CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i,
+ j, &curhchunks, size_t);
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
+ in_gap = false;
}
malloc_cprintf(write_cb, cbopaque,
- "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- run_size, run_size / page, nmalloc, ndalloc,
- nrequests, curruns);
+ "%20zu %3u %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64" %12zu\n",
+ hchunk_size, nbins + nlruns + j,
+ curhchunks * hchunk_size, nmalloc, ndalloc,
+ nrequests, curhchunks);
}
}
- if (gap_start != -1)
- malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
+ if (in_gap) {
+ malloc_cprintf(write_cb, cbopaque,
+ " ---\n");
+ }
}
static void
stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
- unsigned i, bool bins, bool large)
+ unsigned i, bool bins, bool large, bool huge)
{
unsigned nthreads;
const char *dss;
+ ssize_t lg_dirty_mult;
size_t page, pactive, pdirty, mapped;
+ size_t metadata_mapped, metadata_allocated;
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
uint64_t small_nmalloc, small_ndalloc, small_nrequests;
size_t large_allocated;
uint64_t large_nmalloc, large_ndalloc, large_nrequests;
+ size_t huge_allocated;
+ uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests;
CTL_GET("arenas.page", &page, size_t);
- CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+ CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned);
malloc_cprintf(write_cb, cbopaque,
"assigned threads: %u\n", nthreads);
- CTL_I_GET("stats.arenas.0.dss", &dss, const char *);
+ CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *);
malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n",
dss);
- CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
- CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
- CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
- CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
- CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
+ CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t);
+ if (lg_dirty_mult >= 0) {
+ malloc_cprintf(write_cb, cbopaque,
+ "min active:dirty page ratio: %u:1\n",
+ (1U << lg_dirty_mult));
+ } else {
+ malloc_cprintf(write_cb, cbopaque,
+ "min active:dirty page ratio: N/A\n");
+ }
+ CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t);
+ CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t);
+ CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t);
+ CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t);
+ CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t);
malloc_cprintf(write_cb, cbopaque,
- "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
- " %"PRIu64" madvise%s, %"PRIu64" purged\n",
- pactive, pdirty, npurge, npurge == 1 ? "" : "s",
- nmadvise, nmadvise == 1 ? "" : "s", purged);
+ "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64
+ " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge ==
+ 1 ? "" : "s", nmadvise, nmadvise == 1 ? "" : "s", purged);
malloc_cprintf(write_cb, cbopaque,
- " allocated nmalloc ndalloc nrequests\n");
- CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
- CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
+ " allocated nmalloc ndalloc"
+ " nrequests\n");
+ CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated,
+ size_t);
+ CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests,
+ uint64_t);
malloc_cprintf(write_cb, cbopaque,
- "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+ "small: %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64"\n",
small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
- CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
- CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
+ CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated,
+ size_t);
+ CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests,
+ uint64_t);
malloc_cprintf(write_cb, cbopaque,
- "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+ "large: %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64"\n",
large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
+ CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t);
+ CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t);
+ CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests,
+ uint64_t);
+ malloc_cprintf(write_cb, cbopaque,
+ "huge: %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64"\n",
+ huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests);
+ malloc_cprintf(write_cb, cbopaque,
+ "total: %12zu %12"FMTu64" %12"FMTu64
+ " %12"FMTu64"\n",
+ small_allocated + large_allocated + huge_allocated,
+ small_nmalloc + large_nmalloc + huge_nmalloc,
+ small_ndalloc + large_ndalloc + huge_ndalloc,
+ small_nrequests + large_nrequests + huge_nrequests);
+ malloc_cprintf(write_cb, cbopaque,
+ "active: %12zu\n", pactive * page);
+ CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
- "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- small_allocated + large_allocated,
- small_nmalloc + large_nmalloc,
- small_ndalloc + large_ndalloc,
- small_nrequests + large_nrequests);
- malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page);
- CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
- malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped);
+ "mapped: %12zu\n", mapped);
+ CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped,
+ size_t);
+ CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated,
+ size_t);
+ malloc_cprintf(write_cb, cbopaque,
+ "metadata: mapped: %zu, allocated: %zu\n",
+ metadata_mapped, metadata_allocated);
if (bins)
stats_arena_bins_print(write_cb, cbopaque, i);
if (large)
stats_arena_lruns_print(write_cb, cbopaque, i);
+ if (huge)
+ stats_arena_hchunks_print(write_cb, cbopaque, i);
}
void
@@ -277,6 +368,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
bool unmerged = true;
bool bins = true;
bool large = true;
+ bool huge = true;
/*
* Refresh stats, in case mallctl() was called by the application.
@@ -319,6 +411,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
case 'l':
large = false;
break;
+ case 'h':
+ huge = false;
+ break;
default:;
}
}
@@ -327,7 +422,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
malloc_cprintf(write_cb, cbopaque,
"___ Begin jemalloc statistics ___\n");
if (general) {
- int err;
const char *cpv;
bool bv;
unsigned uv;
@@ -346,26 +440,40 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
bv ? "enabled" : "disabled");
#define OPT_WRITE_BOOL(n) \
- if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \
- == 0) { \
+ if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %s\n", bv ? "true" : "false"); \
}
+#define OPT_WRITE_BOOL_MUTABLE(n, m) { \
+ bool bv2; \
+ if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \
+ je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \
+ malloc_cprintf(write_cb, cbopaque, \
+ " opt."#n": %s ("#m": %s)\n", bv ? "true" \
+ : "false", bv2 ? "true" : "false"); \
+ } \
+}
#define OPT_WRITE_SIZE_T(n) \
- if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \
- == 0) { \
+ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %zu\n", sv); \
}
#define OPT_WRITE_SSIZE_T(n) \
- if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \
- == 0) { \
+ if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": %zd\n", ssv); \
}
+#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \
+ ssize_t ssv2; \
+ if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \
+ je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \
+ malloc_cprintf(write_cb, cbopaque, \
+ " opt."#n": %zd ("#m": %zd)\n", \
+ ssv, ssv2); \
+ } \
+}
#define OPT_WRITE_CHAR_P(n) \
- if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \
- == 0) { \
+ if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \
malloc_cprintf(write_cb, cbopaque, \
" opt."#n": \"%s\"\n", cpv); \
}
@@ -376,9 +484,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_SIZE_T(lg_chunk)
OPT_WRITE_CHAR_P(dss)
OPT_WRITE_SIZE_T(narenas)
- OPT_WRITE_SSIZE_T(lg_dirty_mult)
+ OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult)
OPT_WRITE_BOOL(stats_print)
- OPT_WRITE_BOOL(junk)
+ OPT_WRITE_CHAR_P(junk)
OPT_WRITE_SIZE_T(quarantine)
OPT_WRITE_BOOL(redzone)
OPT_WRITE_BOOL(zero)
@@ -389,7 +497,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_SSIZE_T(lg_tcache_max)
OPT_WRITE_BOOL(prof)
OPT_WRITE_CHAR_P(prof_prefix)
- OPT_WRITE_BOOL(prof_active)
+ OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active)
+ OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init,
+ prof.thread_active_init)
OPT_WRITE_SSIZE_T(lg_prof_sample)
OPT_WRITE_BOOL(prof_accum)
OPT_WRITE_SSIZE_T(lg_prof_interval)
@@ -398,6 +508,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(prof_leak)
#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_BOOL_MUTABLE
#undef OPT_WRITE_SIZE_T
#undef OPT_WRITE_SSIZE_T
#undef OPT_WRITE_CHAR_P
@@ -411,12 +522,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
sizeof(void *));
CTL_GET("arenas.quantum", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
+ malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n",
+ sv);
CTL_GET("arenas.page", &sv, size_t);
malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
- CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
+ CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t);
if (ssv >= 0) {
malloc_cprintf(write_cb, cbopaque,
"Min active:dirty page ratio per arena: %u:1\n",
@@ -425,22 +537,20 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
malloc_cprintf(write_cb, cbopaque,
"Min active:dirty page ratio per arena: N/A\n");
}
- if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0))
- == 0) {
+ if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) {
malloc_cprintf(write_cb, cbopaque,
"Maximum thread-cached size class: %zu\n", sv);
}
- if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
- bv) {
- CTL_GET("opt.lg_prof_sample", &sv, size_t);
+ if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) {
+ CTL_GET("prof.lg_sample", &sv, size_t);
malloc_cprintf(write_cb, cbopaque,
- "Average profile sample interval: %"PRIu64
+ "Average profile sample interval: %"FMTu64
" (2^%zu)\n", (((uint64_t)1U) << sv), sv);
CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
if (ssv >= 0) {
malloc_cprintf(write_cb, cbopaque,
- "Average profile dump interval: %"PRIu64
+ "Average profile dump interval: %"FMTu64
" (2^%zd)\n",
(((uint64_t)1U) << ssv), ssv);
} else {
@@ -449,47 +559,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
}
CTL_GET("opt.lg_chunk", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n",
- (ZU(1) << sv), sv);
+ malloc_cprintf(write_cb, cbopaque,
+ "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv);
}
if (config_stats) {
size_t *cactive;
- size_t allocated, active, mapped;
- size_t chunks_current, chunks_high;
- uint64_t chunks_total;
- size_t huge_allocated;
- uint64_t huge_nmalloc, huge_ndalloc;
+ size_t allocated, active, metadata, resident, mapped;
CTL_GET("stats.cactive", &cactive, size_t *);
CTL_GET("stats.allocated", &allocated, size_t);
CTL_GET("stats.active", &active, size_t);
+ CTL_GET("stats.metadata", &metadata, size_t);
+ CTL_GET("stats.resident", &resident, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
malloc_cprintf(write_cb, cbopaque,
- "Allocated: %zu, active: %zu, mapped: %zu\n",
- allocated, active, mapped);
- malloc_cprintf(write_cb, cbopaque,
- "Current active ceiling: %zu\n", atomic_read_z(cactive));
-
- /* Print chunk stats. */
- CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
- CTL_GET("stats.chunks.high", &chunks_high, size_t);
- CTL_GET("stats.chunks.current", &chunks_current, size_t);
- malloc_cprintf(write_cb, cbopaque, "chunks: nchunks "
- "highchunks curchunks\n");
- malloc_cprintf(write_cb, cbopaque,
- " %13"PRIu64" %12zu %12zu\n",
- chunks_total, chunks_high, chunks_current);
-
- /* Print huge stats. */
- CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
- CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
- CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
- malloc_cprintf(write_cb, cbopaque,
- "huge: nmalloc ndalloc allocated\n");
+ "Allocated: %zu, active: %zu, metadata: %zu,"
+ " resident: %zu, mapped: %zu\n",
+ allocated, active, metadata, resident, mapped);
malloc_cprintf(write_cb, cbopaque,
- " %12"PRIu64" %12"PRIu64" %12zu\n",
- huge_nmalloc, huge_ndalloc, huge_allocated);
+ "Current active ceiling: %zu\n",
+ atomic_read_z(cactive));
if (merged) {
unsigned narenas;
@@ -508,12 +598,12 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
ninitialized++;
}
- if (ninitialized > 1 || unmerged == false) {
+ if (ninitialized > 1 || !unmerged) {
/* Print merged arena stats. */
malloc_cprintf(write_cb, cbopaque,
"\nMerged arenas stats:\n");
stats_arena_print(write_cb, cbopaque,
- narenas, bins, large);
+ narenas, bins, large, huge);
}
}
}
@@ -539,7 +629,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
cbopaque,
"\narenas[%u]:\n", i);
stats_arena_print(write_cb,
- cbopaque, i, bins, large);
+ cbopaque, i, bins, large,
+ huge);
}
}
}
diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c
index 6de92960b..fdafd0c62 100644
--- a/deps/jemalloc/src/tcache.c
+++ b/deps/jemalloc/src/tcache.c
@@ -4,9 +4,6 @@
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, tcache, tcache_t *, NULL)
-malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)
-
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
@@ -16,6 +13,14 @@ static unsigned stack_nelms; /* Total stack elms per tcache. */
size_t nhbins;
size_t tcache_maxclass;
+tcaches_t *tcaches;
+
+/* Index of first element within tcaches that has never been used. */
+static unsigned tcaches_past;
+
+/* Head of singly linked list tracking available tcaches elements. */
+static tcaches_t *tcaches_avail;
+
/******************************************************************************/
size_t tcache_salloc(const void *ptr)
@@ -25,9 +30,9 @@ size_t tcache_salloc(const void *ptr)
}
void
-tcache_event_hard(tcache_t *tcache)
+tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
{
- size_t binind = tcache->next_gc_bin;
+ szind_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin = &tcache->tbins[binind];
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
@@ -36,11 +41,12 @@ tcache_event_hard(tcache_t *tcache)
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (binind < NBINS) {
- tcache_bin_flush_small(tbin, binind, tbin->ncached -
- tbin->low_water + (tbin->low_water >> 2), tcache);
+ tcache_bin_flush_small(tsd, tcache, tbin, binind,
+ tbin->ncached - tbin->low_water + (tbin->low_water
+ >> 2));
} else {
- tcache_bin_flush_large(tbin, binind, tbin->ncached -
- tbin->low_water + (tbin->low_water >> 2), tcache);
+ tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
+ - tbin->low_water + (tbin->low_water >> 2), tcache);
}
/*
* Reduce fill count by 2X. Limit lg_fill_div such that the
@@ -65,12 +71,13 @@ tcache_event_hard(tcache_t *tcache)
}
void *
-tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
+tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ tcache_bin_t *tbin, szind_t binind)
{
void *ret;
- arena_tcache_fill_small(tcache->arena, tbin, binind,
- config_prof ? tcache->prof_accumbytes : 0);
+ arena_tcache_fill_small(arena, tbin, binind, config_prof ?
+ tcache->prof_accumbytes : 0);
if (config_prof)
tcache->prof_accumbytes = 0;
ret = tcache_alloc_easy(tbin);
@@ -79,9 +86,10 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
}
void
-tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache)
+tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+ szind_t binind, unsigned rem)
{
+ arena_t *arena;
void *ptr;
unsigned i, nflush, ndeferred;
bool merged_stats = false;
@@ -89,22 +97,24 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
assert(binind < NBINS);
assert(rem <= tbin->ncached);
+ arena = arena_choose(tsd, NULL);
+ assert(arena != NULL);
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
- arena_t *arena = chunk->arena;
- arena_bin_t *bin = &arena->bins[binind];
+ arena_t *bin_arena = extent_node_arena_get(&chunk->node);
+ arena_bin_t *bin = &bin_arena->bins[binind];
- if (config_prof && arena == tcache->arena) {
+ if (config_prof && bin_arena == arena) {
if (arena_prof_accum(arena, tcache->prof_accumbytes))
prof_idump();
tcache->prof_accumbytes = 0;
}
malloc_mutex_lock(&bin->lock);
- if (config_stats && arena == tcache->arena) {
- assert(merged_stats == false);
+ if (config_stats && bin_arena == arena) {
+ assert(!merged_stats);
merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
@@ -115,17 +125,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk->arena == arena) {
+ if (extent_node_arena_get(&chunk->node) == bin_arena) {
size_t pageind = ((uintptr_t)ptr -
(uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm =
- arena_mapp_get(chunk, pageind);
- if (config_fill && opt_junk) {
- arena_alloc_junk_small(ptr,
- &arena_bin_info[binind], true);
- }
- arena_dalloc_bin_locked(arena, chunk, ptr,
- mapelm);
+ arena_chunk_map_bits_t *bitselm =
+ arena_bitselm_get(chunk, pageind);
+ arena_dalloc_bin_junked_locked(bin_arena, chunk,
+ ptr, bitselm);
} else {
/*
* This object was allocated via a different
@@ -139,12 +145,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
}
malloc_mutex_unlock(&bin->lock);
}
- if (config_stats && merged_stats == false) {
+ if (config_stats && !merged_stats) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
- arena_bin_t *bin = &tcache->arena->bins[binind];
+ arena_bin_t *bin = &arena->bins[binind];
malloc_mutex_lock(&bin->lock);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
@@ -160,9 +166,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
}
void
-tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache)
+tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+ unsigned rem, tcache_t *tcache)
{
+ arena_t *arena;
void *ptr;
unsigned i, nflush, ndeferred;
bool merged_stats = false;
@@ -170,17 +177,19 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
assert(binind < nhbins);
assert(rem <= tbin->ncached);
+ arena = arena_choose(tsd, NULL);
+ assert(arena != NULL);
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
tbin->avail[0]);
- arena_t *arena = chunk->arena;
+ arena_t *locked_arena = extent_node_arena_get(&chunk->node);
UNUSED bool idump;
if (config_prof)
idump = false;
- malloc_mutex_lock(&arena->lock);
- if ((config_prof || config_stats) && arena == tcache->arena) {
+ malloc_mutex_lock(&locked_arena->lock);
+ if ((config_prof || config_stats) && locked_arena == arena) {
if (config_prof) {
idump = arena_prof_accum_locked(arena,
tcache->prof_accumbytes);
@@ -200,9 +209,11 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
ptr = tbin->avail[i];
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk->arena == arena)
- arena_dalloc_large_locked(arena, chunk, ptr);
- else {
+ if (extent_node_arena_get(&chunk->node) ==
+ locked_arena) {
+ arena_dalloc_large_junked_locked(locked_arena,
+ chunk, ptr);
+ } else {
/*
* This object was allocated via a different
* arena than the one that is currently locked.
@@ -213,16 +224,15 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
ndeferred++;
}
}
- malloc_mutex_unlock(&arena->lock);
+ malloc_mutex_unlock(&locked_arena->lock);
if (config_prof && idump)
prof_idump();
}
- if (config_stats && merged_stats == false) {
+ if (config_stats && !merged_stats) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
- arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[binind - NBINS].nrequests +=
@@ -249,24 +259,58 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena)
ql_tail_insert(&arena->tcache_ql, tcache, link);
malloc_mutex_unlock(&arena->lock);
}
- tcache->arena = arena;
}
void
-tcache_arena_dissociate(tcache_t *tcache)
+tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena)
+{
+
+ tcache_arena_dissociate(tcache, oldarena);
+ tcache_arena_associate(tcache, newarena);
+}
+
+void
+tcache_arena_dissociate(tcache_t *tcache, arena_t *arena)
{
if (config_stats) {
/* Unlink from list of extant tcaches. */
- malloc_mutex_lock(&tcache->arena->lock);
- ql_remove(&tcache->arena->tcache_ql, tcache, link);
- tcache_stats_merge(tcache, tcache->arena);
- malloc_mutex_unlock(&tcache->arena->lock);
+ malloc_mutex_lock(&arena->lock);
+ if (config_debug) {
+ bool in_ql = false;
+ tcache_t *iter;
+ ql_foreach(iter, &arena->tcache_ql, link) {
+ if (iter == tcache) {
+ in_ql = true;
+ break;
+ }
+ }
+ assert(in_ql);
+ }
+ ql_remove(&arena->tcache_ql, tcache, link);
+ tcache_stats_merge(tcache, arena);
+ malloc_mutex_unlock(&arena->lock);
}
}
tcache_t *
-tcache_create(arena_t *arena)
+tcache_get_hard(tsd_t *tsd)
+{
+ arena_t *arena;
+
+ if (!tcache_enabled_get()) {
+ if (tsd_nominal(tsd))
+ tcache_enabled_set(false); /* Memoize. */
+ return (NULL);
+ }
+ arena = arena_choose(tsd, NULL);
+ if (unlikely(arena == NULL))
+ return (NULL);
+ return (tcache_create(tsd, arena));
+}
+
+tcache_t *
+tcache_create(tsd_t *tsd, arena_t *arena)
{
tcache_t *tcache;
size_t size, stack_offset;
@@ -277,23 +321,10 @@ tcache_create(arena_t *arena)
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
- /*
- * Round up to the nearest multiple of the cacheline size, in order to
- * avoid the possibility of false cacheline sharing.
- *
- * That this works relies on the same logic as in ipalloc(), but we
- * cannot directly call ipalloc() here due to tcache bootstrapping
- * issues.
- */
- size = (size + CACHELINE_MASK) & (-CACHELINE);
-
- if (size <= SMALL_MAXCLASS)
- tcache = (tcache_t *)arena_malloc_small(arena, size, true);
- else if (size <= tcache_maxclass)
- tcache = (tcache_t *)arena_malloc_large(arena, size, true);
- else
- tcache = (tcache_t *)icalloct(size, false, arena);
+ /* Avoid false cacheline sharing. */
+ size = sa2u(size, CACHELINE);
+ tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get());
if (tcache == NULL)
return (NULL);
@@ -307,25 +338,23 @@ tcache_create(arena_t *arena)
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
- tcache_tsd_set(&tcache);
-
return (tcache);
}
-void
-tcache_destroy(tcache_t *tcache)
+static void
+tcache_destroy(tsd_t *tsd, tcache_t *tcache)
{
+ arena_t *arena;
unsigned i;
- size_t tcache_size;
- tcache_arena_dissociate(tcache);
+ arena = arena_choose(tsd, NULL);
+ tcache_arena_dissociate(tcache, arena);
for (i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
- tcache_bin_flush_small(tbin, i, 0, tcache);
+ tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
if (config_stats && tbin->tstats.nrequests != 0) {
- arena_t *arena = tcache->arena;
arena_bin_t *bin = &arena->bins[i];
malloc_mutex_lock(&bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
@@ -335,10 +364,9 @@ tcache_destroy(tcache_t *tcache)
for (; i < nhbins; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
- tcache_bin_flush_large(tbin, i, 0, tcache);
+ tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
if (config_stats && tbin->tstats.nrequests != 0) {
- arena_t *arena = tcache->arena;
malloc_mutex_lock(&arena->lock);
arena->stats.nrequests_large += tbin->tstats.nrequests;
arena->stats.lstats[i - NBINS].nrequests +=
@@ -348,57 +376,33 @@ tcache_destroy(tcache_t *tcache)
}
if (config_prof && tcache->prof_accumbytes > 0 &&
- arena_prof_accum(tcache->arena, tcache->prof_accumbytes))
+ arena_prof_accum(arena, tcache->prof_accumbytes))
prof_idump();
- tcache_size = arena_salloc(tcache, false);
- if (tcache_size <= SMALL_MAXCLASS) {
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
- arena_t *arena = chunk->arena;
- size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
- LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
-
- arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm);
- } else if (tcache_size <= tcache_maxclass) {
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
- arena_t *arena = chunk->arena;
-
- arena_dalloc_large(arena, chunk, tcache);
- } else
- idalloct(tcache, false);
+ idalloctm(tsd, tcache, false, true);
}
void
-tcache_thread_cleanup(void *arg)
+tcache_cleanup(tsd_t *tsd)
{
- tcache_t *tcache = *(tcache_t **)arg;
+ tcache_t *tcache;
- if (tcache == TCACHE_STATE_DISABLED) {
- /* Do nothing. */
- } else if (tcache == TCACHE_STATE_REINCARNATED) {
- /*
- * Another destructor called an allocator function after this
- * destructor was called. Reset tcache to
- * TCACHE_STATE_PURGATORY in order to receive another callback.
- */
- tcache = TCACHE_STATE_PURGATORY;
- tcache_tsd_set(&tcache);
- } else if (tcache == TCACHE_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to TCACHE_STATE_PURGATORY so that other destructors wouldn't
- * cause re-creation of the tcache. This time, do nothing, so
- * that the destructor will not be called again.
- */
- } else if (tcache != NULL) {
- assert(tcache != TCACHE_STATE_PURGATORY);
- tcache_destroy(tcache);
- tcache = TCACHE_STATE_PURGATORY;
- tcache_tsd_set(&tcache);
+ if (!config_tcache)
+ return;
+
+ if ((tcache = tsd_tcache_get(tsd)) != NULL) {
+ tcache_destroy(tsd, tcache);
+ tsd_tcache_set(tsd, NULL);
}
}
+void
+tcache_enabled_cleanup(tsd_t *tsd)
+{
+
+ /* Do nothing. */
+}
+
/* Caller must own arena->lock. */
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
@@ -427,22 +431,82 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
}
bool
-tcache_boot0(void)
+tcaches_create(tsd_t *tsd, unsigned *r_ind)
+{
+ tcache_t *tcache;
+ tcaches_t *elm;
+
+ if (tcaches == NULL) {
+ tcaches = base_alloc(sizeof(tcache_t *) *
+ (MALLOCX_TCACHE_MAX+1));
+ if (tcaches == NULL)
+ return (true);
+ }
+
+ if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
+ return (true);
+ tcache = tcache_create(tsd, a0get());
+ if (tcache == NULL)
+ return (true);
+
+ if (tcaches_avail != NULL) {
+ elm = tcaches_avail;
+ tcaches_avail = tcaches_avail->next;
+ elm->tcache = tcache;
+ *r_ind = elm - tcaches;
+ } else {
+ elm = &tcaches[tcaches_past];
+ elm->tcache = tcache;
+ *r_ind = tcaches_past;
+ tcaches_past++;
+ }
+
+ return (false);
+}
+
+static void
+tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm)
+{
+
+ if (elm->tcache == NULL)
+ return;
+ tcache_destroy(tsd, elm->tcache);
+ elm->tcache = NULL;
+}
+
+void
+tcaches_flush(tsd_t *tsd, unsigned ind)
+{
+
+ tcaches_elm_flush(tsd, &tcaches[ind]);
+}
+
+void
+tcaches_destroy(tsd_t *tsd, unsigned ind)
+{
+ tcaches_t *elm = &tcaches[ind];
+ tcaches_elm_flush(tsd, elm);
+ elm->next = tcaches_avail;
+ tcaches_avail = elm;
+}
+
+bool
+tcache_boot(void)
{
unsigned i;
/*
- * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
+ * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is
* known.
*/
if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
tcache_maxclass = SMALL_MAXCLASS;
- else if ((1U << opt_lg_tcache_max) > arena_maxclass)
- tcache_maxclass = arena_maxclass;
+ else if ((1U << opt_lg_tcache_max) > large_maxclass)
+ tcache_maxclass = large_maxclass;
else
tcache_maxclass = (1U << opt_lg_tcache_max);
- nhbins = NBINS + (tcache_maxclass >> LG_PAGE);
+ nhbins = size2index(tcache_maxclass) + 1;
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
@@ -451,7 +515,11 @@ tcache_boot0(void)
return (true);
stack_nelms = 0;
for (i = 0; i < NBINS; i++) {
- if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+ if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
+ tcache_bin_info[i].ncached_max =
+ TCACHE_NSLOTS_SMALL_MIN;
+ } else if ((arena_bin_info[i].nregs << 1) <=
+ TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
(arena_bin_info[i].nregs << 1);
} else {
@@ -467,13 +535,3 @@ tcache_boot0(void)
return (false);
}
-
-bool
-tcache_boot1(void)
-{
-
- if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
- return (true);
-
- return (false);
-}
diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c
index 700caabfe..9ffe9afef 100644
--- a/deps/jemalloc/src/tsd.c
+++ b/deps/jemalloc/src/tsd.c
@@ -7,21 +7,22 @@
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
+malloc_tsd_data(, , tsd_t, TSD_INITIALIZER)
+
/******************************************************************************/
void *
malloc_tsd_malloc(size_t size)
{
- /* Avoid choose_arena() in order to dodge bootstrapping issues. */
- return (arena_malloc(arenas[0], size, false, false));
+ return (a0malloc(CACHELINE_CEILING(size)));
}
void
malloc_tsd_dalloc(void *wrapper)
{
- idalloct(wrapper, false);
+ a0dalloc(wrapper);
}
void
@@ -67,10 +68,61 @@ malloc_tsd_cleanup_register(bool (*f)(void))
}
void
-malloc_tsd_boot(void)
+tsd_cleanup(void *arg)
+{
+ tsd_t *tsd = (tsd_t *)arg;
+
+ switch (tsd->state) {
+ case tsd_state_uninitialized:
+ /* Do nothing. */
+ break;
+ case tsd_state_nominal:
+#define O(n, t) \
+ n##_cleanup(tsd);
+MALLOC_TSD
+#undef O
+ tsd->state = tsd_state_purgatory;
+ tsd_set(tsd);
+ break;
+ case tsd_state_purgatory:
+ /*
+ * The previous time this destructor was called, we set the
+ * state to tsd_state_purgatory so that other destructors
+ * wouldn't cause re-creation of the tsd. This time, do
+ * nothing, and do not request another callback.
+ */
+ break;
+ case tsd_state_reincarnated:
+ /*
+ * Another destructor deallocated memory after this destructor
+ * was called. Reset state to tsd_state_purgatory and request
+ * another callback.
+ */
+ tsd->state = tsd_state_purgatory;
+ tsd_set(tsd);
+ break;
+ default:
+ not_reached();
+ }
+}
+
+bool
+malloc_tsd_boot0(void)
{
ncleanups = 0;
+ if (tsd_boot0())
+ return (true);
+ *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true;
+ return (false);
+}
+
+void
+malloc_tsd_boot1(void)
+{
+
+ tsd_boot1();
+ *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false;
}
#ifdef _WIN32
@@ -102,7 +154,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
# pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
-static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL,
+static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif
diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c
index 93a19fd16..4cb0d6c1e 100644
--- a/deps/jemalloc/src/util.c
+++ b/deps/jemalloc/src/util.c
@@ -81,10 +81,10 @@ buferror(int err, char *buf, size_t buflen)
{
#ifdef _WIN32
- FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0,
+ FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
(LPSTR)buf, buflen, NULL);
return (0);
-#elif defined(_GNU_SOURCE)
+#elif defined(__GLIBC__) && defined(_GNU_SOURCE)
char *b = strerror_r(err, buf, buflen);
if (b != buf) {
strncpy(buf, b, buflen);
@@ -100,7 +100,7 @@ uintmax_t
malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
{
uintmax_t ret, digit;
- int b;
+ unsigned b;
bool neg;
const char *p, *ns;
@@ -266,7 +266,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p)
sign = '-';
switch (sign) {
case '-':
- if (neg == false)
+ if (!neg)
break;
/* Fall through. */
case ' ':
@@ -329,7 +329,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
/* Left padding. */ \
size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \
(size_t)width - slen : 0); \
- if (left_justify == false && pad_len != 0) { \
+ if (!left_justify && pad_len != 0) { \
size_t j; \
for (j = 0; j < pad_len; j++) \
APPEND_C(' '); \
@@ -381,7 +381,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
case 'p': /* Synthetic; used for %p. */ \
val = va_arg(ap, uintptr_t); \
break; \
- default: not_reached(); \
+ default: \
+ not_reached(); \
+ val = 0; \
} \
} while (0)
@@ -404,19 +406,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
while (true) {
switch (*f) {
case '#':
- assert(alt_form == false);
+ assert(!alt_form);
alt_form = true;
break;
case '-':
- assert(left_justify == false);
+ assert(!left_justify);
left_justify = true;
break;
case ' ':
- assert(plus_space == false);
+ assert(!plus_space);
plus_space = true;
break;
case '+':
- assert(plus_plus == false);
+ assert(!plus_plus);
plus_plus = true;
break;
default: goto label_width;
@@ -548,7 +550,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
assert(len == '?' || len == 'l');
assert_not_implemented(len != 'l');
s = va_arg(ap, char *);
- slen = (prec < 0) ? strlen(s) : prec;
+ slen = (prec < 0) ? strlen(s) : (size_t)prec;
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
@@ -584,7 +586,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
return (ret);
}
-JEMALLOC_ATTR(format(printf, 3, 4))
+JEMALLOC_FORMAT_PRINTF(3, 4)
int
malloc_snprintf(char *str, size_t size, const char *format, ...)
{
@@ -623,7 +625,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
* Print to a callback function in such a way as to (hopefully) avoid memory
* allocation.
*/
-JEMALLOC_ATTR(format(printf, 3, 4))
+JEMALLOC_FORMAT_PRINTF(3, 4)
void
malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, ...)
@@ -636,7 +638,7 @@ malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
}
/* Print to stderr in such a way as to avoid memory allocation. */
-JEMALLOC_ATTR(format(printf, 1, 2))
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
malloc_printf(const char *format, ...)
{
diff --git a/deps/jemalloc/src/valgrind.c b/deps/jemalloc/src/valgrind.c
new file mode 100644
index 000000000..8e7ef3a2e
--- /dev/null
+++ b/deps/jemalloc/src/valgrind.c
@@ -0,0 +1,34 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_VALGRIND
+# error "This source file is for Valgrind integration."
+#endif
+
+#include <valgrind/memcheck.h>
+
+void
+valgrind_make_mem_noaccess(void *ptr, size_t usize)
+{
+
+ VALGRIND_MAKE_MEM_NOACCESS(ptr, usize);
+}
+
+void
+valgrind_make_mem_undefined(void *ptr, size_t usize)
+{
+
+ VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize);
+}
+
+void
+valgrind_make_mem_defined(void *ptr, size_t usize)
+{
+
+ VALGRIND_MAKE_MEM_DEFINED(ptr, usize);
+}
+
+void
+valgrind_freelike_block(void *ptr, size_t usize)
+{
+
+ VALGRIND_FREELIKE_BLOCK(ptr, usize);
+}
diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c
index e0302ef4e..12e1734a9 100644
--- a/deps/jemalloc/src/zone.c
+++ b/deps/jemalloc/src/zone.c
@@ -176,6 +176,7 @@ register_zone(void)
* register jemalloc's.
*/
malloc_zone_t *default_zone = malloc_default_zone();
+ malloc_zone_t *purgeable_zone = NULL;
if (!default_zone->zone_name ||
strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
return;
@@ -237,22 +238,37 @@ register_zone(void)
* run time.
*/
if (malloc_default_purgeable_zone != NULL)
- malloc_default_purgeable_zone();
+ purgeable_zone = malloc_default_purgeable_zone();
/* Register the custom zone. At this point it won't be the default. */
malloc_zone_register(&zone);
- /*
- * Unregister and reregister the default zone. On OSX >= 10.6,
- * unregistering takes the last registered zone and places it at the
- * location of the specified zone. Unregistering the default zone thus
- * makes the last registered one the default. On OSX < 10.6,
- * unregistering shifts all registered zones. The first registered zone
- * then becomes the default.
- */
do {
default_zone = malloc_default_zone();
+ /*
+ * Unregister and reregister the default zone. On OSX >= 10.6,
+ * unregistering takes the last registered zone and places it
+ * at the location of the specified zone. Unregistering the
+ * default zone thus makes the last registered one the default.
+ * On OSX < 10.6, unregistering shifts all registered zones.
+ * The first registered zone then becomes the default.
+ */
malloc_zone_unregister(default_zone);
malloc_zone_register(default_zone);
+ /*
+ * On OSX 10.6, having the default purgeable zone appear before
+ * the default zone makes some things crash because it thinks it
+ * owns the default zone allocated pointers. We thus
+ * unregister/re-register it in order to ensure it's always
+ * after the default zone. On OSX < 10.6, there is no purgeable
+ * zone, so this does nothing. On OSX >= 10.6, unregistering
+ * replaces the purgeable zone with the last registered zone
+ * above, i.e. the default zone. Registering it again then puts
+ * it at the end, obviously after the default zone.
+ */
+ if (purgeable_zone) {
+ malloc_zone_unregister(purgeable_zone);
+ malloc_zone_register(purgeable_zone);
+ }
} while (malloc_default_zone() != &zone);
}
diff --git a/deps/jemalloc/test/include/test/btalloc.h b/deps/jemalloc/test/include/test/btalloc.h
new file mode 100644
index 000000000..c3f9d4df7
--- /dev/null
+++ b/deps/jemalloc/test/include/test/btalloc.h
@@ -0,0 +1,31 @@
+/* btalloc() provides a mechanism for allocating via permuted backtraces. */
+void *btalloc(size_t size, unsigned bits);
+
+#define btalloc_n_proto(n) \
+void *btalloc_##n(size_t size, unsigned bits);
+btalloc_n_proto(0)
+btalloc_n_proto(1)
+
+#define btalloc_n_gen(n) \
+void * \
+btalloc_##n(size_t size, unsigned bits) \
+{ \
+ void *p; \
+ \
+ if (bits == 0) \
+ p = mallocx(size, 0); \
+ else { \
+ switch (bits & 0x1U) { \
+ case 0: \
+ p = (btalloc_0(size, bits >> 1)); \
+ break; \
+ case 1: \
+ p = (btalloc_1(size, bits >> 1)); \
+ break; \
+ default: not_reached(); \
+ } \
+ } \
+ /* Intentionally sabotage tail call optimization. */ \
+ assert_ptr_not_null(p, "Unexpected mallocx() failure"); \
+ return (p); \
+}
diff --git a/deps/jemalloc/test/include/test/jemalloc_test.h.in b/deps/jemalloc/test/include/test/jemalloc_test.h.in
index 730a55dba..455569da4 100644
--- a/deps/jemalloc/test/include/test/jemalloc_test.h.in
+++ b/deps/jemalloc/test/include/test/jemalloc_test.h.in
@@ -1,13 +1,21 @@
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <errno.h>
-#include <inttypes.h>
#include <math.h>
#include <string.h>
+#ifdef _WIN32
+# include "msvc_compat/strings.h"
+#endif
+#include <sys/time.h>
#ifdef _WIN32
# include <windows.h>
+# include "msvc_compat/windows_extra.h"
#else
# include <pthread.h>
#endif
@@ -132,10 +140,12 @@
/*
* Common test utilities.
*/
+#include "test/btalloc.h"
#include "test/math.h"
#include "test/mtx.h"
#include "test/mq.h"
#include "test/test.h"
+#include "test/timer.h"
#include "test/thd.h"
#define MEXP 19937
#include "test/SFMT.h"
diff --git a/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in b/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
index 18a9773d7..5cc8532a3 100644
--- a/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
+++ b/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
@@ -1,5 +1,9 @@
#include "jemalloc/internal/jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
-/* For use by SFMT. */
+/*
+ * For use by SFMT. configure.ac doesn't actually define HAVE_SSE2 because its
+ * dependencies are notoriously unportable in practice.
+ */
#undef HAVE_SSE2
#undef HAVE_ALTIVEC
diff --git a/deps/jemalloc/test/include/test/math.h b/deps/jemalloc/test/include/test/math.h
index a862ed7db..b057b29a1 100644
--- a/deps/jemalloc/test/include/test/math.h
+++ b/deps/jemalloc/test/include/test/math.h
@@ -299,7 +299,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2)
/*
* Given a value p in [0..1] and Gamma distribution shape and scale parameters,
- * compute the upper limit on the definite integeral from [0..z] that satisfies
+ * compute the upper limit on the definite integral from [0..z] that satisfies
* p.
*/
JEMALLOC_INLINE double
diff --git a/deps/jemalloc/test/include/test/mq.h b/deps/jemalloc/test/include/test/mq.h
index 11188653c..7c4df4931 100644
--- a/deps/jemalloc/test/include/test/mq.h
+++ b/deps/jemalloc/test/include/test/mq.h
@@ -1,3 +1,5 @@
+void mq_nanosleep(unsigned ns);
+
/*
* Simple templated message queue implementation that relies on only mutexes for
* synchronization (which reduces portability issues). Given the following
@@ -75,26 +77,23 @@ a_attr a_mq_msg_type * \
a_prefix##get(a_mq_type *mq) \
{ \
a_mq_msg_type *msg; \
- struct timespec timeout; \
+ unsigned ns; \
\
msg = a_prefix##tryget(mq); \
if (msg != NULL) \
return (msg); \
\
- timeout.tv_sec = 0; \
- timeout.tv_nsec = 1; \
+ ns = 1; \
while (true) { \
- nanosleep(&timeout, NULL); \
+ mq_nanosleep(ns); \
msg = a_prefix##tryget(mq); \
if (msg != NULL) \
return (msg); \
- if (timeout.tv_sec == 0) { \
+ if (ns < 1000*1000*1000) { \
/* Double sleep time, up to max 1 second. */ \
- timeout.tv_nsec <<= 1; \
- if (timeout.tv_nsec >= 1000*1000*1000) { \
- timeout.tv_sec = 1; \
- timeout.tv_nsec = 0; \
- } \
+ ns <<= 1; \
+ if (ns > 1000*1000*1000) \
+ ns = 1000*1000*1000; \
} \
} \
} \
diff --git a/deps/jemalloc/test/include/test/test.h b/deps/jemalloc/test/include/test/test.h
index a32ec07c4..3cf901fc4 100644
--- a/deps/jemalloc/test/include/test/test.h
+++ b/deps/jemalloc/test/include/test/test.h
@@ -1,6 +1,6 @@
#define ASSERT_BUFSIZE 256
-#define assert_cmp(t, a, b, cmp, neg_cmp, pri, fmt...) do { \
+#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \
t a_ = (a); \
t b_ = (b); \
if (!(a_ cmp b_)) { \
@@ -12,205 +12,205 @@
"%"pri" "#neg_cmp" %"pri": ", \
__func__, __FILE__, __LINE__, \
#a, #b, a_, b_); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_ptr_eq(a, b, fmt...) assert_cmp(void *, a, b, ==, \
- !=, "p", fmt)
-#define assert_ptr_ne(a, b, fmt...) assert_cmp(void *, a, b, !=, \
- ==, "p", fmt)
-#define assert_ptr_null(a, fmt...) assert_cmp(void *, a, NULL, ==, \
- !=, "p", fmt)
-#define assert_ptr_not_null(a, fmt...) assert_cmp(void *, a, NULL, !=, \
- ==, "p", fmt)
+#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \
+ !=, "p", __VA_ARGS__)
+#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \
+ ==, "p", __VA_ARGS__)
+#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \
+ !=, "p", __VA_ARGS__)
+#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \
+ ==, "p", __VA_ARGS__)
-#define assert_c_eq(a, b, fmt...) assert_cmp(char, a, b, ==, !=, "c", fmt)
-#define assert_c_ne(a, b, fmt...) assert_cmp(char, a, b, !=, ==, "c", fmt)
-#define assert_c_lt(a, b, fmt...) assert_cmp(char, a, b, <, >=, "c", fmt)
-#define assert_c_le(a, b, fmt...) assert_cmp(char, a, b, <=, >, "c", fmt)
-#define assert_c_ge(a, b, fmt...) assert_cmp(char, a, b, >=, <, "c", fmt)
-#define assert_c_gt(a, b, fmt...) assert_cmp(char, a, b, >, <=, "c", fmt)
+#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
+#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
+#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
+#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
+#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
+#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
-#define assert_x_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "#x", fmt)
-#define assert_x_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "#x", fmt)
-#define assert_x_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "#x", fmt)
-#define assert_x_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "#x", fmt)
-#define assert_x_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "#x", fmt)
-#define assert_x_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "#x", fmt)
+#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
+#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
+#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
+#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
+#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
+#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
-#define assert_d_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "d", fmt)
-#define assert_d_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "d", fmt)
-#define assert_d_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "d", fmt)
-#define assert_d_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "d", fmt)
-#define assert_d_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "d", fmt)
-#define assert_d_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "d", fmt)
+#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
+#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
+#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
+#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
+#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
+#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
-#define assert_u_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "u", fmt)
-#define assert_u_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "u", fmt)
-#define assert_u_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "u", fmt)
-#define assert_u_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "u", fmt)
-#define assert_u_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "u", fmt)
-#define assert_u_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "u", fmt)
+#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
+#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
+#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
+#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
+#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
+#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
-#define assert_ld_eq(a, b, fmt...) assert_cmp(long, a, b, ==, \
- !=, "ld", fmt)
-#define assert_ld_ne(a, b, fmt...) assert_cmp(long, a, b, !=, \
- ==, "ld", fmt)
-#define assert_ld_lt(a, b, fmt...) assert_cmp(long, a, b, <, \
- >=, "ld", fmt)
-#define assert_ld_le(a, b, fmt...) assert_cmp(long, a, b, <=, \
- >, "ld", fmt)
-#define assert_ld_ge(a, b, fmt...) assert_cmp(long, a, b, >=, \
- <, "ld", fmt)
-#define assert_ld_gt(a, b, fmt...) assert_cmp(long, a, b, >, \
- <=, "ld", fmt)
+#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \
+ !=, "ld", __VA_ARGS__)
+#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \
+ ==, "ld", __VA_ARGS__)
+#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \
+ >=, "ld", __VA_ARGS__)
+#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \
+ >, "ld", __VA_ARGS__)
+#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \
+ <, "ld", __VA_ARGS__)
+#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \
+ <=, "ld", __VA_ARGS__)
-#define assert_lu_eq(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, ==, !=, "lu", fmt)
-#define assert_lu_ne(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, !=, ==, "lu", fmt)
-#define assert_lu_lt(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, <, >=, "lu", fmt)
-#define assert_lu_le(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, <=, >, "lu", fmt)
-#define assert_lu_ge(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, >=, <, "lu", fmt)
-#define assert_lu_gt(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, >, <=, "lu", fmt)
+#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \
+ a, b, ==, !=, "lu", __VA_ARGS__)
+#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \
+ a, b, !=, ==, "lu", __VA_ARGS__)
+#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \
+ a, b, <, >=, "lu", __VA_ARGS__)
+#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \
+ a, b, <=, >, "lu", __VA_ARGS__)
+#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \
+ a, b, >=, <, "lu", __VA_ARGS__)
+#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \
+ a, b, >, <=, "lu", __VA_ARGS__)
-#define assert_qd_eq(a, b, fmt...) assert_cmp(long long, a, b, ==, \
- !=, "qd", fmt)
-#define assert_qd_ne(a, b, fmt...) assert_cmp(long long, a, b, !=, \
- ==, "qd", fmt)
-#define assert_qd_lt(a, b, fmt...) assert_cmp(long long, a, b, <, \
- >=, "qd", fmt)
-#define assert_qd_le(a, b, fmt...) assert_cmp(long long, a, b, <=, \
- >, "qd", fmt)
-#define assert_qd_ge(a, b, fmt...) assert_cmp(long long, a, b, >=, \
- <, "qd", fmt)
-#define assert_qd_gt(a, b, fmt...) assert_cmp(long long, a, b, >, \
- <=, "qd", fmt)
+#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \
+ !=, "qd", __VA_ARGS__)
+#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \
+ ==, "qd", __VA_ARGS__)
+#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \
+ >=, "qd", __VA_ARGS__)
+#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \
+ >, "qd", __VA_ARGS__)
+#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \
+ <, "qd", __VA_ARGS__)
+#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \
+ <=, "qd", __VA_ARGS__)
-#define assert_qu_eq(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, ==, !=, "qu", fmt)
-#define assert_qu_ne(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, !=, ==, "qu", fmt)
-#define assert_qu_lt(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, <, >=, "qu", fmt)
-#define assert_qu_le(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, <=, >, "qu", fmt)
-#define assert_qu_ge(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, >=, <, "qu", fmt)
-#define assert_qu_gt(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, >, <=, "qu", fmt)
+#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, ==, !=, "qu", __VA_ARGS__)
+#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, !=, ==, "qu", __VA_ARGS__)
+#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, <, >=, "qu", __VA_ARGS__)
+#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, <=, >, "qu", __VA_ARGS__)
+#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, >=, <, "qu", __VA_ARGS__)
+#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, >, <=, "qu", __VA_ARGS__)
-#define assert_jd_eq(a, b, fmt...) assert_cmp(intmax_t, a, b, ==, \
- !=, "jd", fmt)
-#define assert_jd_ne(a, b, fmt...) assert_cmp(intmax_t, a, b, !=, \
- ==, "jd", fmt)
-#define assert_jd_lt(a, b, fmt...) assert_cmp(intmax_t, a, b, <, \
- >=, "jd", fmt)
-#define assert_jd_le(a, b, fmt...) assert_cmp(intmax_t, a, b, <=, \
- >, "jd", fmt)
-#define assert_jd_ge(a, b, fmt...) assert_cmp(intmax_t, a, b, >=, \
- <, "jd", fmt)
-#define assert_jd_gt(a, b, fmt...) assert_cmp(intmax_t, a, b, >, \
- <=, "jd", fmt)
+#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \
+ !=, "jd", __VA_ARGS__)
+#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \
+ ==, "jd", __VA_ARGS__)
+#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \
+ >=, "jd", __VA_ARGS__)
+#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \
+ >, "jd", __VA_ARGS__)
+#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \
+ <, "jd", __VA_ARGS__)
+#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \
+ <=, "jd", __VA_ARGS__)
-#define assert_ju_eq(a, b, fmt...) assert_cmp(uintmax_t, a, b, ==, \
- !=, "ju", fmt)
-#define assert_ju_ne(a, b, fmt...) assert_cmp(uintmax_t, a, b, !=, \
- ==, "ju", fmt)
-#define assert_ju_lt(a, b, fmt...) assert_cmp(uintmax_t, a, b, <, \
- >=, "ju", fmt)
-#define assert_ju_le(a, b, fmt...) assert_cmp(uintmax_t, a, b, <=, \
- >, "ju", fmt)
-#define assert_ju_ge(a, b, fmt...) assert_cmp(uintmax_t, a, b, >=, \
- <, "ju", fmt)
-#define assert_ju_gt(a, b, fmt...) assert_cmp(uintmax_t, a, b, >, \
- <=, "ju", fmt)
+#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \
+ !=, "ju", __VA_ARGS__)
+#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \
+ ==, "ju", __VA_ARGS__)
+#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \
+ >=, "ju", __VA_ARGS__)
+#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \
+ >, "ju", __VA_ARGS__)
+#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \
+ <, "ju", __VA_ARGS__)
+#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \
+ <=, "ju", __VA_ARGS__)
-#define assert_zd_eq(a, b, fmt...) assert_cmp(ssize_t, a, b, ==, \
- !=, "zd", fmt)
-#define assert_zd_ne(a, b, fmt...) assert_cmp(ssize_t, a, b, !=, \
- ==, "zd", fmt)
-#define assert_zd_lt(a, b, fmt...) assert_cmp(ssize_t, a, b, <, \
- >=, "zd", fmt)
-#define assert_zd_le(a, b, fmt...) assert_cmp(ssize_t, a, b, <=, \
- >, "zd", fmt)
-#define assert_zd_ge(a, b, fmt...) assert_cmp(ssize_t, a, b, >=, \
- <, "zd", fmt)
-#define assert_zd_gt(a, b, fmt...) assert_cmp(ssize_t, a, b, >, \
- <=, "zd", fmt)
+#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \
+ !=, "zd", __VA_ARGS__)
+#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \
+ ==, "zd", __VA_ARGS__)
+#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \
+ >=, "zd", __VA_ARGS__)
+#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \
+ >, "zd", __VA_ARGS__)
+#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \
+ <, "zd", __VA_ARGS__)
+#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \
+ <=, "zd", __VA_ARGS__)
-#define assert_zu_eq(a, b, fmt...) assert_cmp(size_t, a, b, ==, \
- !=, "zu", fmt)
-#define assert_zu_ne(a, b, fmt...) assert_cmp(size_t, a, b, !=, \
- ==, "zu", fmt)
-#define assert_zu_lt(a, b, fmt...) assert_cmp(size_t, a, b, <, \
- >=, "zu", fmt)
-#define assert_zu_le(a, b, fmt...) assert_cmp(size_t, a, b, <=, \
- >, "zu", fmt)
-#define assert_zu_ge(a, b, fmt...) assert_cmp(size_t, a, b, >=, \
- <, "zu", fmt)
-#define assert_zu_gt(a, b, fmt...) assert_cmp(size_t, a, b, >, \
- <=, "zu", fmt)
+#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \
+ !=, "zu", __VA_ARGS__)
+#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \
+ ==, "zu", __VA_ARGS__)
+#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \
+ >=, "zu", __VA_ARGS__)
+#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \
+ >, "zu", __VA_ARGS__)
+#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \
+ <, "zu", __VA_ARGS__)
+#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \
+ <=, "zu", __VA_ARGS__)
-#define assert_d32_eq(a, b, fmt...) assert_cmp(int32_t, a, b, ==, \
- !=, PRId32, fmt)
-#define assert_d32_ne(a, b, fmt...) assert_cmp(int32_t, a, b, !=, \
- ==, PRId32, fmt)
-#define assert_d32_lt(a, b, fmt...) assert_cmp(int32_t, a, b, <, \
- >=, PRId32, fmt)
-#define assert_d32_le(a, b, fmt...) assert_cmp(int32_t, a, b, <=, \
- >, PRId32, fmt)
-#define assert_d32_ge(a, b, fmt...) assert_cmp(int32_t, a, b, >=, \
- <, PRId32, fmt)
-#define assert_d32_gt(a, b, fmt...) assert_cmp(int32_t, a, b, >, \
- <=, PRId32, fmt)
+#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \
+ !=, FMTd32, __VA_ARGS__)
+#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \
+ ==, FMTd32, __VA_ARGS__)
+#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \
+ >=, FMTd32, __VA_ARGS__)
+#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \
+ >, FMTd32, __VA_ARGS__)
+#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \
+ <, FMTd32, __VA_ARGS__)
+#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \
+ <=, FMTd32, __VA_ARGS__)
-#define assert_u32_eq(a, b, fmt...) assert_cmp(uint32_t, a, b, ==, \
- !=, PRIu32, fmt)
-#define assert_u32_ne(a, b, fmt...) assert_cmp(uint32_t, a, b, !=, \
- ==, PRIu32, fmt)
-#define assert_u32_lt(a, b, fmt...) assert_cmp(uint32_t, a, b, <, \
- >=, PRIu32, fmt)
-#define assert_u32_le(a, b, fmt...) assert_cmp(uint32_t, a, b, <=, \
- >, PRIu32, fmt)
-#define assert_u32_ge(a, b, fmt...) assert_cmp(uint32_t, a, b, >=, \
- <, PRIu32, fmt)
-#define assert_u32_gt(a, b, fmt...) assert_cmp(uint32_t, a, b, >, \
- <=, PRIu32, fmt)
+#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \
+ !=, FMTu32, __VA_ARGS__)
+#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \
+ ==, FMTu32, __VA_ARGS__)
+#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \
+ >=, FMTu32, __VA_ARGS__)
+#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \
+ >, FMTu32, __VA_ARGS__)
+#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \
+ <, FMTu32, __VA_ARGS__)
+#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \
+ <=, FMTu32, __VA_ARGS__)
-#define assert_d64_eq(a, b, fmt...) assert_cmp(int64_t, a, b, ==, \
- !=, PRId64, fmt)
-#define assert_d64_ne(a, b, fmt...) assert_cmp(int64_t, a, b, !=, \
- ==, PRId64, fmt)
-#define assert_d64_lt(a, b, fmt...) assert_cmp(int64_t, a, b, <, \
- >=, PRId64, fmt)
-#define assert_d64_le(a, b, fmt...) assert_cmp(int64_t, a, b, <=, \
- >, PRId64, fmt)
-#define assert_d64_ge(a, b, fmt...) assert_cmp(int64_t, a, b, >=, \
- <, PRId64, fmt)
-#define assert_d64_gt(a, b, fmt...) assert_cmp(int64_t, a, b, >, \
- <=, PRId64, fmt)
+#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \
+ !=, FMTd64, __VA_ARGS__)
+#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \
+ ==, FMTd64, __VA_ARGS__)
+#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \
+ >=, FMTd64, __VA_ARGS__)
+#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \
+ >, FMTd64, __VA_ARGS__)
+#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \
+ <, FMTd64, __VA_ARGS__)
+#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \
+ <=, FMTd64, __VA_ARGS__)
-#define assert_u64_eq(a, b, fmt...) assert_cmp(uint64_t, a, b, ==, \
- !=, PRIu64, fmt)
-#define assert_u64_ne(a, b, fmt...) assert_cmp(uint64_t, a, b, !=, \
- ==, PRIu64, fmt)
-#define assert_u64_lt(a, b, fmt...) assert_cmp(uint64_t, a, b, <, \
- >=, PRIu64, fmt)
-#define assert_u64_le(a, b, fmt...) assert_cmp(uint64_t, a, b, <=, \
- >, PRIu64, fmt)
-#define assert_u64_ge(a, b, fmt...) assert_cmp(uint64_t, a, b, >=, \
- <, PRIu64, fmt)
-#define assert_u64_gt(a, b, fmt...) assert_cmp(uint64_t, a, b, >, \
- <=, PRIu64, fmt)
+#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \
+ !=, FMTu64, __VA_ARGS__)
+#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \
+ ==, FMTu64, __VA_ARGS__)
+#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \
+ >=, FMTu64, __VA_ARGS__)
+#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \
+ >, FMTu64, __VA_ARGS__)
+#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \
+ <, FMTu64, __VA_ARGS__)
+#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \
+ <=, FMTu64, __VA_ARGS__)
-#define assert_b_eq(a, b, fmt...) do { \
+#define assert_b_eq(a, b, ...) do { \
bool a_ = (a); \
bool b_ = (b); \
if (!(a_ == b_)) { \
@@ -222,11 +222,11 @@
__func__, __FILE__, __LINE__, \
#a, #b, a_ ? "true" : "false", \
b_ ? "true" : "false"); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_b_ne(a, b, fmt...) do { \
+#define assert_b_ne(a, b, ...) do { \
bool a_ = (a); \
bool b_ = (b); \
if (!(a_ != b_)) { \
@@ -238,14 +238,14 @@
__func__, __FILE__, __LINE__, \
#a, #b, a_ ? "true" : "false", \
b_ ? "true" : "false"); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_true(a, fmt...) assert_b_eq(a, true, fmt)
-#define assert_false(a, fmt...) assert_b_eq(a, false, fmt)
+#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__)
+#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__)
-#define assert_str_eq(a, b, fmt...) do { \
+#define assert_str_eq(a, b, ...) do { \
if (strcmp((a), (b))) { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
@@ -254,11 +254,11 @@
"(%s) same as (%s) --> " \
"\"%s\" differs from \"%s\": ", \
__func__, __FILE__, __LINE__, #a, #b, a, b); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_str_ne(a, b, fmt...) do { \
+#define assert_str_ne(a, b, ...) do { \
if (!strcmp((a), (b))) { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
@@ -267,18 +267,18 @@
"(%s) differs from (%s) --> " \
"\"%s\" same as \"%s\": ", \
__func__, __FILE__, __LINE__, #a, #b, a, b); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_not_reached(fmt...) do { \
+#define assert_not_reached(...) do { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
malloc_snprintf(prefix, sizeof(prefix), \
"%s:%s:%d: Unreachable code reached: ", \
__func__, __FILE__, __LINE__); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} while (0)
@@ -308,8 +308,8 @@ label_test_end: \
p_test_fini(); \
}
-#define test(tests...) \
- p_test(tests, NULL)
+#define test(...) \
+ p_test(__VA_ARGS__, NULL)
#define test_skip_if(e) do { \
if (e) { \
@@ -319,11 +319,11 @@ label_test_end: \
} \
} while (0)
-void test_skip(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2));
-void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2));
+void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
/* For private use by macros. */
-test_status_t p_test(test_t* t, ...);
+test_status_t p_test(test_t *t, ...);
void p_test_init(const char *name);
void p_test_fini(void);
void p_test_fail(const char *prefix, const char *message);
diff --git a/deps/jemalloc/test/include/test/thd.h b/deps/jemalloc/test/include/test/thd.h
index f941d7a75..47a51262e 100644
--- a/deps/jemalloc/test/include/test/thd.h
+++ b/deps/jemalloc/test/include/test/thd.h
@@ -1,4 +1,4 @@
-/* Abstraction layer for threading in tests */
+/* Abstraction layer for threading in tests. */
#ifdef _WIN32
typedef HANDLE thd_t;
#else
diff --git a/deps/jemalloc/test/include/test/timer.h b/deps/jemalloc/test/include/test/timer.h
new file mode 100644
index 000000000..a7fefdfd1
--- /dev/null
+++ b/deps/jemalloc/test/include/test/timer.h
@@ -0,0 +1,26 @@
+/* Simple timer, for use in benchmark reporting. */
+
+#include <unistd.h>
+#include <sys/time.h>
+
+#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
+ && _POSIX_MONOTONIC_CLOCK >= 0
+
+typedef struct {
+#ifdef _WIN32
+ FILETIME ft0;
+ FILETIME ft1;
+#elif JEMALLOC_CLOCK_GETTIME
+ struct timespec ts0;
+ struct timespec ts1;
+ int clock_id;
+#else
+ struct timeval tv0;
+ struct timeval tv1;
+#endif
+} timedelta_t;
+
+void timer_start(timedelta_t *timer);
+void timer_stop(timedelta_t *timer);
+uint64_t timer_usec(const timedelta_t *timer);
+void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen);
diff --git a/deps/jemalloc/test/integration/MALLOCX_ARENA.c b/deps/jemalloc/test/integration/MALLOCX_ARENA.c
index 71cf6f255..30c203ae6 100644
--- a/deps/jemalloc/test/integration/MALLOCX_ARENA.c
+++ b/deps/jemalloc/test/integration/MALLOCX_ARENA.c
@@ -2,6 +2,14 @@
#define NTHREADS 10
+static bool have_dss =
+#ifdef JEMALLOC_DSS
+ true
+#else
+ false
+#endif
+ ;
+
void *
thd_start(void *arg)
{
@@ -18,13 +26,16 @@ thd_start(void *arg)
size_t mib[3];
size_t miblen = sizeof(mib) / sizeof(size_t);
const char *dss_precs[] = {"disabled", "primary", "secondary"};
- const char *dss = dss_precs[thread_ind %
- (sizeof(dss_precs)/sizeof(char*))];
+ unsigned prec_ind = thread_ind %
+ (sizeof(dss_precs)/sizeof(char*));
+ const char *dss = dss_precs[prec_ind];
+ int expected_err = (have_dss || prec_ind == 0) ? 0 : EFAULT;
assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
"Error in mallctlnametomib()");
mib[1] = arena_ind;
assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss,
- sizeof(const char *)), 0, "Error in mallctlbymib()");
+ sizeof(const char *)), expected_err,
+ "Error in mallctlbymib()");
}
p = mallocx(1, MALLOCX_ARENA(arena_ind));
@@ -34,7 +45,7 @@ thd_start(void *arg)
return (NULL);
}
-TEST_BEGIN(test_ALLOCM_ARENA)
+TEST_BEGIN(test_MALLOCX_ARENA)
{
thd_t thds[NTHREADS];
unsigned i;
@@ -54,5 +65,5 @@ main(void)
{
return (test(
- test_ALLOCM_ARENA));
+ test_MALLOCX_ARENA));
}
diff --git a/deps/jemalloc/test/integration/allocm.c b/deps/jemalloc/test/integration/allocm.c
deleted file mode 100644
index 7b4ea0c2c..000000000
--- a/deps/jemalloc/test/integration/allocm.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "test/jemalloc_test.h"
-
-#define CHUNK 0x400000
-#define MAXALIGN (((size_t)1) << 25)
-#define NITER 4
-
-TEST_BEGIN(test_basic)
-{
- size_t nsz, rsz, sz;
- void *p;
-
- sz = 42;
- nsz = 0;
- assert_d_eq(nallocm(&nsz, sz, 0), ALLOCM_SUCCESS,
- "Unexpected nallocm() error");
- rsz = 0;
- assert_d_eq(allocm(&p, &rsz, sz, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_zu_ge(rsz, sz, "Real size smaller than expected");
- assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-
- assert_d_eq(allocm(&p, NULL, sz, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-
- nsz = 0;
- assert_d_eq(nallocm(&nsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS,
- "Unexpected nallocm() error");
- rsz = 0;
- assert_d_eq(allocm(&p, &rsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_alignment_and_size)
-{
- int r;
- size_t nsz, rsz, sz, alignment, total;
- unsigned i;
- void *ps[NITER];
-
- for (i = 0; i < NITER; i++)
- ps[i] = NULL;
-
- for (alignment = 8;
- alignment <= MAXALIGN;
- alignment <<= 1) {
- total = 0;
- for (sz = 1;
- sz < 3 * alignment && sz < (1U << 31);
- sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
- for (i = 0; i < NITER; i++) {
- nsz = 0;
- r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) |
- ALLOCM_ZERO);
- assert_d_eq(r, ALLOCM_SUCCESS,
- "nallocm() error for alignment=%zu, "
- "size=%zu (%#zx): %d",
- alignment, sz, sz, r);
- rsz = 0;
- r = allocm(&ps[i], &rsz, sz,
- ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
- assert_d_eq(r, ALLOCM_SUCCESS,
- "allocm() error for alignment=%zu, "
- "size=%zu (%#zx): %d",
- alignment, sz, sz, r);
- assert_zu_ge(rsz, sz,
- "Real size smaller than expected for "
- "alignment=%zu, size=%zu", alignment, sz);
- assert_zu_eq(nsz, rsz,
- "nallocm()/allocm() rsize mismatch for "
- "alignment=%zu, size=%zu", alignment, sz);
- assert_ptr_null(
- (void *)((uintptr_t)ps[i] & (alignment-1)),
- "%p inadequately aligned for"
- " alignment=%zu, size=%zu", ps[i],
- alignment, sz);
- sallocm(ps[i], &rsz, 0);
- total += rsz;
- if (total >= (MAXALIGN << 1))
- break;
- }
- for (i = 0; i < NITER; i++) {
- if (ps[i] != NULL) {
- dallocm(ps[i], 0);
- ps[i] = NULL;
- }
- }
- }
- }
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_basic,
- test_alignment_and_size));
-}
diff --git a/deps/jemalloc/test/integration/chunk.c b/deps/jemalloc/test/integration/chunk.c
new file mode 100644
index 000000000..af1c9a53e
--- /dev/null
+++ b/deps/jemalloc/test/integration/chunk.c
@@ -0,0 +1,276 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_FILL
+const char *malloc_conf = "junk:false";
+#endif
+
+static chunk_hooks_t orig_hooks;
+static chunk_hooks_t old_hooks;
+
+static bool do_dalloc = true;
+static bool do_decommit;
+
+static bool did_alloc;
+static bool did_dalloc;
+static bool did_commit;
+static bool did_decommit;
+static bool did_purge;
+static bool did_split;
+static bool did_merge;
+
+#if 0
+# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__)
+#else
+# define TRACE_HOOK(fmt, ...)
+#endif
+
+void *
+chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero,
+ bool *commit, unsigned arena_ind)
+{
+
+ TRACE_HOOK("%s(new_addr=%p, size=%zu, alignment=%zu, *zero=%s, "
+ "*commit=%s, arena_ind=%u)\n", __func__, new_addr, size, alignment,
+ *zero ? "true" : "false", *commit ? "true" : "false", arena_ind);
+ did_alloc = true;
+ return (old_hooks.alloc(new_addr, size, alignment, zero, commit,
+ arena_ind));
+}
+
+bool
+chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind)
+{
+
+ TRACE_HOOK("%s(chunk=%p, size=%zu, committed=%s, arena_ind=%u)\n",
+ __func__, chunk, size, committed ? "true" : "false", arena_ind);
+ did_dalloc = true;
+ if (!do_dalloc)
+ return (true);
+ return (old_hooks.dalloc(chunk, size, committed, arena_ind));
+}
+
+bool
+chunk_commit(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
+ bool err;
+
+ TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu, "
+ "arena_ind=%u)\n", __func__, chunk, size, offset, length,
+ arena_ind);
+ err = old_hooks.commit(chunk, size, offset, length, arena_ind);
+ did_commit = !err;
+ return (err);
+}
+
+bool
+chunk_decommit(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
+ bool err;
+
+ TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu, "
+ "arena_ind=%u)\n", __func__, chunk, size, offset, length,
+ arena_ind);
+ if (!do_decommit)
+ return (true);
+ err = old_hooks.decommit(chunk, size, offset, length, arena_ind);
+ did_decommit = !err;
+ return (err);
+}
+
+bool
+chunk_purge(void *chunk, size_t size, size_t offset, size_t length,
+ unsigned arena_ind)
+{
+
+ TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu "
+ "arena_ind=%u)\n", __func__, chunk, size, offset, length,
+ arena_ind);
+ did_purge = true;
+ return (old_hooks.purge(chunk, size, offset, length, arena_ind));
+}
+
+bool
+chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b,
+ bool committed, unsigned arena_ind)
+{
+
+ TRACE_HOOK("%s(chunk=%p, size=%zu, size_a=%zu, size_b=%zu, "
+ "committed=%s, arena_ind=%u)\n", __func__, chunk, size, size_a,
+ size_b, committed ? "true" : "false", arena_ind);
+ did_split = true;
+ return (old_hooks.split(chunk, size, size_a, size_b, committed,
+ arena_ind));
+}
+
+bool
+chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b,
+ bool committed, unsigned arena_ind)
+{
+
+ TRACE_HOOK("%s(chunk_a=%p, size_a=%zu, chunk_b=%p size_b=%zu, "
+ "committed=%s, arena_ind=%u)\n", __func__, chunk_a, size_a, chunk_b,
+ size_b, committed ? "true" : "false", arena_ind);
+ did_merge = true;
+ return (old_hooks.merge(chunk_a, size_a, chunk_b, size_b,
+ committed, arena_ind));
+}
+
+TEST_BEGIN(test_chunk)
+{
+ void *p;
+ size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz;
+ chunk_hooks_t new_hooks = {
+ chunk_alloc,
+ chunk_dalloc,
+ chunk_commit,
+ chunk_decommit,
+ chunk_purge,
+ chunk_split,
+ chunk_merge
+ };
+ bool xallocx_success_a, xallocx_success_b, xallocx_success_c;
+
+ /* Install custom chunk hooks. */
+ old_size = sizeof(chunk_hooks_t);
+ new_size = sizeof(chunk_hooks_t);
+ assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size,
+ &new_hooks, new_size), 0, "Unexpected chunk_hooks error");
+ orig_hooks = old_hooks;
+ assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error");
+ assert_ptr_ne(old_hooks.dalloc, chunk_dalloc,
+ "Unexpected dalloc error");
+ assert_ptr_ne(old_hooks.commit, chunk_commit,
+ "Unexpected commit error");
+ assert_ptr_ne(old_hooks.decommit, chunk_decommit,
+ "Unexpected decommit error");
+ assert_ptr_ne(old_hooks.purge, chunk_purge, "Unexpected purge error");
+ assert_ptr_ne(old_hooks.split, chunk_split, "Unexpected split error");
+ assert_ptr_ne(old_hooks.merge, chunk_merge, "Unexpected merge error");
+
+ /* Get large size classes. */
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0,
+ "Unexpected arenas.lrun.0.size failure");
+ assert_d_eq(mallctl("arenas.lrun.1.size", &large1, &sz, NULL, 0), 0,
+ "Unexpected arenas.lrun.1.size failure");
+
+ /* Get huge size classes. */
+ assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0,
+ "Unexpected arenas.hchunk.0.size failure");
+ assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0,
+ "Unexpected arenas.hchunk.1.size failure");
+ assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0,
+ "Unexpected arenas.hchunk.2.size failure");
+
+ /* Test dalloc/decommit/purge cascade. */
+ do_dalloc = false;
+ do_decommit = false;
+ p = mallocx(huge0 * 2, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ did_dalloc = false;
+ did_decommit = false;
+ did_purge = false;
+ did_split = false;
+ xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0);
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected arena.0.purge error");
+ if (xallocx_success_a) {
+ assert_true(did_dalloc, "Expected dalloc");
+ assert_false(did_decommit, "Unexpected decommit");
+ assert_true(did_purge, "Expected purge");
+ }
+ assert_true(did_split, "Expected split");
+ dallocx(p, 0);
+ do_dalloc = true;
+
+ /* Test decommit/commit and observe split/merge. */
+ do_dalloc = false;
+ do_decommit = true;
+ p = mallocx(huge0 * 2, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ did_decommit = false;
+ did_commit = false;
+ did_split = false;
+ did_merge = false;
+ xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0);
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected arena.0.purge error");
+ if (xallocx_success_b)
+ assert_true(did_split, "Expected split");
+ xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2);
+ assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match");
+ if (xallocx_success_b && xallocx_success_c)
+ assert_true(did_merge, "Expected merge");
+ dallocx(p, 0);
+ do_dalloc = true;
+ do_decommit = false;
+
+ /* Test purge for partial-chunk huge allocations. */
+ if (huge0 * 2 > huge2) {
+ /*
+ * There are at least four size classes per doubling, so a
+ * successful xallocx() from size=huge2 to size=huge1 is
+ * guaranteed to leave trailing purgeable memory.
+ */
+ p = mallocx(huge2, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ did_purge = false;
+ assert_zu_eq(xallocx(p, huge1, 0, 0), huge1,
+ "Unexpected xallocx() failure");
+ assert_true(did_purge, "Expected purge");
+ dallocx(p, 0);
+ }
+
+ /* Test decommit for large allocations. */
+ do_decommit = true;
+ p = mallocx(large1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected arena.0.purge error");
+ did_decommit = false;
+ assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+ "Unexpected xallocx() failure");
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected arena.0.purge error");
+ did_commit = false;
+ assert_zu_eq(xallocx(p, large1, 0, 0), large1,
+ "Unexpected xallocx() failure");
+ assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match");
+ dallocx(p, 0);
+ do_decommit = false;
+
+ /* Make sure non-huge allocation succeeds. */
+ p = mallocx(42, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ dallocx(p, 0);
+
+ /* Restore chunk hooks. */
+ assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks,
+ new_size), 0, "Unexpected chunk_hooks error");
+ assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size,
+ NULL, 0), 0, "Unexpected chunk_hooks error");
+ assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc,
+ "Unexpected alloc error");
+ assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc,
+ "Unexpected dalloc error");
+ assert_ptr_eq(old_hooks.commit, orig_hooks.commit,
+ "Unexpected commit error");
+ assert_ptr_eq(old_hooks.decommit, orig_hooks.decommit,
+ "Unexpected decommit error");
+ assert_ptr_eq(old_hooks.purge, orig_hooks.purge,
+ "Unexpected purge error");
+ assert_ptr_eq(old_hooks.split, orig_hooks.split,
+ "Unexpected split error");
+ assert_ptr_eq(old_hooks.merge, orig_hooks.merge,
+ "Unexpected merge error");
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(test_chunk));
+}
diff --git a/deps/jemalloc/test/integration/mallocx.c b/deps/jemalloc/test/integration/mallocx.c
index 123e041fa..6253175d6 100644
--- a/deps/jemalloc/test/integration/mallocx.c
+++ b/deps/jemalloc/test/integration/mallocx.c
@@ -1,40 +1,122 @@
#include "test/jemalloc_test.h"
-#define CHUNK 0x400000
-#define MAXALIGN (((size_t)1) << 25)
-#define NITER 4
+static unsigned
+get_nsizes_impl(const char *cmd)
+{
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return (ret);
+}
+
+static unsigned
+get_nhuge(void)
+{
+
+ return (get_nsizes_impl("arenas.nhchunks"));
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind)
+{
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return (ret);
+}
+
+static size_t
+get_huge_size(size_t ind)
+{
+
+ return (get_size_impl("arenas.hchunk.0.size", ind));
+}
+
+TEST_BEGIN(test_oom)
+{
+ size_t hugemax, size, alignment;
+
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ /*
+ * It should be impossible to allocate two objects that each consume
+ * more than half the virtual address space.
+ */
+ {
+ void *p;
+
+ p = mallocx(hugemax, 0);
+ if (p != NULL) {
+ assert_ptr_null(mallocx(hugemax, 0),
+ "Expected OOM for mallocx(size=%#zx, 0)", hugemax);
+ dallocx(p, 0);
+ }
+ }
+
+#if LG_SIZEOF_PTR == 3
+ size = ZU(0x8000000000000000);
+ alignment = ZU(0x8000000000000000);
+#else
+ size = ZU(0x80000000);
+ alignment = ZU(0x80000000);
+#endif
+ assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)),
+ "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size,
+ alignment);
+}
+TEST_END
TEST_BEGIN(test_basic)
{
- size_t nsz, rsz, sz;
- void *p;
-
- sz = 42;
- nsz = nallocx(sz, 0);
- assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
- p = mallocx(sz, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- rsz = sallocx(p, 0);
- assert_zu_ge(rsz, sz, "Real size smaller than expected");
- assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
- dallocx(p, 0);
-
- p = mallocx(sz, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- dallocx(p, 0);
-
- nsz = nallocx(sz, MALLOCX_ZERO);
- assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
- p = mallocx(sz, MALLOCX_ZERO);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- rsz = sallocx(p, 0);
- assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch");
- dallocx(p, 0);
+#define MAXSZ (((size_t)1) << 26)
+ size_t sz;
+
+ for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) {
+ size_t nsz, rsz;
+ void *p;
+ nsz = nallocx(sz, 0);
+ assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
+ p = mallocx(sz, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ rsz = sallocx(p, 0);
+ assert_zu_ge(rsz, sz, "Real size smaller than expected");
+ assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
+ dallocx(p, 0);
+
+ p = mallocx(sz, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ dallocx(p, 0);
+
+ nsz = nallocx(sz, MALLOCX_ZERO);
+ assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
+ p = mallocx(sz, MALLOCX_ZERO);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ rsz = sallocx(p, 0);
+ assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch");
+ dallocx(p, 0);
+ }
+#undef MAXSZ
}
TEST_END
TEST_BEGIN(test_alignment_and_size)
{
+#define MAXALIGN (((size_t)1) << 25)
+#define NITER 4
size_t nsz, rsz, sz, alignment, total;
unsigned i;
void *ps[NITER];
@@ -84,6 +166,8 @@ TEST_BEGIN(test_alignment_and_size)
}
}
}
+#undef MAXALIGN
+#undef NITER
}
TEST_END
@@ -92,6 +176,7 @@ main(void)
{
return (test(
+ test_oom,
test_basic,
test_alignment_and_size));
}
diff --git a/deps/jemalloc/test/integration/mremap.c b/deps/jemalloc/test/integration/mremap.c
deleted file mode 100644
index a7fb7ef0a..000000000
--- a/deps/jemalloc/test/integration/mremap.c
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "test/jemalloc_test.h"
-
-TEST_BEGIN(test_mremap)
-{
- int err;
- size_t sz, lg_chunk, chunksize, i;
- char *p, *q;
-
- sz = sizeof(lg_chunk);
- err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0);
- assert_d_eq(err, 0, "Error in mallctl(): %s", strerror(err));
- chunksize = ((size_t)1U) << lg_chunk;
-
- p = (char *)malloc(chunksize);
- assert_ptr_not_null(p, "malloc(%zu) --> %p", chunksize, p);
- memset(p, 'a', chunksize);
-
- q = (char *)realloc(p, chunksize * 2);
- assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize * 2,
- q);
- for (i = 0; i < chunksize; i++) {
- assert_c_eq(q[i], 'a',
- "realloc() should preserve existing bytes across copies");
- }
-
- p = q;
-
- q = (char *)realloc(p, chunksize);
- assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize, q);
- for (i = 0; i < chunksize; i++) {
- assert_c_eq(q[i], 'a',
- "realloc() should preserve existing bytes across copies");
- }
-
- free(q);
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_mremap));
-}
diff --git a/deps/jemalloc/test/integration/overflow.c b/deps/jemalloc/test/integration/overflow.c
new file mode 100644
index 000000000..303d9b2d3
--- /dev/null
+++ b/deps/jemalloc/test/integration/overflow.c
@@ -0,0 +1,49 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_overflow)
+{
+ unsigned nhchunks;
+ size_t mib[4];
+ size_t sz, miblen, max_size_class;
+ void *p;
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0,
+ "Unexpected mallctl() error");
+
+ miblen = sizeof(mib) / sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+ mib[2] = nhchunks - 1;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0,
+ "Unexpected mallctlbymib() error");
+
+ assert_ptr_null(malloc(max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(malloc(SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+
+ assert_ptr_null(calloc(1, max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(calloc(1, SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+
+ p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() OOM");
+ assert_ptr_null(realloc(p, max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(realloc(p, SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+ free(p);
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_overflow));
+}
diff --git a/deps/jemalloc/test/integration/rallocm.c b/deps/jemalloc/test/integration/rallocm.c
deleted file mode 100644
index 33c11bb7c..000000000
--- a/deps/jemalloc/test/integration/rallocm.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "test/jemalloc_test.h"
-
-TEST_BEGIN(test_same_size)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE), ALLOCM_SUCCESS,
- "Unexpected rallocm() error");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_extra_no_move)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz, sz-42, ALLOCM_NO_MOVE),
- ALLOCM_SUCCESS, "Unexpected rallocm() error");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_no_move_fail)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE),
- ALLOCM_ERR_NOT_MOVED, "Unexpected rallocm() result");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_grow_and_shrink)
-{
- void *p, *q;
- size_t tsz;
-#define NCYCLES 3
- unsigned i, j;
-#define NSZS 2500
- size_t szs[NSZS];
-#define MAXSZ ZU(12 * 1024 * 1024)
-
- assert_d_eq(allocm(&p, &szs[0], 1, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- for (i = 0; i < NCYCLES; i++) {
- for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) {
- q = p;
- assert_d_eq(rallocm(&q, &szs[j], szs[j-1]+1, 0, 0),
- ALLOCM_SUCCESS,
- "Unexpected rallocm() error for size=%zu-->%zu",
- szs[j-1], szs[j-1]+1);
- assert_zu_ne(szs[j], szs[j-1]+1,
- "Expected size to at least: %zu", szs[j-1]+1);
- p = q;
- }
-
- for (j--; j > 0; j--) {
- q = p;
- assert_d_eq(rallocm(&q, &tsz, szs[j-1], 0, 0),
- ALLOCM_SUCCESS,
- "Unexpected rallocm() error for size=%zu-->%zu",
- szs[j], szs[j-1]);
- assert_zu_eq(tsz, szs[j-1],
- "Expected size=%zu, got size=%zu", szs[j-1], tsz);
- p = q;
- }
- }
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_same_size,
- test_extra_no_move,
- test_no_move_fail,
- test_grow_and_shrink));
-}
diff --git a/deps/jemalloc/test/integration/rallocx.c b/deps/jemalloc/test/integration/rallocx.c
index ee21aedff..be1b27b73 100644
--- a/deps/jemalloc/test/integration/rallocx.c
+++ b/deps/jemalloc/test/integration/rallocx.c
@@ -22,7 +22,7 @@ TEST_BEGIN(test_grow_and_shrink)
szs[j-1], szs[j-1]+1);
szs[j] = sallocx(q, 0);
assert_zu_ne(szs[j], szs[j-1]+1,
- "Expected size to at least: %zu", szs[j-1]+1);
+ "Expected size to be at least: %zu", szs[j-1]+1);
p = q;
}
@@ -55,8 +55,9 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len)
for (i = 0; i < len; i++) {
uint8_t b = buf[offset+i];
if (b != c) {
- test_fail("Allocation at %p contains %#x rather than "
- "%#x at offset %zu", p, b, c, offset+i);
+ test_fail("Allocation at %p (len=%zu) contains %#x "
+ "rather than %#x at offset %zu", p, len, b, c,
+ offset+i);
ret = true;
}
}
@@ -95,7 +96,8 @@ TEST_BEGIN(test_zero)
"Expected zeroed memory");
}
if (psz != qsz) {
- memset(q+psz, FILL_BYTE, qsz-psz);
+ memset((void *)((uintptr_t)q+psz), FILL_BYTE,
+ qsz-psz);
psz = qsz;
}
p = q;
@@ -159,8 +161,9 @@ TEST_BEGIN(test_lg_align_and_zero)
} else {
assert_false(validate_fill(q, 0, 0, MAX_VALIDATE),
"Expected zeroed memory");
- assert_false(validate_fill(q+sz-MAX_VALIDATE, 0, 0,
- MAX_VALIDATE), "Expected zeroed memory");
+ assert_false(validate_fill(
+ (void *)((uintptr_t)q+sz-MAX_VALIDATE),
+ 0, 0, MAX_VALIDATE), "Expected zeroed memory");
}
p = q;
}
diff --git a/deps/jemalloc/test/integration/sdallocx.c b/deps/jemalloc/test/integration/sdallocx.c
new file mode 100644
index 000000000..b84817d76
--- /dev/null
+++ b/deps/jemalloc/test/integration/sdallocx.c
@@ -0,0 +1,57 @@
+#include "test/jemalloc_test.h"
+
+#define MAXALIGN (((size_t)1) << 25)
+#define NITER 4
+
+TEST_BEGIN(test_basic)
+{
+ void *ptr = mallocx(64, 0);
+ sdallocx(ptr, 64, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_alignment_and_size)
+{
+ size_t nsz, sz, alignment, total;
+ unsigned i;
+ void *ps[NITER];
+
+ for (i = 0; i < NITER; i++)
+ ps[i] = NULL;
+
+ for (alignment = 8;
+ alignment <= MAXALIGN;
+ alignment <<= 1) {
+ total = 0;
+ for (sz = 1;
+ sz < 3 * alignment && sz < (1U << 31);
+ sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+ for (i = 0; i < NITER; i++) {
+ nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ total += nsz;
+ if (total >= (MAXALIGN << 1))
+ break;
+ }
+ for (i = 0; i < NITER; i++) {
+ if (ps[i] != NULL) {
+ sdallocx(ps[i], sz,
+ MALLOCX_ALIGN(alignment));
+ ps[i] = NULL;
+ }
+ }
+ }
+ }
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_basic,
+ test_alignment_and_size));
+}
diff --git a/deps/jemalloc/test/integration/xallocx.c b/deps/jemalloc/test/integration/xallocx.c
index ab4cf945e..373625219 100644
--- a/deps/jemalloc/test/integration/xallocx.c
+++ b/deps/jemalloc/test/integration/xallocx.c
@@ -48,6 +48,411 @@ TEST_BEGIN(test_no_move_fail)
}
TEST_END
+static unsigned
+get_nsizes_impl(const char *cmd)
+{
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return (ret);
+}
+
+static unsigned
+get_nsmall(void)
+{
+
+ return (get_nsizes_impl("arenas.nbins"));
+}
+
+static unsigned
+get_nlarge(void)
+{
+
+ return (get_nsizes_impl("arenas.nlruns"));
+}
+
+static unsigned
+get_nhuge(void)
+{
+
+ return (get_nsizes_impl("arenas.nhchunks"));
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind)
+{
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return (ret);
+}
+
+static size_t
+get_small_size(size_t ind)
+{
+
+ return (get_size_impl("arenas.bin.0.size", ind));
+}
+
+static size_t
+get_large_size(size_t ind)
+{
+
+ return (get_size_impl("arenas.lrun.0.size", ind));
+}
+
+static size_t
+get_huge_size(size_t ind)
+{
+
+ return (get_size_impl("arenas.hchunk.0.size", ind));
+}
+
+TEST_BEGIN(test_size)
+{
+ size_t small0, hugemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ /* Test smallest supported size. */
+ assert_zu_eq(xallocx(p, 1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ /* Test largest supported size. */
+ assert_zu_le(xallocx(p, hugemax, 0, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ /* Test size overflow. */
+ assert_zu_le(xallocx(p, hugemax+1, 0, 0), hugemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_size_extra_overflow)
+{
+ size_t small0, hugemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ /* Test overflows that can be resolved by clamping extra. */
+ assert_zu_le(xallocx(p, hugemax-1, 2, 0), hugemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, hugemax, 1, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ /* Test overflow such that hugemax-size underflows. */
+ assert_zu_le(xallocx(p, hugemax+1, 2, 0), hugemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, hugemax+2, 3, 0), hugemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), hugemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_extra_small)
+{
+ size_t small0, small1, hugemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ small1 = get_small_size(1);
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ assert_zu_eq(xallocx(p, small1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, small1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, small0, small1 - small0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ /* Test size+extra overflow. */
+ assert_zu_eq(xallocx(p, small0, hugemax - small0 + 1, 0), small0,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_extra_large)
+{
+ size_t smallmax, large0, large1, large2, huge0, hugemax;
+ void *p;
+
+ /* Get size classes. */
+ smallmax = get_small_size(get_nsmall()-1);
+ large0 = get_large_size(0);
+ large1 = get_large_size(1);
+ large2 = get_large_size(2);
+ huge0 = get_huge_size(0);
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ p = mallocx(large2, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+ "Unexpected xallocx() behavior");
+ /* Test size decrease with zero extra. */
+ assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, smallmax, 0, 0), large0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+ "Unexpected xallocx() behavior");
+ /* Test size decrease with non-zero extra. */
+ assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with zero extra. */
+ assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, huge0, 0, 0), large2,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, large0, 0, 0), large0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, large2, 0, 0), large2,
+ "Unexpected xallocx() behavior");
+ /* Test size+extra overflow. */
+ assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_extra_huge)
+{
+ size_t largemax, huge0, huge1, huge2, hugemax;
+ void *p;
+
+ /* Get size classes. */
+ largemax = get_large_size(get_nlarge()-1);
+ huge0 = get_huge_size(0);
+ huge1 = get_huge_size(1);
+ huge2 = get_huge_size(2);
+ hugemax = get_huge_size(get_nhuge()-1);
+
+ p = mallocx(huge2, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+ "Unexpected xallocx() behavior");
+ /* Test size decrease with zero extra. */
+ assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+ "Unexpected xallocx() behavior");
+ assert_zu_ge(xallocx(p, largemax, 0, 0), huge0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+ "Unexpected xallocx() behavior");
+ /* Test size decrease with non-zero extra. */
+ assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1,
+ "Unexpected xallocx() behavior");
+ assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with zero extra. */
+ assert_zu_le(xallocx(p, huge2, 0, 0), huge2,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, huge0, 0, 0), huge0,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, huge2, 0, 0), huge2,
+ "Unexpected xallocx() behavior");
+ /* Test size+extra overflow. */
+ assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+static void
+print_filled_extents(const void *p, uint8_t c, size_t len)
+{
+ const uint8_t *pc = (const uint8_t *)p;
+ size_t i, range0;
+ uint8_t c0;
+
+ malloc_printf(" p=%p, c=%#x, len=%zu:", p, c, len);
+ range0 = 0;
+ c0 = pc[0];
+ for (i = 0; i < len; i++) {
+ if (pc[i] != c0) {
+ malloc_printf(" %#x[%zu..%zu)", c0, range0, i);
+ range0 = i;
+ c0 = pc[i];
+ }
+ }
+ malloc_printf(" %#x[%zu..%zu)\n", c0, range0, i);
+}
+
+static bool
+validate_fill(const void *p, uint8_t c, size_t offset, size_t len)
+{
+ const uint8_t *pc = (const uint8_t *)p;
+ bool err;
+ size_t i;
+
+ for (i = offset, err = false; i < offset+len; i++) {
+ if (pc[i] != c)
+ err = true;
+ }
+
+ if (err)
+ print_filled_extents(p, c, offset + len);
+
+ return (err);
+}
+
+static void
+test_zero(size_t szmin, size_t szmax)
+{
+ size_t sz, nsz;
+ void *p;
+#define FILL_BYTE 0x7aU
+
+ sz = szmax;
+ p = mallocx(sz, MALLOCX_ZERO);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu",
+ sz);
+
+ /*
+ * Fill with non-zero so that non-debug builds are more likely to detect
+ * errors.
+ */
+ memset(p, FILL_BYTE, sz);
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+
+ /* Shrink in place so that we can expect growing in place to succeed. */
+ sz = szmin;
+ assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz,
+ "Unexpected xallocx() error");
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+
+ for (sz = szmin; sz < szmax; sz = nsz) {
+ nsz = nallocx(sz+1, MALLOCX_ZERO);
+ assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz,
+ "Unexpected xallocx() failure");
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+ assert_false(validate_fill(p, 0x00, sz, nsz-sz),
+ "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz);
+ memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz);
+ assert_false(validate_fill(p, FILL_BYTE, 0, nsz),
+ "Memory not filled: nsz=%zu", nsz);
+ }
+
+ dallocx(p, 0);
+}
+
+TEST_BEGIN(test_zero_large)
+{
+ size_t large0, largemax;
+
+ /* Get size classes. */
+ large0 = get_large_size(0);
+ largemax = get_large_size(get_nlarge()-1);
+
+ test_zero(large0, largemax);
+}
+TEST_END
+
+TEST_BEGIN(test_zero_huge)
+{
+ size_t huge0, huge1;
+
+ /* Get size classes. */
+ huge0 = get_huge_size(0);
+ huge1 = get_huge_size(1);
+
+ test_zero(huge1, huge0 * 2);
+}
+TEST_END
+
int
main(void)
{
@@ -55,5 +460,12 @@ main(void)
return (test(
test_same_size,
test_extra_no_move,
- test_no_move_fail));
+ test_no_move_fail,
+ test_size,
+ test_size_extra_overflow,
+ test_extra_small,
+ test_extra_large,
+ test_extra_huge,
+ test_zero_large,
+ test_zero_huge));
}
diff --git a/deps/jemalloc/test/src/SFMT.c b/deps/jemalloc/test/src/SFMT.c
index e6f8deecb..80cabe05e 100644
--- a/deps/jemalloc/test/src/SFMT.c
+++ b/deps/jemalloc/test/src/SFMT.c
@@ -463,11 +463,11 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
above = 0xffffffffU - (0xffffffffU % limit);
while (1) {
- ret = gen_rand32(ctx);
- if (ret < above) {
- ret %= limit;
- break;
- }
+ ret = gen_rand32(ctx);
+ if (ret < above) {
+ ret %= limit;
+ break;
+ }
}
return ret;
}
@@ -511,13 +511,13 @@ uint64_t gen_rand64(sfmt_t *ctx) {
uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) {
uint64_t ret, above;
- above = 0xffffffffffffffffLLU - (0xffffffffffffffffLLU % limit);
+ above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit);
while (1) {
- ret = gen_rand64(ctx);
- if (ret < above) {
- ret %= limit;
- break;
- }
+ ret = gen_rand64(ctx);
+ if (ret < above) {
+ ret %= limit;
+ break;
+ }
}
return ret;
}
diff --git a/deps/jemalloc/test/src/btalloc.c b/deps/jemalloc/test/src/btalloc.c
new file mode 100644
index 000000000..9a253d978
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc.c
@@ -0,0 +1,8 @@
+#include "test/jemalloc_test.h"
+
+void *
+btalloc(size_t size, unsigned bits)
+{
+
+ return (btalloc_0(size, bits));
+}
diff --git a/deps/jemalloc/test/src/btalloc_0.c b/deps/jemalloc/test/src/btalloc_0.c
new file mode 100644
index 000000000..77d8904ea
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc_0.c
@@ -0,0 +1,3 @@
+#include "test/jemalloc_test.h"
+
+btalloc_n_gen(0)
diff --git a/deps/jemalloc/test/src/btalloc_1.c b/deps/jemalloc/test/src/btalloc_1.c
new file mode 100644
index 000000000..4c126c309
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc_1.c
@@ -0,0 +1,3 @@
+#include "test/jemalloc_test.h"
+
+btalloc_n_gen(1)
diff --git a/deps/jemalloc/test/src/mq.c b/deps/jemalloc/test/src/mq.c
new file mode 100644
index 000000000..40b31c15c
--- /dev/null
+++ b/deps/jemalloc/test/src/mq.c
@@ -0,0 +1,29 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Sleep for approximately ns nanoseconds. No lower *nor* upper bound on sleep
+ * time is guaranteed.
+ */
+void
+mq_nanosleep(unsigned ns)
+{
+
+ assert(ns <= 1000*1000*1000);
+
+#ifdef _WIN32
+ Sleep(ns / 1000);
+#else
+ {
+ struct timespec timeout;
+
+ if (ns < 1000*1000*1000) {
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = ns;
+ } else {
+ timeout.tv_sec = 1;
+ timeout.tv_nsec = 0;
+ }
+ nanosleep(&timeout, NULL);
+ }
+#endif
+}
diff --git a/deps/jemalloc/test/src/mtx.c b/deps/jemalloc/test/src/mtx.c
index 41b95d59d..73bd02f6d 100644
--- a/deps/jemalloc/test/src/mtx.c
+++ b/deps/jemalloc/test/src/mtx.c
@@ -1,5 +1,9 @@
#include "test/jemalloc_test.h"
+#ifndef _CRT_SPINCOUNT
+#define _CRT_SPINCOUNT 4000
+#endif
+
bool
mtx_init(mtx_t *mtx)
{
diff --git a/deps/jemalloc/test/src/test.c b/deps/jemalloc/test/src/test.c
index 528d85831..8173614cf 100644
--- a/deps/jemalloc/test/src/test.c
+++ b/deps/jemalloc/test/src/test.c
@@ -5,7 +5,7 @@ static test_status_t test_counts[test_status_count] = {0, 0, 0};
static test_status_t test_status = test_status_pass;
static const char * test_name = "";
-JEMALLOC_ATTR(format(printf, 1, 2))
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
test_skip(const char *format, ...)
{
@@ -18,7 +18,7 @@ test_skip(const char *format, ...)
test_status = test_status_skip;
}
-JEMALLOC_ATTR(format(printf, 1, 2))
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
test_fail(const char *format, ...)
{
@@ -61,13 +61,26 @@ p_test_fini(void)
}
test_status_t
-p_test(test_t* t, ...)
+p_test(test_t *t, ...)
{
- test_status_t ret = test_status_pass;
+ test_status_t ret;
va_list ap;
+ /*
+ * Make sure initialization occurs prior to running tests. Tests are
+ * special because they may use internal facilities prior to triggering
+ * initialization as a side effect of calling into the public API. This
+ * is a final safety that works even if jemalloc_constructor() doesn't
+ * run, as for MSVC builds.
+ */
+ if (nallocx(1, 0) == 0) {
+ malloc_printf("Initialization error");
+ return (test_status_fail);
+ }
+
+ ret = test_status_pass;
va_start(ap, t);
- for (; t != NULL; t = va_arg(ap, test_t*)) {
+ for (; t != NULL; t = va_arg(ap, test_t *)) {
t();
if (test_status > ret)
ret = test_status;
diff --git a/deps/jemalloc/test/src/thd.c b/deps/jemalloc/test/src/thd.c
index 233242a16..c9d006586 100644
--- a/deps/jemalloc/test/src/thd.c
+++ b/deps/jemalloc/test/src/thd.c
@@ -14,7 +14,11 @@ void
thd_join(thd_t thd, void **ret)
{
- WaitForSingleObject(thd, INFINITE);
+ if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) {
+ DWORD exit_code;
+ GetExitCodeThread(thd, (LPDWORD) &exit_code);
+ *ret = (void *)(uintptr_t)exit_code;
+ }
}
#else
diff --git a/deps/jemalloc/test/src/timer.c b/deps/jemalloc/test/src/timer.c
new file mode 100644
index 000000000..0c93abaf9
--- /dev/null
+++ b/deps/jemalloc/test/src/timer.c
@@ -0,0 +1,85 @@
+#include "test/jemalloc_test.h"
+
+void
+timer_start(timedelta_t *timer)
+{
+
+#ifdef _WIN32
+ GetSystemTimeAsFileTime(&timer->ft0);
+#elif JEMALLOC_CLOCK_GETTIME
+ if (sysconf(_SC_MONOTONIC_CLOCK) <= 0)
+ timer->clock_id = CLOCK_REALTIME;
+ else
+ timer->clock_id = CLOCK_MONOTONIC;
+ clock_gettime(timer->clock_id, &timer->ts0);
+#else
+ gettimeofday(&timer->tv0, NULL);
+#endif
+}
+
+void
+timer_stop(timedelta_t *timer)
+{
+
+#ifdef _WIN32
+ GetSystemTimeAsFileTime(&timer->ft0);
+#elif JEMALLOC_CLOCK_GETTIME
+ clock_gettime(timer->clock_id, &timer->ts1);
+#else
+ gettimeofday(&timer->tv1, NULL);
+#endif
+}
+
+uint64_t
+timer_usec(const timedelta_t *timer)
+{
+
+#ifdef _WIN32
+ uint64_t t0, t1;
+ t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) |
+ timer->ft0.dwLowDateTime;
+ t1 = (((uint64_t)timer->ft1.dwHighDateTime) << 32) |
+ timer->ft1.dwLowDateTime;
+ return ((t1 - t0) / 10);
+#elif JEMALLOC_CLOCK_GETTIME
+ return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) +
+ (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000);
+#else
+ return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) +
+ timer->tv1.tv_usec - timer->tv0.tv_usec);
+#endif
+}
+
+void
+timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen)
+{
+ uint64_t t0 = timer_usec(a);
+ uint64_t t1 = timer_usec(b);
+ uint64_t mult;
+ unsigned i = 0;
+ unsigned j;
+ int n;
+
+ /* Whole. */
+ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1);
+ i += n;
+ if (i >= buflen)
+ return;
+ mult = 1;
+ for (j = 0; j < n; j++)
+ mult *= 10;
+
+ /* Decimal. */
+ n = malloc_snprintf(&buf[i], buflen-i, ".");
+ i += n;
+
+ /* Fraction. */
+ while (i < buflen-1) {
+ uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10
+ >= 5)) ? 1 : 0;
+ n = malloc_snprintf(&buf[i], buflen-i,
+ "%"FMTu64, (t0 * mult / t1) % 10 + round);
+ i += n;
+ mult *= 10;
+ }
+}
diff --git a/deps/jemalloc/test/stress/microbench.c b/deps/jemalloc/test/stress/microbench.c
new file mode 100644
index 000000000..ee39fea7f
--- /dev/null
+++ b/deps/jemalloc/test/stress/microbench.c
@@ -0,0 +1,181 @@
+#include "test/jemalloc_test.h"
+
+JEMALLOC_INLINE_C void
+time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void))
+{
+ uint64_t i;
+
+ for (i = 0; i < nwarmup; i++)
+ func();
+ timer_start(timer);
+ for (i = 0; i < niter; i++)
+ func();
+ timer_stop(timer);
+}
+
+void
+compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a,
+ void (*func_a), const char *name_b, void (*func_b))
+{
+ timedelta_t timer_a, timer_b;
+ char ratio_buf[6];
+ void *p;
+
+ p = mallocx(1, 0);
+ if (p == NULL) {
+ test_fail("Unexpected mallocx() failure");
+ return;
+ }
+
+ time_func(&timer_a, nwarmup, niter, func_a);
+ time_func(&timer_b, nwarmup, niter, func_b);
+
+ timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf));
+ malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, "
+ "%s=%"FMTu64"us, ratio=1:%s\n",
+ niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b),
+ ratio_buf);
+
+ dallocx(p, 0);
+}
+
+static void
+malloc_free(void)
+{
+ /* The compiler can optimize away free(malloc(1))! */
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ free(p);
+}
+
+static void
+mallocx_free(void)
+{
+ void *p = mallocx(1, 0);
+ if (p == NULL) {
+ test_fail("Unexpected mallocx() failure");
+ return;
+ }
+ free(p);
+}
+
+TEST_BEGIN(test_malloc_vs_mallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "malloc",
+ malloc_free, "mallocx", mallocx_free);
+}
+TEST_END
+
+static void
+malloc_dallocx(void)
+{
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ dallocx(p, 0);
+}
+
+static void
+malloc_sdallocx(void)
+{
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ sdallocx(p, 1, 0);
+}
+
+TEST_BEGIN(test_free_vs_dallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free,
+ "dallocx", malloc_dallocx);
+}
+TEST_END
+
+TEST_BEGIN(test_dallocx_vs_sdallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx,
+ "sdallocx", malloc_sdallocx);
+}
+TEST_END
+
+static void
+malloc_mus_free(void)
+{
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ malloc_usable_size(p);
+ free(p);
+}
+
+static void
+malloc_sallocx_free(void)
+{
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ if (sallocx(p, 0) < 1)
+ test_fail("Unexpected sallocx() failure");
+ free(p);
+}
+
+TEST_BEGIN(test_mus_vs_sallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size",
+ malloc_mus_free, "sallocx", malloc_sallocx_free);
+}
+TEST_END
+
+static void
+malloc_nallocx_free(void)
+{
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ if (nallocx(1, 0) < 1)
+ test_fail("Unexpected nallocx() failure");
+ free(p);
+}
+
+TEST_BEGIN(test_sallocx_vs_nallocx)
+{
+
+ compare_funcs(10*1000*1000, 100*1000*1000, "sallocx",
+ malloc_sallocx_free, "nallocx", malloc_nallocx_free);
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_malloc_vs_mallocx,
+ test_free_vs_dallocx,
+ test_dallocx_vs_sdallocx,
+ test_mus_vs_sallocx,
+ test_sallocx_vs_nallocx));
+}
diff --git a/deps/jemalloc/test/unit/SFMT.c b/deps/jemalloc/test/unit/SFMT.c
index c57bd68df..ba4be8702 100644
--- a/deps/jemalloc/test/unit/SFMT.c
+++ b/deps/jemalloc/test/unit/SFMT.c
@@ -445,1008 +445,1008 @@ static const uint32_t init_by_array_32_expected[] = {
2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U
};
static const uint64_t init_gen_rand_64_expected[] = {
- QU(16924766246869039260LLU), QU( 8201438687333352714LLU),
- QU( 2265290287015001750LLU), QU(18397264611805473832LLU),
- QU( 3375255223302384358LLU), QU( 6345559975416828796LLU),
- QU(18229739242790328073LLU), QU( 7596792742098800905LLU),
- QU( 255338647169685981LLU), QU( 2052747240048610300LLU),
- QU(18328151576097299343LLU), QU(12472905421133796567LLU),
- QU(11315245349717600863LLU), QU(16594110197775871209LLU),
- QU(15708751964632456450LLU), QU(10452031272054632535LLU),
- QU(11097646720811454386LLU), QU( 4556090668445745441LLU),
- QU(17116187693090663106LLU), QU(14931526836144510645LLU),
- QU( 9190752218020552591LLU), QU( 9625800285771901401LLU),
- QU(13995141077659972832LLU), QU( 5194209094927829625LLU),
- QU( 4156788379151063303LLU), QU( 8523452593770139494LLU),
- QU(14082382103049296727LLU), QU( 2462601863986088483LLU),
- QU( 3030583461592840678LLU), QU( 5221622077872827681LLU),
- QU( 3084210671228981236LLU), QU(13956758381389953823LLU),
- QU(13503889856213423831LLU), QU(15696904024189836170LLU),
- QU( 4612584152877036206LLU), QU( 6231135538447867881LLU),
- QU(10172457294158869468LLU), QU( 6452258628466708150LLU),
- QU(14044432824917330221LLU), QU( 370168364480044279LLU),
- QU(10102144686427193359LLU), QU( 667870489994776076LLU),
- QU( 2732271956925885858LLU), QU(18027788905977284151LLU),
- QU(15009842788582923859LLU), QU( 7136357960180199542LLU),
- QU(15901736243475578127LLU), QU(16951293785352615701LLU),
- QU(10551492125243691632LLU), QU(17668869969146434804LLU),
- QU(13646002971174390445LLU), QU( 9804471050759613248LLU),
- QU( 5511670439655935493LLU), QU(18103342091070400926LLU),
- QU(17224512747665137533LLU), QU(15534627482992618168LLU),
- QU( 1423813266186582647LLU), QU(15821176807932930024LLU),
- QU( 30323369733607156LLU), QU(11599382494723479403LLU),
- QU( 653856076586810062LLU), QU( 3176437395144899659LLU),
- QU(14028076268147963917LLU), QU(16156398271809666195LLU),
- QU( 3166955484848201676LLU), QU( 5746805620136919390LLU),
- QU(17297845208891256593LLU), QU(11691653183226428483LLU),
- QU(17900026146506981577LLU), QU(15387382115755971042LLU),
- QU(16923567681040845943LLU), QU( 8039057517199388606LLU),
- QU(11748409241468629263LLU), QU( 794358245539076095LLU),
- QU(13438501964693401242LLU), QU(14036803236515618962LLU),
- QU( 5252311215205424721LLU), QU(17806589612915509081LLU),
- QU( 6802767092397596006LLU), QU(14212120431184557140LLU),
- QU( 1072951366761385712LLU), QU(13098491780722836296LLU),
- QU( 9466676828710797353LLU), QU(12673056849042830081LLU),
- QU(12763726623645357580LLU), QU(16468961652999309493LLU),
- QU(15305979875636438926LLU), QU(17444713151223449734LLU),
- QU( 5692214267627883674LLU), QU(13049589139196151505LLU),
- QU( 880115207831670745LLU), QU( 1776529075789695498LLU),
- QU(16695225897801466485LLU), QU(10666901778795346845LLU),
- QU( 6164389346722833869LLU), QU( 2863817793264300475LLU),
- QU( 9464049921886304754LLU), QU( 3993566636740015468LLU),
- QU( 9983749692528514136LLU), QU(16375286075057755211LLU),
- QU(16042643417005440820LLU), QU(11445419662923489877LLU),
- QU( 7999038846885158836LLU), QU( 6721913661721511535LLU),
- QU( 5363052654139357320LLU), QU( 1817788761173584205LLU),
- QU(13290974386445856444LLU), QU( 4650350818937984680LLU),
- QU( 8219183528102484836LLU), QU( 1569862923500819899LLU),
- QU( 4189359732136641860LLU), QU(14202822961683148583LLU),
- QU( 4457498315309429058LLU), QU(13089067387019074834LLU),
- QU(11075517153328927293LLU), QU(10277016248336668389LLU),
- QU( 7070509725324401122LLU), QU(17808892017780289380LLU),
- QU(13143367339909287349LLU), QU( 1377743745360085151LLU),
- QU( 5749341807421286485LLU), QU(14832814616770931325LLU),
- QU( 7688820635324359492LLU), QU(10960474011539770045LLU),
- QU( 81970066653179790LLU), QU(12619476072607878022LLU),
- QU( 4419566616271201744LLU), QU(15147917311750568503LLU),
- QU( 5549739182852706345LLU), QU( 7308198397975204770LLU),
- QU(13580425496671289278LLU), QU(17070764785210130301LLU),
- QU( 8202832846285604405LLU), QU( 6873046287640887249LLU),
- QU( 6927424434308206114LLU), QU( 6139014645937224874LLU),
- QU(10290373645978487639LLU), QU(15904261291701523804LLU),
- QU( 9628743442057826883LLU), QU(18383429096255546714LLU),
- QU( 4977413265753686967LLU), QU( 7714317492425012869LLU),
- QU( 9025232586309926193LLU), QU(14627338359776709107LLU),
- QU(14759849896467790763LLU), QU(10931129435864423252LLU),
- QU( 4588456988775014359LLU), QU(10699388531797056724LLU),
- QU( 468652268869238792LLU), QU( 5755943035328078086LLU),
- QU( 2102437379988580216LLU), QU( 9986312786506674028LLU),
- QU( 2654207180040945604LLU), QU( 8726634790559960062LLU),
- QU( 100497234871808137LLU), QU( 2800137176951425819LLU),
- QU( 6076627612918553487LLU), QU( 5780186919186152796LLU),
- QU( 8179183595769929098LLU), QU( 6009426283716221169LLU),
- QU( 2796662551397449358LLU), QU( 1756961367041986764LLU),
- QU( 6972897917355606205LLU), QU(14524774345368968243LLU),
- QU( 2773529684745706940LLU), QU( 4853632376213075959LLU),
- QU( 4198177923731358102LLU), QU( 8271224913084139776LLU),
- QU( 2741753121611092226LLU), QU(16782366145996731181LLU),
- QU(15426125238972640790LLU), QU(13595497100671260342LLU),
- QU( 3173531022836259898LLU), QU( 6573264560319511662LLU),
- QU(18041111951511157441LLU), QU( 2351433581833135952LLU),
- QU( 3113255578908173487LLU), QU( 1739371330877858784LLU),
- QU(16046126562789165480LLU), QU( 8072101652214192925LLU),
- QU(15267091584090664910LLU), QU( 9309579200403648940LLU),
- QU( 5218892439752408722LLU), QU(14492477246004337115LLU),
- QU(17431037586679770619LLU), QU( 7385248135963250480LLU),
- QU( 9580144956565560660LLU), QU( 4919546228040008720LLU),
- QU(15261542469145035584LLU), QU(18233297270822253102LLU),
- QU( 5453248417992302857LLU), QU( 9309519155931460285LLU),
- QU(10342813012345291756LLU), QU(15676085186784762381LLU),
- QU(15912092950691300645LLU), QU( 9371053121499003195LLU),
- QU( 9897186478226866746LLU), QU(14061858287188196327LLU),
- QU( 122575971620788119LLU), QU(12146750969116317754LLU),
- QU( 4438317272813245201LLU), QU( 8332576791009527119LLU),
- QU(13907785691786542057LLU), QU(10374194887283287467LLU),
- QU( 2098798755649059566LLU), QU( 3416235197748288894LLU),
- QU( 8688269957320773484LLU), QU( 7503964602397371571LLU),
- QU(16724977015147478236LLU), QU( 9461512855439858184LLU),
- QU(13259049744534534727LLU), QU( 3583094952542899294LLU),
- QU( 8764245731305528292LLU), QU(13240823595462088985LLU),
- QU(13716141617617910448LLU), QU(18114969519935960955LLU),
- QU( 2297553615798302206LLU), QU( 4585521442944663362LLU),
- QU(17776858680630198686LLU), QU( 4685873229192163363LLU),
- QU( 152558080671135627LLU), QU(15424900540842670088LLU),
- QU(13229630297130024108LLU), QU(17530268788245718717LLU),
- QU(16675633913065714144LLU), QU( 3158912717897568068LLU),
- QU(15399132185380087288LLU), QU( 7401418744515677872LLU),
- QU(13135412922344398535LLU), QU( 6385314346100509511LLU),
- QU(13962867001134161139LLU), QU(10272780155442671999LLU),
- QU(12894856086597769142LLU), QU(13340877795287554994LLU),
- QU(12913630602094607396LLU), QU(12543167911119793857LLU),
- QU(17343570372251873096LLU), QU(10959487764494150545LLU),
- QU( 6966737953093821128LLU), QU(13780699135496988601LLU),
- QU( 4405070719380142046LLU), QU(14923788365607284982LLU),
- QU( 2869487678905148380LLU), QU( 6416272754197188403LLU),
- QU(15017380475943612591LLU), QU( 1995636220918429487LLU),
- QU( 3402016804620122716LLU), QU(15800188663407057080LLU),
- QU(11362369990390932882LLU), QU(15262183501637986147LLU),
- QU(10239175385387371494LLU), QU( 9352042420365748334LLU),
- QU( 1682457034285119875LLU), QU( 1724710651376289644LLU),
- QU( 2038157098893817966LLU), QU( 9897825558324608773LLU),
- QU( 1477666236519164736LLU), QU(16835397314511233640LLU),
- QU(10370866327005346508LLU), QU(10157504370660621982LLU),
- QU(12113904045335882069LLU), QU(13326444439742783008LLU),
- QU(11302769043000765804LLU), QU(13594979923955228484LLU),
- QU(11779351762613475968LLU), QU( 3786101619539298383LLU),
- QU( 8021122969180846063LLU), QU(15745904401162500495LLU),
- QU(10762168465993897267LLU), QU(13552058957896319026LLU),
- QU(11200228655252462013LLU), QU( 5035370357337441226LLU),
- QU( 7593918984545500013LLU), QU( 5418554918361528700LLU),
- QU( 4858270799405446371LLU), QU( 9974659566876282544LLU),
- QU(18227595922273957859LLU), QU( 2772778443635656220LLU),
- QU(14285143053182085385LLU), QU( 9939700992429600469LLU),
- QU(12756185904545598068LLU), QU( 2020783375367345262LLU),
- QU( 57026775058331227LLU), QU( 950827867930065454LLU),
- QU( 6602279670145371217LLU), QU( 2291171535443566929LLU),
- QU( 5832380724425010313LLU), QU( 1220343904715982285LLU),
- QU(17045542598598037633LLU), QU(15460481779702820971LLU),
- QU(13948388779949365130LLU), QU(13975040175430829518LLU),
- QU(17477538238425541763LLU), QU(11104663041851745725LLU),
- QU(15860992957141157587LLU), QU(14529434633012950138LLU),
- QU( 2504838019075394203LLU), QU( 7512113882611121886LLU),
- QU( 4859973559980886617LLU), QU( 1258601555703250219LLU),
- QU(15594548157514316394LLU), QU( 4516730171963773048LLU),
- QU(11380103193905031983LLU), QU( 6809282239982353344LLU),
- QU(18045256930420065002LLU), QU( 2453702683108791859LLU),
- QU( 977214582986981460LLU), QU( 2006410402232713466LLU),
- QU( 6192236267216378358LLU), QU( 3429468402195675253LLU),
- QU(18146933153017348921LLU), QU(17369978576367231139LLU),
- QU( 1246940717230386603LLU), QU(11335758870083327110LLU),
- QU(14166488801730353682LLU), QU( 9008573127269635732LLU),
- QU(10776025389820643815LLU), QU(15087605441903942962LLU),
- QU( 1359542462712147922LLU), QU(13898874411226454206LLU),
- QU(17911176066536804411LLU), QU( 9435590428600085274LLU),
- QU( 294488509967864007LLU), QU( 8890111397567922046LLU),
- QU( 7987823476034328778LLU), QU(13263827582440967651LLU),
- QU( 7503774813106751573LLU), QU(14974747296185646837LLU),
- QU( 8504765037032103375LLU), QU(17340303357444536213LLU),
- QU( 7704610912964485743LLU), QU( 8107533670327205061LLU),
- QU( 9062969835083315985LLU), QU(16968963142126734184LLU),
- QU(12958041214190810180LLU), QU( 2720170147759570200LLU),
- QU( 2986358963942189566LLU), QU(14884226322219356580LLU),
- QU( 286224325144368520LLU), QU(11313800433154279797LLU),
- QU(18366849528439673248LLU), QU(17899725929482368789LLU),
- QU( 3730004284609106799LLU), QU( 1654474302052767205LLU),
- QU( 5006698007047077032LLU), QU( 8196893913601182838LLU),
- QU(15214541774425211640LLU), QU(17391346045606626073LLU),
- QU( 8369003584076969089LLU), QU( 3939046733368550293LLU),
- QU(10178639720308707785LLU), QU( 2180248669304388697LLU),
- QU( 62894391300126322LLU), QU( 9205708961736223191LLU),
- QU( 6837431058165360438LLU), QU( 3150743890848308214LLU),
- QU(17849330658111464583LLU), QU(12214815643135450865LLU),
- QU(13410713840519603402LLU), QU( 3200778126692046802LLU),
- QU(13354780043041779313LLU), QU( 800850022756886036LLU),
- QU(15660052933953067433LLU), QU( 6572823544154375676LLU),
- QU(11030281857015819266LLU), QU(12682241941471433835LLU),
- QU(11654136407300274693LLU), QU( 4517795492388641109LLU),
- QU( 9757017371504524244LLU), QU(17833043400781889277LLU),
- QU(12685085201747792227LLU), QU(10408057728835019573LLU),
- QU( 98370418513455221LLU), QU( 6732663555696848598LLU),
- QU(13248530959948529780LLU), QU( 3530441401230622826LLU),
- QU(18188251992895660615LLU), QU( 1847918354186383756LLU),
- QU( 1127392190402660921LLU), QU(11293734643143819463LLU),
- QU( 3015506344578682982LLU), QU(13852645444071153329LLU),
- QU( 2121359659091349142LLU), QU( 1294604376116677694LLU),
- QU( 5616576231286352318LLU), QU( 7112502442954235625LLU),
- QU(11676228199551561689LLU), QU(12925182803007305359LLU),
- QU( 7852375518160493082LLU), QU( 1136513130539296154LLU),
- QU( 5636923900916593195LLU), QU( 3221077517612607747LLU),
- QU(17784790465798152513LLU), QU( 3554210049056995938LLU),
- QU(17476839685878225874LLU), QU( 3206836372585575732LLU),
- QU( 2765333945644823430LLU), QU(10080070903718799528LLU),
- QU( 5412370818878286353LLU), QU( 9689685887726257728LLU),
- QU( 8236117509123533998LLU), QU( 1951139137165040214LLU),
- QU( 4492205209227980349LLU), QU(16541291230861602967LLU),
- QU( 1424371548301437940LLU), QU( 9117562079669206794LLU),
- QU(14374681563251691625LLU), QU(13873164030199921303LLU),
- QU( 6680317946770936731LLU), QU(15586334026918276214LLU),
- QU(10896213950976109802LLU), QU( 9506261949596413689LLU),
- QU( 9903949574308040616LLU), QU( 6038397344557204470LLU),
- QU( 174601465422373648LLU), QU(15946141191338238030LLU),
- QU(17142225620992044937LLU), QU( 7552030283784477064LLU),
- QU( 2947372384532947997LLU), QU( 510797021688197711LLU),
- QU( 4962499439249363461LLU), QU( 23770320158385357LLU),
- QU( 959774499105138124LLU), QU( 1468396011518788276LLU),
- QU( 2015698006852312308LLU), QU( 4149400718489980136LLU),
- QU( 5992916099522371188LLU), QU(10819182935265531076LLU),
- QU(16189787999192351131LLU), QU( 342833961790261950LLU),
- QU(12470830319550495336LLU), QU(18128495041912812501LLU),
- QU( 1193600899723524337LLU), QU( 9056793666590079770LLU),
- QU( 2154021227041669041LLU), QU( 4963570213951235735LLU),
- QU( 4865075960209211409LLU), QU( 2097724599039942963LLU),
- QU( 2024080278583179845LLU), QU(11527054549196576736LLU),
- QU(10650256084182390252LLU), QU( 4808408648695766755LLU),
- QU( 1642839215013788844LLU), QU(10607187948250398390LLU),
- QU( 7076868166085913508LLU), QU( 730522571106887032LLU),
- QU(12500579240208524895LLU), QU( 4484390097311355324LLU),
- QU(15145801330700623870LLU), QU( 8055827661392944028LLU),
- QU( 5865092976832712268LLU), QU(15159212508053625143LLU),
- QU( 3560964582876483341LLU), QU( 4070052741344438280LLU),
- QU( 6032585709886855634LLU), QU(15643262320904604873LLU),
- QU( 2565119772293371111LLU), QU( 318314293065348260LLU),
- QU(15047458749141511872LLU), QU( 7772788389811528730LLU),
- QU( 7081187494343801976LLU), QU( 6465136009467253947LLU),
- QU(10425940692543362069LLU), QU( 554608190318339115LLU),
- QU(14796699860302125214LLU), QU( 1638153134431111443LLU),
- QU(10336967447052276248LLU), QU( 8412308070396592958LLU),
- QU( 4004557277152051226LLU), QU( 8143598997278774834LLU),
- QU(16413323996508783221LLU), QU(13139418758033994949LLU),
- QU( 9772709138335006667LLU), QU( 2818167159287157659LLU),
- QU(17091740573832523669LLU), QU(14629199013130751608LLU),
- QU(18268322711500338185LLU), QU( 8290963415675493063LLU),
- QU( 8830864907452542588LLU), QU( 1614839084637494849LLU),
- QU(14855358500870422231LLU), QU( 3472996748392519937LLU),
- QU(15317151166268877716LLU), QU( 5825895018698400362LLU),
- QU(16730208429367544129LLU), QU(10481156578141202800LLU),
- QU( 4746166512382823750LLU), QU(12720876014472464998LLU),
- QU( 8825177124486735972LLU), QU(13733447296837467838LLU),
- QU( 6412293741681359625LLU), QU( 8313213138756135033LLU),
- QU(11421481194803712517LLU), QU( 7997007691544174032LLU),
- QU( 6812963847917605930LLU), QU( 9683091901227558641LLU),
- QU(14703594165860324713LLU), QU( 1775476144519618309LLU),
- QU( 2724283288516469519LLU), QU( 717642555185856868LLU),
- QU( 8736402192215092346LLU), QU(11878800336431381021LLU),
- QU( 4348816066017061293LLU), QU( 6115112756583631307LLU),
- QU( 9176597239667142976LLU), QU(12615622714894259204LLU),
- QU(10283406711301385987LLU), QU( 5111762509485379420LLU),
- QU( 3118290051198688449LLU), QU( 7345123071632232145LLU),
- QU( 9176423451688682359LLU), QU( 4843865456157868971LLU),
- QU(12008036363752566088LLU), QU(12058837181919397720LLU),
- QU( 2145073958457347366LLU), QU( 1526504881672818067LLU),
- QU( 3488830105567134848LLU), QU(13208362960674805143LLU),
- QU( 4077549672899572192LLU), QU( 7770995684693818365LLU),
- QU( 1398532341546313593LLU), QU(12711859908703927840LLU),
- QU( 1417561172594446813LLU), QU(17045191024194170604LLU),
- QU( 4101933177604931713LLU), QU(14708428834203480320LLU),
- QU(17447509264469407724LLU), QU(14314821973983434255LLU),
- QU(17990472271061617265LLU), QU( 5087756685841673942LLU),
- QU(12797820586893859939LLU), QU( 1778128952671092879LLU),
- QU( 3535918530508665898LLU), QU( 9035729701042481301LLU),
- QU(14808661568277079962LLU), QU(14587345077537747914LLU),
- QU(11920080002323122708LLU), QU( 6426515805197278753LLU),
- QU( 3295612216725984831LLU), QU(11040722532100876120LLU),
- QU(12305952936387598754LLU), QU(16097391899742004253LLU),
- QU( 4908537335606182208LLU), QU(12446674552196795504LLU),
- QU(16010497855816895177LLU), QU( 9194378874788615551LLU),
- QU( 3382957529567613384LLU), QU( 5154647600754974077LLU),
- QU( 9801822865328396141LLU), QU( 9023662173919288143LLU),
- QU(17623115353825147868LLU), QU( 8238115767443015816LLU),
- QU(15811444159859002560LLU), QU( 9085612528904059661LLU),
- QU( 6888601089398614254LLU), QU( 258252992894160189LLU),
- QU( 6704363880792428622LLU), QU( 6114966032147235763LLU),
- QU(11075393882690261875LLU), QU( 8797664238933620407LLU),
- QU( 5901892006476726920LLU), QU( 5309780159285518958LLU),
- QU(14940808387240817367LLU), QU(14642032021449656698LLU),
- QU( 9808256672068504139LLU), QU( 3670135111380607658LLU),
- QU(11211211097845960152LLU), QU( 1474304506716695808LLU),
- QU(15843166204506876239LLU), QU( 7661051252471780561LLU),
- QU(10170905502249418476LLU), QU( 7801416045582028589LLU),
- QU( 2763981484737053050LLU), QU( 9491377905499253054LLU),
- QU(16201395896336915095LLU), QU( 9256513756442782198LLU),
- QU( 5411283157972456034LLU), QU( 5059433122288321676LLU),
- QU( 4327408006721123357LLU), QU( 9278544078834433377LLU),
- QU( 7601527110882281612LLU), QU(11848295896975505251LLU),
- QU(12096998801094735560LLU), QU(14773480339823506413LLU),
- QU(15586227433895802149LLU), QU(12786541257830242872LLU),
- QU( 6904692985140503067LLU), QU( 5309011515263103959LLU),
- QU(12105257191179371066LLU), QU(14654380212442225037LLU),
- QU( 2556774974190695009LLU), QU( 4461297399927600261LLU),
- QU(14888225660915118646LLU), QU(14915459341148291824LLU),
- QU( 2738802166252327631LLU), QU( 6047155789239131512LLU),
- QU(12920545353217010338LLU), QU(10697617257007840205LLU),
- QU( 2751585253158203504LLU), QU(13252729159780047496LLU),
- QU(14700326134672815469LLU), QU(14082527904374600529LLU),
- QU(16852962273496542070LLU), QU(17446675504235853907LLU),
- QU(15019600398527572311LLU), QU(12312781346344081551LLU),
- QU(14524667935039810450LLU), QU( 5634005663377195738LLU),
- QU(11375574739525000569LLU), QU( 2423665396433260040LLU),
- QU( 5222836914796015410LLU), QU( 4397666386492647387LLU),
- QU( 4619294441691707638LLU), QU( 665088602354770716LLU),
- QU(13246495665281593610LLU), QU( 6564144270549729409LLU),
- QU(10223216188145661688LLU), QU( 3961556907299230585LLU),
- QU(11543262515492439914LLU), QU(16118031437285993790LLU),
- QU( 7143417964520166465LLU), QU(13295053515909486772LLU),
- QU( 40434666004899675LLU), QU(17127804194038347164LLU),
- QU( 8599165966560586269LLU), QU( 8214016749011284903LLU),
- QU(13725130352140465239LLU), QU( 5467254474431726291LLU),
- QU( 7748584297438219877LLU), QU(16933551114829772472LLU),
- QU( 2169618439506799400LLU), QU( 2169787627665113463LLU),
- QU(17314493571267943764LLU), QU(18053575102911354912LLU),
- QU(11928303275378476973LLU), QU(11593850925061715550LLU),
- QU(17782269923473589362LLU), QU( 3280235307704747039LLU),
- QU( 6145343578598685149LLU), QU(17080117031114086090LLU),
- QU(18066839902983594755LLU), QU( 6517508430331020706LLU),
- QU( 8092908893950411541LLU), QU(12558378233386153732LLU),
- QU( 4476532167973132976LLU), QU(16081642430367025016LLU),
- QU( 4233154094369139361LLU), QU( 8693630486693161027LLU),
- QU(11244959343027742285LLU), QU(12273503967768513508LLU),
- QU(14108978636385284876LLU), QU( 7242414665378826984LLU),
- QU( 6561316938846562432LLU), QU( 8601038474994665795LLU),
- QU(17532942353612365904LLU), QU(17940076637020912186LLU),
- QU( 7340260368823171304LLU), QU( 7061807613916067905LLU),
- QU(10561734935039519326LLU), QU(17990796503724650862LLU),
- QU( 6208732943911827159LLU), QU( 359077562804090617LLU),
- QU(14177751537784403113LLU), QU(10659599444915362902LLU),
- QU(15081727220615085833LLU), QU(13417573895659757486LLU),
- QU(15513842342017811524LLU), QU(11814141516204288231LLU),
- QU( 1827312513875101814LLU), QU( 2804611699894603103LLU),
- QU(17116500469975602763LLU), QU(12270191815211952087LLU),
- QU(12256358467786024988LLU), QU(18435021722453971267LLU),
- QU( 671330264390865618LLU), QU( 476504300460286050LLU),
- QU(16465470901027093441LLU), QU( 4047724406247136402LLU),
- QU( 1322305451411883346LLU), QU( 1388308688834322280LLU),
- QU( 7303989085269758176LLU), QU( 9323792664765233642LLU),
- QU( 4542762575316368936LLU), QU(17342696132794337618LLU),
- QU( 4588025054768498379LLU), QU(13415475057390330804LLU),
- QU(17880279491733405570LLU), QU(10610553400618620353LLU),
- QU( 3180842072658960139LLU), QU(13002966655454270120LLU),
- QU( 1665301181064982826LLU), QU( 7083673946791258979LLU),
- QU( 190522247122496820LLU), QU(17388280237250677740LLU),
- QU( 8430770379923642945LLU), QU(12987180971921668584LLU),
- QU( 2311086108365390642LLU), QU( 2870984383579822345LLU),
- QU(14014682609164653318LLU), QU(14467187293062251484LLU),
- QU( 192186361147413298LLU), QU(15171951713531796524LLU),
- QU( 9900305495015948728LLU), QU(17958004775615466344LLU),
- QU(14346380954498606514LLU), QU(18040047357617407096LLU),
- QU( 5035237584833424532LLU), QU(15089555460613972287LLU),
- QU( 4131411873749729831LLU), QU( 1329013581168250330LLU),
- QU(10095353333051193949LLU), QU(10749518561022462716LLU),
- QU( 9050611429810755847LLU), QU(15022028840236655649LLU),
- QU( 8775554279239748298LLU), QU(13105754025489230502LLU),
- QU(15471300118574167585LLU), QU( 89864764002355628LLU),
- QU( 8776416323420466637LLU), QU( 5280258630612040891LLU),
- QU( 2719174488591862912LLU), QU( 7599309137399661994LLU),
- QU(15012887256778039979LLU), QU(14062981725630928925LLU),
- QU(12038536286991689603LLU), QU( 7089756544681775245LLU),
- QU(10376661532744718039LLU), QU( 1265198725901533130LLU),
- QU(13807996727081142408LLU), QU( 2935019626765036403LLU),
- QU( 7651672460680700141LLU), QU( 3644093016200370795LLU),
- QU( 2840982578090080674LLU), QU(17956262740157449201LLU),
- QU(18267979450492880548LLU), QU(11799503659796848070LLU),
- QU( 9942537025669672388LLU), QU(11886606816406990297LLU),
- QU( 5488594946437447576LLU), QU( 7226714353282744302LLU),
- QU( 3784851653123877043LLU), QU( 878018453244803041LLU),
- QU(12110022586268616085LLU), QU( 734072179404675123LLU),
- QU(11869573627998248542LLU), QU( 469150421297783998LLU),
- QU( 260151124912803804LLU), QU(11639179410120968649LLU),
- QU( 9318165193840846253LLU), QU(12795671722734758075LLU),
- QU(15318410297267253933LLU), QU( 691524703570062620LLU),
- QU( 5837129010576994601LLU), QU(15045963859726941052LLU),
- QU( 5850056944932238169LLU), QU(12017434144750943807LLU),
- QU( 7447139064928956574LLU), QU( 3101711812658245019LLU),
- QU(16052940704474982954LLU), QU(18195745945986994042LLU),
- QU( 8932252132785575659LLU), QU(13390817488106794834LLU),
- QU(11582771836502517453LLU), QU( 4964411326683611686LLU),
- QU( 2195093981702694011LLU), QU(14145229538389675669LLU),
- QU(16459605532062271798LLU), QU( 866316924816482864LLU),
- QU( 4593041209937286377LLU), QU( 8415491391910972138LLU),
- QU( 4171236715600528969LLU), QU(16637569303336782889LLU),
- QU( 2002011073439212680LLU), QU(17695124661097601411LLU),
- QU( 4627687053598611702LLU), QU( 7895831936020190403LLU),
- QU( 8455951300917267802LLU), QU( 2923861649108534854LLU),
- QU( 8344557563927786255LLU), QU( 6408671940373352556LLU),
- QU(12210227354536675772LLU), QU(14294804157294222295LLU),
- QU(10103022425071085127LLU), QU(10092959489504123771LLU),
- QU( 6554774405376736268LLU), QU(12629917718410641774LLU),
- QU( 6260933257596067126LLU), QU( 2460827021439369673LLU),
- QU( 2541962996717103668LLU), QU( 597377203127351475LLU),
- QU( 5316984203117315309LLU), QU( 4811211393563241961LLU),
- QU(13119698597255811641LLU), QU( 8048691512862388981LLU),
- QU(10216818971194073842LLU), QU( 4612229970165291764LLU),
- QU(10000980798419974770LLU), QU( 6877640812402540687LLU),
- QU( 1488727563290436992LLU), QU( 2227774069895697318LLU),
- QU(11237754507523316593LLU), QU(13478948605382290972LLU),
- QU( 1963583846976858124LLU), QU( 5512309205269276457LLU),
- QU( 3972770164717652347LLU), QU( 3841751276198975037LLU),
- QU(10283343042181903117LLU), QU( 8564001259792872199LLU),
- QU(16472187244722489221LLU), QU( 8953493499268945921LLU),
- QU( 3518747340357279580LLU), QU( 4003157546223963073LLU),
- QU( 3270305958289814590LLU), QU( 3966704458129482496LLU),
- QU( 8122141865926661939LLU), QU(14627734748099506653LLU),
- QU(13064426990862560568LLU), QU( 2414079187889870829LLU),
- QU( 5378461209354225306LLU), QU(10841985740128255566LLU),
- QU( 538582442885401738LLU), QU( 7535089183482905946LLU),
- QU(16117559957598879095LLU), QU( 8477890721414539741LLU),
- QU( 1459127491209533386LLU), QU(17035126360733620462LLU),
- QU( 8517668552872379126LLU), QU(10292151468337355014LLU),
- QU(17081267732745344157LLU), QU(13751455337946087178LLU),
- QU(14026945459523832966LLU), QU( 6653278775061723516LLU),
- QU(10619085543856390441LLU), QU( 2196343631481122885LLU),
- QU(10045966074702826136LLU), QU(10082317330452718282LLU),
- QU( 5920859259504831242LLU), QU( 9951879073426540617LLU),
- QU( 7074696649151414158LLU), QU(15808193543879464318LLU),
- QU( 7385247772746953374LLU), QU( 3192003544283864292LLU),
- QU(18153684490917593847LLU), QU(12423498260668568905LLU),
- QU(10957758099756378169LLU), QU(11488762179911016040LLU),
- QU( 2099931186465333782LLU), QU(11180979581250294432LLU),
- QU( 8098916250668367933LLU), QU( 3529200436790763465LLU),
- QU(12988418908674681745LLU), QU( 6147567275954808580LLU),
- QU( 3207503344604030989LLU), QU(10761592604898615360LLU),
- QU( 229854861031893504LLU), QU( 8809853962667144291LLU),
- QU(13957364469005693860LLU), QU( 7634287665224495886LLU),
- QU(12353487366976556874LLU), QU( 1134423796317152034LLU),
- QU( 2088992471334107068LLU), QU( 7393372127190799698LLU),
- QU( 1845367839871058391LLU), QU( 207922563987322884LLU),
- QU(11960870813159944976LLU), QU(12182120053317317363LLU),
- QU(17307358132571709283LLU), QU(13871081155552824936LLU),
- QU(18304446751741566262LLU), QU( 7178705220184302849LLU),
- QU(10929605677758824425LLU), QU(16446976977835806844LLU),
- QU(13723874412159769044LLU), QU( 6942854352100915216LLU),
- QU( 1726308474365729390LLU), QU( 2150078766445323155LLU),
- QU(15345558947919656626LLU), QU(12145453828874527201LLU),
- QU( 2054448620739726849LLU), QU( 2740102003352628137LLU),
- QU(11294462163577610655LLU), QU( 756164283387413743LLU),
- QU(17841144758438810880LLU), QU(10802406021185415861LLU),
- QU( 8716455530476737846LLU), QU( 6321788834517649606LLU),
- QU(14681322910577468426LLU), QU(17330043563884336387LLU),
- QU(12701802180050071614LLU), QU(14695105111079727151LLU),
- QU( 5112098511654172830LLU), QU( 4957505496794139973LLU),
- QU( 8270979451952045982LLU), QU(12307685939199120969LLU),
- QU(12425799408953443032LLU), QU( 8376410143634796588LLU),
- QU(16621778679680060464LLU), QU( 3580497854566660073LLU),
- QU( 1122515747803382416LLU), QU( 857664980960597599LLU),
- QU( 6343640119895925918LLU), QU(12878473260854462891LLU),
- QU(10036813920765722626LLU), QU(14451335468363173812LLU),
- QU( 5476809692401102807LLU), QU(16442255173514366342LLU),
- QU(13060203194757167104LLU), QU(14354124071243177715LLU),
- QU(15961249405696125227LLU), QU(13703893649690872584LLU),
- QU( 363907326340340064LLU), QU( 6247455540491754842LLU),
- QU(12242249332757832361LLU), QU( 156065475679796717LLU),
- QU( 9351116235749732355LLU), QU( 4590350628677701405LLU),
- QU( 1671195940982350389LLU), QU(13501398458898451905LLU),
- QU( 6526341991225002255LLU), QU( 1689782913778157592LLU),
- QU( 7439222350869010334LLU), QU(13975150263226478308LLU),
- QU(11411961169932682710LLU), QU(17204271834833847277LLU),
- QU( 541534742544435367LLU), QU( 6591191931218949684LLU),
- QU( 2645454775478232486LLU), QU( 4322857481256485321LLU),
- QU( 8477416487553065110LLU), QU(12902505428548435048LLU),
- QU( 971445777981341415LLU), QU(14995104682744976712LLU),
- QU( 4243341648807158063LLU), QU( 8695061252721927661LLU),
- QU( 5028202003270177222LLU), QU( 2289257340915567840LLU),
- QU(13870416345121866007LLU), QU(13994481698072092233LLU),
- QU( 6912785400753196481LLU), QU( 2278309315841980139LLU),
- QU( 4329765449648304839LLU), QU( 5963108095785485298LLU),
- QU( 4880024847478722478LLU), QU(16015608779890240947LLU),
- QU( 1866679034261393544LLU), QU( 914821179919731519LLU),
- QU( 9643404035648760131LLU), QU( 2418114953615593915LLU),
- QU( 944756836073702374LLU), QU(15186388048737296834LLU),
- QU( 7723355336128442206LLU), QU( 7500747479679599691LLU),
- QU(18013961306453293634LLU), QU( 2315274808095756456LLU),
- QU(13655308255424029566LLU), QU(17203800273561677098LLU),
- QU( 1382158694422087756LLU), QU( 5090390250309588976LLU),
- QU( 517170818384213989LLU), QU( 1612709252627729621LLU),
- QU( 1330118955572449606LLU), QU( 300922478056709885LLU),
- QU(18115693291289091987LLU), QU(13491407109725238321LLU),
- QU(15293714633593827320LLU), QU( 5151539373053314504LLU),
- QU( 5951523243743139207LLU), QU(14459112015249527975LLU),
- QU( 5456113959000700739LLU), QU( 3877918438464873016LLU),
- QU(12534071654260163555LLU), QU(15871678376893555041LLU),
- QU(11005484805712025549LLU), QU(16353066973143374252LLU),
- QU( 4358331472063256685LLU), QU( 8268349332210859288LLU),
- QU(12485161590939658075LLU), QU(13955993592854471343LLU),
- QU( 5911446886848367039LLU), QU(14925834086813706974LLU),
- QU( 6590362597857994805LLU), QU( 1280544923533661875LLU),
- QU( 1637756018947988164LLU), QU( 4734090064512686329LLU),
- QU(16693705263131485912LLU), QU( 6834882340494360958LLU),
- QU( 8120732176159658505LLU), QU( 2244371958905329346LLU),
- QU(10447499707729734021LLU), QU( 7318742361446942194LLU),
- QU( 8032857516355555296LLU), QU(14023605983059313116LLU),
- QU( 1032336061815461376LLU), QU( 9840995337876562612LLU),
- QU( 9869256223029203587LLU), QU(12227975697177267636LLU),
- QU(12728115115844186033LLU), QU( 7752058479783205470LLU),
- QU( 729733219713393087LLU), QU(12954017801239007622LLU)
+ KQU(16924766246869039260), KQU( 8201438687333352714),
+ KQU( 2265290287015001750), KQU(18397264611805473832),
+ KQU( 3375255223302384358), KQU( 6345559975416828796),
+ KQU(18229739242790328073), KQU( 7596792742098800905),
+ KQU( 255338647169685981), KQU( 2052747240048610300),
+ KQU(18328151576097299343), KQU(12472905421133796567),
+ KQU(11315245349717600863), KQU(16594110197775871209),
+ KQU(15708751964632456450), KQU(10452031272054632535),
+ KQU(11097646720811454386), KQU( 4556090668445745441),
+ KQU(17116187693090663106), KQU(14931526836144510645),
+ KQU( 9190752218020552591), KQU( 9625800285771901401),
+ KQU(13995141077659972832), KQU( 5194209094927829625),
+ KQU( 4156788379151063303), KQU( 8523452593770139494),
+ KQU(14082382103049296727), KQU( 2462601863986088483),
+ KQU( 3030583461592840678), KQU( 5221622077872827681),
+ KQU( 3084210671228981236), KQU(13956758381389953823),
+ KQU(13503889856213423831), KQU(15696904024189836170),
+ KQU( 4612584152877036206), KQU( 6231135538447867881),
+ KQU(10172457294158869468), KQU( 6452258628466708150),
+ KQU(14044432824917330221), KQU( 370168364480044279),
+ KQU(10102144686427193359), KQU( 667870489994776076),
+ KQU( 2732271956925885858), KQU(18027788905977284151),
+ KQU(15009842788582923859), KQU( 7136357960180199542),
+ KQU(15901736243475578127), KQU(16951293785352615701),
+ KQU(10551492125243691632), KQU(17668869969146434804),
+ KQU(13646002971174390445), KQU( 9804471050759613248),
+ KQU( 5511670439655935493), KQU(18103342091070400926),
+ KQU(17224512747665137533), KQU(15534627482992618168),
+ KQU( 1423813266186582647), KQU(15821176807932930024),
+ KQU( 30323369733607156), KQU(11599382494723479403),
+ KQU( 653856076586810062), KQU( 3176437395144899659),
+ KQU(14028076268147963917), KQU(16156398271809666195),
+ KQU( 3166955484848201676), KQU( 5746805620136919390),
+ KQU(17297845208891256593), KQU(11691653183226428483),
+ KQU(17900026146506981577), KQU(15387382115755971042),
+ KQU(16923567681040845943), KQU( 8039057517199388606),
+ KQU(11748409241468629263), KQU( 794358245539076095),
+ KQU(13438501964693401242), KQU(14036803236515618962),
+ KQU( 5252311215205424721), KQU(17806589612915509081),
+ KQU( 6802767092397596006), KQU(14212120431184557140),
+ KQU( 1072951366761385712), KQU(13098491780722836296),
+ KQU( 9466676828710797353), KQU(12673056849042830081),
+ KQU(12763726623645357580), KQU(16468961652999309493),
+ KQU(15305979875636438926), KQU(17444713151223449734),
+ KQU( 5692214267627883674), KQU(13049589139196151505),
+ KQU( 880115207831670745), KQU( 1776529075789695498),
+ KQU(16695225897801466485), KQU(10666901778795346845),
+ KQU( 6164389346722833869), KQU( 2863817793264300475),
+ KQU( 9464049921886304754), KQU( 3993566636740015468),
+ KQU( 9983749692528514136), KQU(16375286075057755211),
+ KQU(16042643417005440820), KQU(11445419662923489877),
+ KQU( 7999038846885158836), KQU( 6721913661721511535),
+ KQU( 5363052654139357320), KQU( 1817788761173584205),
+ KQU(13290974386445856444), KQU( 4650350818937984680),
+ KQU( 8219183528102484836), KQU( 1569862923500819899),
+ KQU( 4189359732136641860), KQU(14202822961683148583),
+ KQU( 4457498315309429058), KQU(13089067387019074834),
+ KQU(11075517153328927293), KQU(10277016248336668389),
+ KQU( 7070509725324401122), KQU(17808892017780289380),
+ KQU(13143367339909287349), KQU( 1377743745360085151),
+ KQU( 5749341807421286485), KQU(14832814616770931325),
+ KQU( 7688820635324359492), KQU(10960474011539770045),
+ KQU( 81970066653179790), KQU(12619476072607878022),
+ KQU( 4419566616271201744), KQU(15147917311750568503),
+ KQU( 5549739182852706345), KQU( 7308198397975204770),
+ KQU(13580425496671289278), KQU(17070764785210130301),
+ KQU( 8202832846285604405), KQU( 6873046287640887249),
+ KQU( 6927424434308206114), KQU( 6139014645937224874),
+ KQU(10290373645978487639), KQU(15904261291701523804),
+ KQU( 9628743442057826883), KQU(18383429096255546714),
+ KQU( 4977413265753686967), KQU( 7714317492425012869),
+ KQU( 9025232586309926193), KQU(14627338359776709107),
+ KQU(14759849896467790763), KQU(10931129435864423252),
+ KQU( 4588456988775014359), KQU(10699388531797056724),
+ KQU( 468652268869238792), KQU( 5755943035328078086),
+ KQU( 2102437379988580216), KQU( 9986312786506674028),
+ KQU( 2654207180040945604), KQU( 8726634790559960062),
+ KQU( 100497234871808137), KQU( 2800137176951425819),
+ KQU( 6076627612918553487), KQU( 5780186919186152796),
+ KQU( 8179183595769929098), KQU( 6009426283716221169),
+ KQU( 2796662551397449358), KQU( 1756961367041986764),
+ KQU( 6972897917355606205), KQU(14524774345368968243),
+ KQU( 2773529684745706940), KQU( 4853632376213075959),
+ KQU( 4198177923731358102), KQU( 8271224913084139776),
+ KQU( 2741753121611092226), KQU(16782366145996731181),
+ KQU(15426125238972640790), KQU(13595497100671260342),
+ KQU( 3173531022836259898), KQU( 6573264560319511662),
+ KQU(18041111951511157441), KQU( 2351433581833135952),
+ KQU( 3113255578908173487), KQU( 1739371330877858784),
+ KQU(16046126562789165480), KQU( 8072101652214192925),
+ KQU(15267091584090664910), KQU( 9309579200403648940),
+ KQU( 5218892439752408722), KQU(14492477246004337115),
+ KQU(17431037586679770619), KQU( 7385248135963250480),
+ KQU( 9580144956565560660), KQU( 4919546228040008720),
+ KQU(15261542469145035584), KQU(18233297270822253102),
+ KQU( 5453248417992302857), KQU( 9309519155931460285),
+ KQU(10342813012345291756), KQU(15676085186784762381),
+ KQU(15912092950691300645), KQU( 9371053121499003195),
+ KQU( 9897186478226866746), KQU(14061858287188196327),
+ KQU( 122575971620788119), KQU(12146750969116317754),
+ KQU( 4438317272813245201), KQU( 8332576791009527119),
+ KQU(13907785691786542057), KQU(10374194887283287467),
+ KQU( 2098798755649059566), KQU( 3416235197748288894),
+ KQU( 8688269957320773484), KQU( 7503964602397371571),
+ KQU(16724977015147478236), KQU( 9461512855439858184),
+ KQU(13259049744534534727), KQU( 3583094952542899294),
+ KQU( 8764245731305528292), KQU(13240823595462088985),
+ KQU(13716141617617910448), KQU(18114969519935960955),
+ KQU( 2297553615798302206), KQU( 4585521442944663362),
+ KQU(17776858680630198686), KQU( 4685873229192163363),
+ KQU( 152558080671135627), KQU(15424900540842670088),
+ KQU(13229630297130024108), KQU(17530268788245718717),
+ KQU(16675633913065714144), KQU( 3158912717897568068),
+ KQU(15399132185380087288), KQU( 7401418744515677872),
+ KQU(13135412922344398535), KQU( 6385314346100509511),
+ KQU(13962867001134161139), KQU(10272780155442671999),
+ KQU(12894856086597769142), KQU(13340877795287554994),
+ KQU(12913630602094607396), KQU(12543167911119793857),
+ KQU(17343570372251873096), KQU(10959487764494150545),
+ KQU( 6966737953093821128), KQU(13780699135496988601),
+ KQU( 4405070719380142046), KQU(14923788365607284982),
+ KQU( 2869487678905148380), KQU( 6416272754197188403),
+ KQU(15017380475943612591), KQU( 1995636220918429487),
+ KQU( 3402016804620122716), KQU(15800188663407057080),
+ KQU(11362369990390932882), KQU(15262183501637986147),
+ KQU(10239175385387371494), KQU( 9352042420365748334),
+ KQU( 1682457034285119875), KQU( 1724710651376289644),
+ KQU( 2038157098893817966), KQU( 9897825558324608773),
+ KQU( 1477666236519164736), KQU(16835397314511233640),
+ KQU(10370866327005346508), KQU(10157504370660621982),
+ KQU(12113904045335882069), KQU(13326444439742783008),
+ KQU(11302769043000765804), KQU(13594979923955228484),
+ KQU(11779351762613475968), KQU( 3786101619539298383),
+ KQU( 8021122969180846063), KQU(15745904401162500495),
+ KQU(10762168465993897267), KQU(13552058957896319026),
+ KQU(11200228655252462013), KQU( 5035370357337441226),
+ KQU( 7593918984545500013), KQU( 5418554918361528700),
+ KQU( 4858270799405446371), KQU( 9974659566876282544),
+ KQU(18227595922273957859), KQU( 2772778443635656220),
+ KQU(14285143053182085385), KQU( 9939700992429600469),
+ KQU(12756185904545598068), KQU( 2020783375367345262),
+ KQU( 57026775058331227), KQU( 950827867930065454),
+ KQU( 6602279670145371217), KQU( 2291171535443566929),
+ KQU( 5832380724425010313), KQU( 1220343904715982285),
+ KQU(17045542598598037633), KQU(15460481779702820971),
+ KQU(13948388779949365130), KQU(13975040175430829518),
+ KQU(17477538238425541763), KQU(11104663041851745725),
+ KQU(15860992957141157587), KQU(14529434633012950138),
+ KQU( 2504838019075394203), KQU( 7512113882611121886),
+ KQU( 4859973559980886617), KQU( 1258601555703250219),
+ KQU(15594548157514316394), KQU( 4516730171963773048),
+ KQU(11380103193905031983), KQU( 6809282239982353344),
+ KQU(18045256930420065002), KQU( 2453702683108791859),
+ KQU( 977214582986981460), KQU( 2006410402232713466),
+ KQU( 6192236267216378358), KQU( 3429468402195675253),
+ KQU(18146933153017348921), KQU(17369978576367231139),
+ KQU( 1246940717230386603), KQU(11335758870083327110),
+ KQU(14166488801730353682), KQU( 9008573127269635732),
+ KQU(10776025389820643815), KQU(15087605441903942962),
+ KQU( 1359542462712147922), KQU(13898874411226454206),
+ KQU(17911176066536804411), KQU( 9435590428600085274),
+ KQU( 294488509967864007), KQU( 8890111397567922046),
+ KQU( 7987823476034328778), KQU(13263827582440967651),
+ KQU( 7503774813106751573), KQU(14974747296185646837),
+ KQU( 8504765037032103375), KQU(17340303357444536213),
+ KQU( 7704610912964485743), KQU( 8107533670327205061),
+ KQU( 9062969835083315985), KQU(16968963142126734184),
+ KQU(12958041214190810180), KQU( 2720170147759570200),
+ KQU( 2986358963942189566), KQU(14884226322219356580),
+ KQU( 286224325144368520), KQU(11313800433154279797),
+ KQU(18366849528439673248), KQU(17899725929482368789),
+ KQU( 3730004284609106799), KQU( 1654474302052767205),
+ KQU( 5006698007047077032), KQU( 8196893913601182838),
+ KQU(15214541774425211640), KQU(17391346045606626073),
+ KQU( 8369003584076969089), KQU( 3939046733368550293),
+ KQU(10178639720308707785), KQU( 2180248669304388697),
+ KQU( 62894391300126322), KQU( 9205708961736223191),
+ KQU( 6837431058165360438), KQU( 3150743890848308214),
+ KQU(17849330658111464583), KQU(12214815643135450865),
+ KQU(13410713840519603402), KQU( 3200778126692046802),
+ KQU(13354780043041779313), KQU( 800850022756886036),
+ KQU(15660052933953067433), KQU( 6572823544154375676),
+ KQU(11030281857015819266), KQU(12682241941471433835),
+ KQU(11654136407300274693), KQU( 4517795492388641109),
+ KQU( 9757017371504524244), KQU(17833043400781889277),
+ KQU(12685085201747792227), KQU(10408057728835019573),
+ KQU( 98370418513455221), KQU( 6732663555696848598),
+ KQU(13248530959948529780), KQU( 3530441401230622826),
+ KQU(18188251992895660615), KQU( 1847918354186383756),
+ KQU( 1127392190402660921), KQU(11293734643143819463),
+ KQU( 3015506344578682982), KQU(13852645444071153329),
+ KQU( 2121359659091349142), KQU( 1294604376116677694),
+ KQU( 5616576231286352318), KQU( 7112502442954235625),
+ KQU(11676228199551561689), KQU(12925182803007305359),
+ KQU( 7852375518160493082), KQU( 1136513130539296154),
+ KQU( 5636923900916593195), KQU( 3221077517612607747),
+ KQU(17784790465798152513), KQU( 3554210049056995938),
+ KQU(17476839685878225874), KQU( 3206836372585575732),
+ KQU( 2765333945644823430), KQU(10080070903718799528),
+ KQU( 5412370818878286353), KQU( 9689685887726257728),
+ KQU( 8236117509123533998), KQU( 1951139137165040214),
+ KQU( 4492205209227980349), KQU(16541291230861602967),
+ KQU( 1424371548301437940), KQU( 9117562079669206794),
+ KQU(14374681563251691625), KQU(13873164030199921303),
+ KQU( 6680317946770936731), KQU(15586334026918276214),
+ KQU(10896213950976109802), KQU( 9506261949596413689),
+ KQU( 9903949574308040616), KQU( 6038397344557204470),
+ KQU( 174601465422373648), KQU(15946141191338238030),
+ KQU(17142225620992044937), KQU( 7552030283784477064),
+ KQU( 2947372384532947997), KQU( 510797021688197711),
+ KQU( 4962499439249363461), KQU( 23770320158385357),
+ KQU( 959774499105138124), KQU( 1468396011518788276),
+ KQU( 2015698006852312308), KQU( 4149400718489980136),
+ KQU( 5992916099522371188), KQU(10819182935265531076),
+ KQU(16189787999192351131), KQU( 342833961790261950),
+ KQU(12470830319550495336), KQU(18128495041912812501),
+ KQU( 1193600899723524337), KQU( 9056793666590079770),
+ KQU( 2154021227041669041), KQU( 4963570213951235735),
+ KQU( 4865075960209211409), KQU( 2097724599039942963),
+ KQU( 2024080278583179845), KQU(11527054549196576736),
+ KQU(10650256084182390252), KQU( 4808408648695766755),
+ KQU( 1642839215013788844), KQU(10607187948250398390),
+ KQU( 7076868166085913508), KQU( 730522571106887032),
+ KQU(12500579240208524895), KQU( 4484390097311355324),
+ KQU(15145801330700623870), KQU( 8055827661392944028),
+ KQU( 5865092976832712268), KQU(15159212508053625143),
+ KQU( 3560964582876483341), KQU( 4070052741344438280),
+ KQU( 6032585709886855634), KQU(15643262320904604873),
+ KQU( 2565119772293371111), KQU( 318314293065348260),
+ KQU(15047458749141511872), KQU( 7772788389811528730),
+ KQU( 7081187494343801976), KQU( 6465136009467253947),
+ KQU(10425940692543362069), KQU( 554608190318339115),
+ KQU(14796699860302125214), KQU( 1638153134431111443),
+ KQU(10336967447052276248), KQU( 8412308070396592958),
+ KQU( 4004557277152051226), KQU( 8143598997278774834),
+ KQU(16413323996508783221), KQU(13139418758033994949),
+ KQU( 9772709138335006667), KQU( 2818167159287157659),
+ KQU(17091740573832523669), KQU(14629199013130751608),
+ KQU(18268322711500338185), KQU( 8290963415675493063),
+ KQU( 8830864907452542588), KQU( 1614839084637494849),
+ KQU(14855358500870422231), KQU( 3472996748392519937),
+ KQU(15317151166268877716), KQU( 5825895018698400362),
+ KQU(16730208429367544129), KQU(10481156578141202800),
+ KQU( 4746166512382823750), KQU(12720876014472464998),
+ KQU( 8825177124486735972), KQU(13733447296837467838),
+ KQU( 6412293741681359625), KQU( 8313213138756135033),
+ KQU(11421481194803712517), KQU( 7997007691544174032),
+ KQU( 6812963847917605930), KQU( 9683091901227558641),
+ KQU(14703594165860324713), KQU( 1775476144519618309),
+ KQU( 2724283288516469519), KQU( 717642555185856868),
+ KQU( 8736402192215092346), KQU(11878800336431381021),
+ KQU( 4348816066017061293), KQU( 6115112756583631307),
+ KQU( 9176597239667142976), KQU(12615622714894259204),
+ KQU(10283406711301385987), KQU( 5111762509485379420),
+ KQU( 3118290051198688449), KQU( 7345123071632232145),
+ KQU( 9176423451688682359), KQU( 4843865456157868971),
+ KQU(12008036363752566088), KQU(12058837181919397720),
+ KQU( 2145073958457347366), KQU( 1526504881672818067),
+ KQU( 3488830105567134848), KQU(13208362960674805143),
+ KQU( 4077549672899572192), KQU( 7770995684693818365),
+ KQU( 1398532341546313593), KQU(12711859908703927840),
+ KQU( 1417561172594446813), KQU(17045191024194170604),
+ KQU( 4101933177604931713), KQU(14708428834203480320),
+ KQU(17447509264469407724), KQU(14314821973983434255),
+ KQU(17990472271061617265), KQU( 5087756685841673942),
+ KQU(12797820586893859939), KQU( 1778128952671092879),
+ KQU( 3535918530508665898), KQU( 9035729701042481301),
+ KQU(14808661568277079962), KQU(14587345077537747914),
+ KQU(11920080002323122708), KQU( 6426515805197278753),
+ KQU( 3295612216725984831), KQU(11040722532100876120),
+ KQU(12305952936387598754), KQU(16097391899742004253),
+ KQU( 4908537335606182208), KQU(12446674552196795504),
+ KQU(16010497855816895177), KQU( 9194378874788615551),
+ KQU( 3382957529567613384), KQU( 5154647600754974077),
+ KQU( 9801822865328396141), KQU( 9023662173919288143),
+ KQU(17623115353825147868), KQU( 8238115767443015816),
+ KQU(15811444159859002560), KQU( 9085612528904059661),
+ KQU( 6888601089398614254), KQU( 258252992894160189),
+ KQU( 6704363880792428622), KQU( 6114966032147235763),
+ KQU(11075393882690261875), KQU( 8797664238933620407),
+ KQU( 5901892006476726920), KQU( 5309780159285518958),
+ KQU(14940808387240817367), KQU(14642032021449656698),
+ KQU( 9808256672068504139), KQU( 3670135111380607658),
+ KQU(11211211097845960152), KQU( 1474304506716695808),
+ KQU(15843166204506876239), KQU( 7661051252471780561),
+ KQU(10170905502249418476), KQU( 7801416045582028589),
+ KQU( 2763981484737053050), KQU( 9491377905499253054),
+ KQU(16201395896336915095), KQU( 9256513756442782198),
+ KQU( 5411283157972456034), KQU( 5059433122288321676),
+ KQU( 4327408006721123357), KQU( 9278544078834433377),
+ KQU( 7601527110882281612), KQU(11848295896975505251),
+ KQU(12096998801094735560), KQU(14773480339823506413),
+ KQU(15586227433895802149), KQU(12786541257830242872),
+ KQU( 6904692985140503067), KQU( 5309011515263103959),
+ KQU(12105257191179371066), KQU(14654380212442225037),
+ KQU( 2556774974190695009), KQU( 4461297399927600261),
+ KQU(14888225660915118646), KQU(14915459341148291824),
+ KQU( 2738802166252327631), KQU( 6047155789239131512),
+ KQU(12920545353217010338), KQU(10697617257007840205),
+ KQU( 2751585253158203504), KQU(13252729159780047496),
+ KQU(14700326134672815469), KQU(14082527904374600529),
+ KQU(16852962273496542070), KQU(17446675504235853907),
+ KQU(15019600398527572311), KQU(12312781346344081551),
+ KQU(14524667935039810450), KQU( 5634005663377195738),
+ KQU(11375574739525000569), KQU( 2423665396433260040),
+ KQU( 5222836914796015410), KQU( 4397666386492647387),
+ KQU( 4619294441691707638), KQU( 665088602354770716),
+ KQU(13246495665281593610), KQU( 6564144270549729409),
+ KQU(10223216188145661688), KQU( 3961556907299230585),
+ KQU(11543262515492439914), KQU(16118031437285993790),
+ KQU( 7143417964520166465), KQU(13295053515909486772),
+ KQU( 40434666004899675), KQU(17127804194038347164),
+ KQU( 8599165966560586269), KQU( 8214016749011284903),
+ KQU(13725130352140465239), KQU( 5467254474431726291),
+ KQU( 7748584297438219877), KQU(16933551114829772472),
+ KQU( 2169618439506799400), KQU( 2169787627665113463),
+ KQU(17314493571267943764), KQU(18053575102911354912),
+ KQU(11928303275378476973), KQU(11593850925061715550),
+ KQU(17782269923473589362), KQU( 3280235307704747039),
+ KQU( 6145343578598685149), KQU(17080117031114086090),
+ KQU(18066839902983594755), KQU( 6517508430331020706),
+ KQU( 8092908893950411541), KQU(12558378233386153732),
+ KQU( 4476532167973132976), KQU(16081642430367025016),
+ KQU( 4233154094369139361), KQU( 8693630486693161027),
+ KQU(11244959343027742285), KQU(12273503967768513508),
+ KQU(14108978636385284876), KQU( 7242414665378826984),
+ KQU( 6561316938846562432), KQU( 8601038474994665795),
+ KQU(17532942353612365904), KQU(17940076637020912186),
+ KQU( 7340260368823171304), KQU( 7061807613916067905),
+ KQU(10561734935039519326), KQU(17990796503724650862),
+ KQU( 6208732943911827159), KQU( 359077562804090617),
+ KQU(14177751537784403113), KQU(10659599444915362902),
+ KQU(15081727220615085833), KQU(13417573895659757486),
+ KQU(15513842342017811524), KQU(11814141516204288231),
+ KQU( 1827312513875101814), KQU( 2804611699894603103),
+ KQU(17116500469975602763), KQU(12270191815211952087),
+ KQU(12256358467786024988), KQU(18435021722453971267),
+ KQU( 671330264390865618), KQU( 476504300460286050),
+ KQU(16465470901027093441), KQU( 4047724406247136402),
+ KQU( 1322305451411883346), KQU( 1388308688834322280),
+ KQU( 7303989085269758176), KQU( 9323792664765233642),
+ KQU( 4542762575316368936), KQU(17342696132794337618),
+ KQU( 4588025054768498379), KQU(13415475057390330804),
+ KQU(17880279491733405570), KQU(10610553400618620353),
+ KQU( 3180842072658960139), KQU(13002966655454270120),
+ KQU( 1665301181064982826), KQU( 7083673946791258979),
+ KQU( 190522247122496820), KQU(17388280237250677740),
+ KQU( 8430770379923642945), KQU(12987180971921668584),
+ KQU( 2311086108365390642), KQU( 2870984383579822345),
+ KQU(14014682609164653318), KQU(14467187293062251484),
+ KQU( 192186361147413298), KQU(15171951713531796524),
+ KQU( 9900305495015948728), KQU(17958004775615466344),
+ KQU(14346380954498606514), KQU(18040047357617407096),
+ KQU( 5035237584833424532), KQU(15089555460613972287),
+ KQU( 4131411873749729831), KQU( 1329013581168250330),
+ KQU(10095353333051193949), KQU(10749518561022462716),
+ KQU( 9050611429810755847), KQU(15022028840236655649),
+ KQU( 8775554279239748298), KQU(13105754025489230502),
+ KQU(15471300118574167585), KQU( 89864764002355628),
+ KQU( 8776416323420466637), KQU( 5280258630612040891),
+ KQU( 2719174488591862912), KQU( 7599309137399661994),
+ KQU(15012887256778039979), KQU(14062981725630928925),
+ KQU(12038536286991689603), KQU( 7089756544681775245),
+ KQU(10376661532744718039), KQU( 1265198725901533130),
+ KQU(13807996727081142408), KQU( 2935019626765036403),
+ KQU( 7651672460680700141), KQU( 3644093016200370795),
+ KQU( 2840982578090080674), KQU(17956262740157449201),
+ KQU(18267979450492880548), KQU(11799503659796848070),
+ KQU( 9942537025669672388), KQU(11886606816406990297),
+ KQU( 5488594946437447576), KQU( 7226714353282744302),
+ KQU( 3784851653123877043), KQU( 878018453244803041),
+ KQU(12110022586268616085), KQU( 734072179404675123),
+ KQU(11869573627998248542), KQU( 469150421297783998),
+ KQU( 260151124912803804), KQU(11639179410120968649),
+ KQU( 9318165193840846253), KQU(12795671722734758075),
+ KQU(15318410297267253933), KQU( 691524703570062620),
+ KQU( 5837129010576994601), KQU(15045963859726941052),
+ KQU( 5850056944932238169), KQU(12017434144750943807),
+ KQU( 7447139064928956574), KQU( 3101711812658245019),
+ KQU(16052940704474982954), KQU(18195745945986994042),
+ KQU( 8932252132785575659), KQU(13390817488106794834),
+ KQU(11582771836502517453), KQU( 4964411326683611686),
+ KQU( 2195093981702694011), KQU(14145229538389675669),
+ KQU(16459605532062271798), KQU( 866316924816482864),
+ KQU( 4593041209937286377), KQU( 8415491391910972138),
+ KQU( 4171236715600528969), KQU(16637569303336782889),
+ KQU( 2002011073439212680), KQU(17695124661097601411),
+ KQU( 4627687053598611702), KQU( 7895831936020190403),
+ KQU( 8455951300917267802), KQU( 2923861649108534854),
+ KQU( 8344557563927786255), KQU( 6408671940373352556),
+ KQU(12210227354536675772), KQU(14294804157294222295),
+ KQU(10103022425071085127), KQU(10092959489504123771),
+ KQU( 6554774405376736268), KQU(12629917718410641774),
+ KQU( 6260933257596067126), KQU( 2460827021439369673),
+ KQU( 2541962996717103668), KQU( 597377203127351475),
+ KQU( 5316984203117315309), KQU( 4811211393563241961),
+ KQU(13119698597255811641), KQU( 8048691512862388981),
+ KQU(10216818971194073842), KQU( 4612229970165291764),
+ KQU(10000980798419974770), KQU( 6877640812402540687),
+ KQU( 1488727563290436992), KQU( 2227774069895697318),
+ KQU(11237754507523316593), KQU(13478948605382290972),
+ KQU( 1963583846976858124), KQU( 5512309205269276457),
+ KQU( 3972770164717652347), KQU( 3841751276198975037),
+ KQU(10283343042181903117), KQU( 8564001259792872199),
+ KQU(16472187244722489221), KQU( 8953493499268945921),
+ KQU( 3518747340357279580), KQU( 4003157546223963073),
+ KQU( 3270305958289814590), KQU( 3966704458129482496),
+ KQU( 8122141865926661939), KQU(14627734748099506653),
+ KQU(13064426990862560568), KQU( 2414079187889870829),
+ KQU( 5378461209354225306), KQU(10841985740128255566),
+ KQU( 538582442885401738), KQU( 7535089183482905946),
+ KQU(16117559957598879095), KQU( 8477890721414539741),
+ KQU( 1459127491209533386), KQU(17035126360733620462),
+ KQU( 8517668552872379126), KQU(10292151468337355014),
+ KQU(17081267732745344157), KQU(13751455337946087178),
+ KQU(14026945459523832966), KQU( 6653278775061723516),
+ KQU(10619085543856390441), KQU( 2196343631481122885),
+ KQU(10045966074702826136), KQU(10082317330452718282),
+ KQU( 5920859259504831242), KQU( 9951879073426540617),
+ KQU( 7074696649151414158), KQU(15808193543879464318),
+ KQU( 7385247772746953374), KQU( 3192003544283864292),
+ KQU(18153684490917593847), KQU(12423498260668568905),
+ KQU(10957758099756378169), KQU(11488762179911016040),
+ KQU( 2099931186465333782), KQU(11180979581250294432),
+ KQU( 8098916250668367933), KQU( 3529200436790763465),
+ KQU(12988418908674681745), KQU( 6147567275954808580),
+ KQU( 3207503344604030989), KQU(10761592604898615360),
+ KQU( 229854861031893504), KQU( 8809853962667144291),
+ KQU(13957364469005693860), KQU( 7634287665224495886),
+ KQU(12353487366976556874), KQU( 1134423796317152034),
+ KQU( 2088992471334107068), KQU( 7393372127190799698),
+ KQU( 1845367839871058391), KQU( 207922563987322884),
+ KQU(11960870813159944976), KQU(12182120053317317363),
+ KQU(17307358132571709283), KQU(13871081155552824936),
+ KQU(18304446751741566262), KQU( 7178705220184302849),
+ KQU(10929605677758824425), KQU(16446976977835806844),
+ KQU(13723874412159769044), KQU( 6942854352100915216),
+ KQU( 1726308474365729390), KQU( 2150078766445323155),
+ KQU(15345558947919656626), KQU(12145453828874527201),
+ KQU( 2054448620739726849), KQU( 2740102003352628137),
+ KQU(11294462163577610655), KQU( 756164283387413743),
+ KQU(17841144758438810880), KQU(10802406021185415861),
+ KQU( 8716455530476737846), KQU( 6321788834517649606),
+ KQU(14681322910577468426), KQU(17330043563884336387),
+ KQU(12701802180050071614), KQU(14695105111079727151),
+ KQU( 5112098511654172830), KQU( 4957505496794139973),
+ KQU( 8270979451952045982), KQU(12307685939199120969),
+ KQU(12425799408953443032), KQU( 8376410143634796588),
+ KQU(16621778679680060464), KQU( 3580497854566660073),
+ KQU( 1122515747803382416), KQU( 857664980960597599),
+ KQU( 6343640119895925918), KQU(12878473260854462891),
+ KQU(10036813920765722626), KQU(14451335468363173812),
+ KQU( 5476809692401102807), KQU(16442255173514366342),
+ KQU(13060203194757167104), KQU(14354124071243177715),
+ KQU(15961249405696125227), KQU(13703893649690872584),
+ KQU( 363907326340340064), KQU( 6247455540491754842),
+ KQU(12242249332757832361), KQU( 156065475679796717),
+ KQU( 9351116235749732355), KQU( 4590350628677701405),
+ KQU( 1671195940982350389), KQU(13501398458898451905),
+ KQU( 6526341991225002255), KQU( 1689782913778157592),
+ KQU( 7439222350869010334), KQU(13975150263226478308),
+ KQU(11411961169932682710), KQU(17204271834833847277),
+ KQU( 541534742544435367), KQU( 6591191931218949684),
+ KQU( 2645454775478232486), KQU( 4322857481256485321),
+ KQU( 8477416487553065110), KQU(12902505428548435048),
+ KQU( 971445777981341415), KQU(14995104682744976712),
+ KQU( 4243341648807158063), KQU( 8695061252721927661),
+ KQU( 5028202003270177222), KQU( 2289257340915567840),
+ KQU(13870416345121866007), KQU(13994481698072092233),
+ KQU( 6912785400753196481), KQU( 2278309315841980139),
+ KQU( 4329765449648304839), KQU( 5963108095785485298),
+ KQU( 4880024847478722478), KQU(16015608779890240947),
+ KQU( 1866679034261393544), KQU( 914821179919731519),
+ KQU( 9643404035648760131), KQU( 2418114953615593915),
+ KQU( 944756836073702374), KQU(15186388048737296834),
+ KQU( 7723355336128442206), KQU( 7500747479679599691),
+ KQU(18013961306453293634), KQU( 2315274808095756456),
+ KQU(13655308255424029566), KQU(17203800273561677098),
+ KQU( 1382158694422087756), KQU( 5090390250309588976),
+ KQU( 517170818384213989), KQU( 1612709252627729621),
+ KQU( 1330118955572449606), KQU( 300922478056709885),
+ KQU(18115693291289091987), KQU(13491407109725238321),
+ KQU(15293714633593827320), KQU( 5151539373053314504),
+ KQU( 5951523243743139207), KQU(14459112015249527975),
+ KQU( 5456113959000700739), KQU( 3877918438464873016),
+ KQU(12534071654260163555), KQU(15871678376893555041),
+ KQU(11005484805712025549), KQU(16353066973143374252),
+ KQU( 4358331472063256685), KQU( 8268349332210859288),
+ KQU(12485161590939658075), KQU(13955993592854471343),
+ KQU( 5911446886848367039), KQU(14925834086813706974),
+ KQU( 6590362597857994805), KQU( 1280544923533661875),
+ KQU( 1637756018947988164), KQU( 4734090064512686329),
+ KQU(16693705263131485912), KQU( 6834882340494360958),
+ KQU( 8120732176159658505), KQU( 2244371958905329346),
+ KQU(10447499707729734021), KQU( 7318742361446942194),
+ KQU( 8032857516355555296), KQU(14023605983059313116),
+ KQU( 1032336061815461376), KQU( 9840995337876562612),
+ KQU( 9869256223029203587), KQU(12227975697177267636),
+ KQU(12728115115844186033), KQU( 7752058479783205470),
+ KQU( 729733219713393087), KQU(12954017801239007622)
};
static const uint64_t init_by_array_64_expected[] = {
- QU( 2100341266307895239LLU), QU( 8344256300489757943LLU),
- QU(15687933285484243894LLU), QU( 8268620370277076319LLU),
- QU(12371852309826545459LLU), QU( 8800491541730110238LLU),
- QU(18113268950100835773LLU), QU( 2886823658884438119LLU),
- QU( 3293667307248180724LLU), QU( 9307928143300172731LLU),
- QU( 7688082017574293629LLU), QU( 900986224735166665LLU),
- QU( 9977972710722265039LLU), QU( 6008205004994830552LLU),
- QU( 546909104521689292LLU), QU( 7428471521869107594LLU),
- QU(14777563419314721179LLU), QU(16116143076567350053LLU),
- QU( 5322685342003142329LLU), QU( 4200427048445863473LLU),
- QU( 4693092150132559146LLU), QU(13671425863759338582LLU),
- QU( 6747117460737639916LLU), QU( 4732666080236551150LLU),
- QU( 5912839950611941263LLU), QU( 3903717554504704909LLU),
- QU( 2615667650256786818LLU), QU(10844129913887006352LLU),
- QU(13786467861810997820LLU), QU(14267853002994021570LLU),
- QU(13767807302847237439LLU), QU(16407963253707224617LLU),
- QU( 4802498363698583497LLU), QU( 2523802839317209764LLU),
- QU( 3822579397797475589LLU), QU( 8950320572212130610LLU),
- QU( 3745623504978342534LLU), QU(16092609066068482806LLU),
- QU( 9817016950274642398LLU), QU(10591660660323829098LLU),
- QU(11751606650792815920LLU), QU( 5122873818577122211LLU),
- QU(17209553764913936624LLU), QU( 6249057709284380343LLU),
- QU(15088791264695071830LLU), QU(15344673071709851930LLU),
- QU( 4345751415293646084LLU), QU( 2542865750703067928LLU),
- QU(13520525127852368784LLU), QU(18294188662880997241LLU),
- QU( 3871781938044881523LLU), QU( 2873487268122812184LLU),
- QU(15099676759482679005LLU), QU(15442599127239350490LLU),
- QU( 6311893274367710888LLU), QU( 3286118760484672933LLU),
- QU( 4146067961333542189LLU), QU(13303942567897208770LLU),
- QU( 8196013722255630418LLU), QU( 4437815439340979989LLU),
- QU(15433791533450605135LLU), QU( 4254828956815687049LLU),
- QU( 1310903207708286015LLU), QU(10529182764462398549LLU),
- QU(14900231311660638810LLU), QU( 9727017277104609793LLU),
- QU( 1821308310948199033LLU), QU(11628861435066772084LLU),
- QU( 9469019138491546924LLU), QU( 3145812670532604988LLU),
- QU( 9938468915045491919LLU), QU( 1562447430672662142LLU),
- QU(13963995266697989134LLU), QU( 3356884357625028695LLU),
- QU( 4499850304584309747LLU), QU( 8456825817023658122LLU),
- QU(10859039922814285279LLU), QU( 8099512337972526555LLU),
- QU( 348006375109672149LLU), QU(11919893998241688603LLU),
- QU( 1104199577402948826LLU), QU(16689191854356060289LLU),
- QU(10992552041730168078LLU), QU( 7243733172705465836LLU),
- QU( 5668075606180319560LLU), QU(18182847037333286970LLU),
- QU( 4290215357664631322LLU), QU( 4061414220791828613LLU),
- QU(13006291061652989604LLU), QU( 7140491178917128798LLU),
- QU(12703446217663283481LLU), QU( 5500220597564558267LLU),
- QU(10330551509971296358LLU), QU(15958554768648714492LLU),
- QU( 5174555954515360045LLU), QU( 1731318837687577735LLU),
- QU( 3557700801048354857LLU), QU(13764012341928616198LLU),
- QU(13115166194379119043LLU), QU( 7989321021560255519LLU),
- QU( 2103584280905877040LLU), QU( 9230788662155228488LLU),
- QU(16396629323325547654LLU), QU( 657926409811318051LLU),
- QU(15046700264391400727LLU), QU( 5120132858771880830LLU),
- QU( 7934160097989028561LLU), QU( 6963121488531976245LLU),
- QU(17412329602621742089LLU), QU(15144843053931774092LLU),
- QU(17204176651763054532LLU), QU(13166595387554065870LLU),
- QU( 8590377810513960213LLU), QU( 5834365135373991938LLU),
- QU( 7640913007182226243LLU), QU( 3479394703859418425LLU),
- QU(16402784452644521040LLU), QU( 4993979809687083980LLU),
- QU(13254522168097688865LLU), QU(15643659095244365219LLU),
- QU( 5881437660538424982LLU), QU(11174892200618987379LLU),
- QU( 254409966159711077LLU), QU(17158413043140549909LLU),
- QU( 3638048789290376272LLU), QU( 1376816930299489190LLU),
- QU( 4622462095217761923LLU), QU(15086407973010263515LLU),
- QU(13253971772784692238LLU), QU( 5270549043541649236LLU),
- QU(11182714186805411604LLU), QU(12283846437495577140LLU),
- QU( 5297647149908953219LLU), QU(10047451738316836654LLU),
- QU( 4938228100367874746LLU), QU(12328523025304077923LLU),
- QU( 3601049438595312361LLU), QU( 9313624118352733770LLU),
- QU(13322966086117661798LLU), QU(16660005705644029394LLU),
- QU(11337677526988872373LLU), QU(13869299102574417795LLU),
- QU(15642043183045645437LLU), QU( 3021755569085880019LLU),
- QU( 4979741767761188161LLU), QU(13679979092079279587LLU),
- QU( 3344685842861071743LLU), QU(13947960059899588104LLU),
- QU( 305806934293368007LLU), QU( 5749173929201650029LLU),
- QU(11123724852118844098LLU), QU(15128987688788879802LLU),
- QU(15251651211024665009LLU), QU( 7689925933816577776LLU),
- QU(16732804392695859449LLU), QU(17087345401014078468LLU),
- QU(14315108589159048871LLU), QU( 4820700266619778917LLU),
- QU(16709637539357958441LLU), QU( 4936227875177351374LLU),
- QU( 2137907697912987247LLU), QU(11628565601408395420LLU),
- QU( 2333250549241556786LLU), QU( 5711200379577778637LLU),
- QU( 5170680131529031729LLU), QU(12620392043061335164LLU),
- QU( 95363390101096078LLU), QU( 5487981914081709462LLU),
- QU( 1763109823981838620LLU), QU( 3395861271473224396LLU),
- QU( 1300496844282213595LLU), QU( 6894316212820232902LLU),
- QU(10673859651135576674LLU), QU( 5911839658857903252LLU),
- QU(17407110743387299102LLU), QU( 8257427154623140385LLU),
- QU(11389003026741800267LLU), QU( 4070043211095013717LLU),
- QU(11663806997145259025LLU), QU(15265598950648798210LLU),
- QU( 630585789434030934LLU), QU( 3524446529213587334LLU),
- QU( 7186424168495184211LLU), QU(10806585451386379021LLU),
- QU(11120017753500499273LLU), QU( 1586837651387701301LLU),
- QU(17530454400954415544LLU), QU( 9991670045077880430LLU),
- QU( 7550997268990730180LLU), QU( 8640249196597379304LLU),
- QU( 3522203892786893823LLU), QU(10401116549878854788LLU),
- QU(13690285544733124852LLU), QU( 8295785675455774586LLU),
- QU(15535716172155117603LLU), QU( 3112108583723722511LLU),
- QU(17633179955339271113LLU), QU(18154208056063759375LLU),
- QU( 1866409236285815666LLU), QU(13326075895396412882LLU),
- QU( 8756261842948020025LLU), QU( 6281852999868439131LLU),
- QU(15087653361275292858LLU), QU(10333923911152949397LLU),
- QU( 5265567645757408500LLU), QU(12728041843210352184LLU),
- QU( 6347959327507828759LLU), QU( 154112802625564758LLU),
- QU(18235228308679780218LLU), QU( 3253805274673352418LLU),
- QU( 4849171610689031197LLU), QU(17948529398340432518LLU),
- QU(13803510475637409167LLU), QU(13506570190409883095LLU),
- QU(15870801273282960805LLU), QU( 8451286481299170773LLU),
- QU( 9562190620034457541LLU), QU( 8518905387449138364LLU),
- QU(12681306401363385655LLU), QU( 3788073690559762558LLU),
- QU( 5256820289573487769LLU), QU( 2752021372314875467LLU),
- QU( 6354035166862520716LLU), QU( 4328956378309739069LLU),
- QU( 449087441228269600LLU), QU( 5533508742653090868LLU),
- QU( 1260389420404746988LLU), QU(18175394473289055097LLU),
- QU( 1535467109660399420LLU), QU( 8818894282874061442LLU),
- QU(12140873243824811213LLU), QU(15031386653823014946LLU),
- QU( 1286028221456149232LLU), QU( 6329608889367858784LLU),
- QU( 9419654354945132725LLU), QU( 6094576547061672379LLU),
- QU(17706217251847450255LLU), QU( 1733495073065878126LLU),
- QU(16918923754607552663LLU), QU( 8881949849954945044LLU),
- QU(12938977706896313891LLU), QU(14043628638299793407LLU),
- QU(18393874581723718233LLU), QU( 6886318534846892044LLU),
- QU(14577870878038334081LLU), QU(13541558383439414119LLU),
- QU(13570472158807588273LLU), QU(18300760537910283361LLU),
- QU( 818368572800609205LLU), QU( 1417000585112573219LLU),
- QU(12337533143867683655LLU), QU(12433180994702314480LLU),
- QU( 778190005829189083LLU), QU(13667356216206524711LLU),
- QU( 9866149895295225230LLU), QU(11043240490417111999LLU),
- QU( 1123933826541378598LLU), QU( 6469631933605123610LLU),
- QU(14508554074431980040LLU), QU(13918931242962026714LLU),
- QU( 2870785929342348285LLU), QU(14786362626740736974LLU),
- QU(13176680060902695786LLU), QU( 9591778613541679456LLU),
- QU( 9097662885117436706LLU), QU( 749262234240924947LLU),
- QU( 1944844067793307093LLU), QU( 4339214904577487742LLU),
- QU( 8009584152961946551LLU), QU(16073159501225501777LLU),
- QU( 3335870590499306217LLU), QU(17088312653151202847LLU),
- QU( 3108893142681931848LLU), QU(16636841767202792021LLU),
- QU(10423316431118400637LLU), QU( 8008357368674443506LLU),
- QU(11340015231914677875LLU), QU(17687896501594936090LLU),
- QU(15173627921763199958LLU), QU( 542569482243721959LLU),
- QU(15071714982769812975LLU), QU( 4466624872151386956LLU),
- QU( 1901780715602332461LLU), QU( 9822227742154351098LLU),
- QU( 1479332892928648780LLU), QU( 6981611948382474400LLU),
- QU( 7620824924456077376LLU), QU(14095973329429406782LLU),
- QU( 7902744005696185404LLU), QU(15830577219375036920LLU),
- QU(10287076667317764416LLU), QU(12334872764071724025LLU),
- QU( 4419302088133544331LLU), QU(14455842851266090520LLU),
- QU(12488077416504654222LLU), QU( 7953892017701886766LLU),
- QU( 6331484925529519007LLU), QU( 4902145853785030022LLU),
- QU(17010159216096443073LLU), QU(11945354668653886087LLU),
- QU(15112022728645230829LLU), QU(17363484484522986742LLU),
- QU( 4423497825896692887LLU), QU( 8155489510809067471LLU),
- QU( 258966605622576285LLU), QU( 5462958075742020534LLU),
- QU( 6763710214913276228LLU), QU( 2368935183451109054LLU),
- QU(14209506165246453811LLU), QU( 2646257040978514881LLU),
- QU( 3776001911922207672LLU), QU( 1419304601390147631LLU),
- QU(14987366598022458284LLU), QU( 3977770701065815721LLU),
- QU( 730820417451838898LLU), QU( 3982991703612885327LLU),
- QU( 2803544519671388477LLU), QU(17067667221114424649LLU),
- QU( 2922555119737867166LLU), QU( 1989477584121460932LLU),
- QU(15020387605892337354LLU), QU( 9293277796427533547LLU),
- QU(10722181424063557247LLU), QU(16704542332047511651LLU),
- QU( 5008286236142089514LLU), QU(16174732308747382540LLU),
- QU(17597019485798338402LLU), QU(13081745199110622093LLU),
- QU( 8850305883842258115LLU), QU(12723629125624589005LLU),
- QU( 8140566453402805978LLU), QU(15356684607680935061LLU),
- QU(14222190387342648650LLU), QU(11134610460665975178LLU),
- QU( 1259799058620984266LLU), QU(13281656268025610041LLU),
- QU( 298262561068153992LLU), QU(12277871700239212922LLU),
- QU(13911297774719779438LLU), QU(16556727962761474934LLU),
- QU(17903010316654728010LLU), QU( 9682617699648434744LLU),
- QU(14757681836838592850LLU), QU( 1327242446558524473LLU),
- QU(11126645098780572792LLU), QU( 1883602329313221774LLU),
- QU( 2543897783922776873LLU), QU(15029168513767772842LLU),
- QU(12710270651039129878LLU), QU(16118202956069604504LLU),
- QU(15010759372168680524LLU), QU( 2296827082251923948LLU),
- QU(10793729742623518101LLU), QU(13829764151845413046LLU),
- QU(17769301223184451213LLU), QU( 3118268169210783372LLU),
- QU(17626204544105123127LLU), QU( 7416718488974352644LLU),
- QU(10450751996212925994LLU), QU( 9352529519128770586LLU),
- QU( 259347569641110140LLU), QU( 8048588892269692697LLU),
- QU( 1774414152306494058LLU), QU(10669548347214355622LLU),
- QU(13061992253816795081LLU), QU(18432677803063861659LLU),
- QU( 8879191055593984333LLU), QU(12433753195199268041LLU),
- QU(14919392415439730602LLU), QU( 6612848378595332963LLU),
- QU( 6320986812036143628LLU), QU(10465592420226092859LLU),
- QU( 4196009278962570808LLU), QU( 3747816564473572224LLU),
- QU(17941203486133732898LLU), QU( 2350310037040505198LLU),
- QU( 5811779859134370113LLU), QU(10492109599506195126LLU),
- QU( 7699650690179541274LLU), QU( 1954338494306022961LLU),
- QU(14095816969027231152LLU), QU( 5841346919964852061LLU),
- QU(14945969510148214735LLU), QU( 3680200305887550992LLU),
- QU( 6218047466131695792LLU), QU( 8242165745175775096LLU),
- QU(11021371934053307357LLU), QU( 1265099502753169797LLU),
- QU( 4644347436111321718LLU), QU( 3609296916782832859LLU),
- QU( 8109807992218521571LLU), QU(18387884215648662020LLU),
- QU(14656324896296392902LLU), QU(17386819091238216751LLU),
- QU(17788300878582317152LLU), QU( 7919446259742399591LLU),
- QU( 4466613134576358004LLU), QU(12928181023667938509LLU),
- QU(13147446154454932030LLU), QU(16552129038252734620LLU),
- QU( 8395299403738822450LLU), QU(11313817655275361164LLU),
- QU( 434258809499511718LLU), QU( 2074882104954788676LLU),
- QU( 7929892178759395518LLU), QU( 9006461629105745388LLU),
- QU( 5176475650000323086LLU), QU(11128357033468341069LLU),
- QU(12026158851559118955LLU), QU(14699716249471156500LLU),
- QU( 448982497120206757LLU), QU( 4156475356685519900LLU),
- QU( 6063816103417215727LLU), QU(10073289387954971479LLU),
- QU( 8174466846138590962LLU), QU( 2675777452363449006LLU),
- QU( 9090685420572474281LLU), QU( 6659652652765562060LLU),
- QU(12923120304018106621LLU), QU(11117480560334526775LLU),
- QU( 937910473424587511LLU), QU( 1838692113502346645LLU),
- QU(11133914074648726180LLU), QU( 7922600945143884053LLU),
- QU(13435287702700959550LLU), QU( 5287964921251123332LLU),
- QU(11354875374575318947LLU), QU(17955724760748238133LLU),
- QU(13728617396297106512LLU), QU( 4107449660118101255LLU),
- QU( 1210269794886589623LLU), QU(11408687205733456282LLU),
- QU( 4538354710392677887LLU), QU(13566803319341319267LLU),
- QU(17870798107734050771LLU), QU( 3354318982568089135LLU),
- QU( 9034450839405133651LLU), QU(13087431795753424314LLU),
- QU( 950333102820688239LLU), QU( 1968360654535604116LLU),
- QU(16840551645563314995LLU), QU( 8867501803892924995LLU),
- QU(11395388644490626845LLU), QU( 1529815836300732204LLU),
- QU(13330848522996608842LLU), QU( 1813432878817504265LLU),
- QU( 2336867432693429560LLU), QU(15192805445973385902LLU),
- QU( 2528593071076407877LLU), QU( 128459777936689248LLU),
- QU( 9976345382867214866LLU), QU( 6208885766767996043LLU),
- QU(14982349522273141706LLU), QU( 3099654362410737822LLU),
- QU(13776700761947297661LLU), QU( 8806185470684925550LLU),
- QU( 8151717890410585321LLU), QU( 640860591588072925LLU),
- QU(14592096303937307465LLU), QU( 9056472419613564846LLU),
- QU(14861544647742266352LLU), QU(12703771500398470216LLU),
- QU( 3142372800384138465LLU), QU( 6201105606917248196LLU),
- QU(18337516409359270184LLU), QU(15042268695665115339LLU),
- QU(15188246541383283846LLU), QU(12800028693090114519LLU),
- QU( 5992859621101493472LLU), QU(18278043971816803521LLU),
- QU( 9002773075219424560LLU), QU( 7325707116943598353LLU),
- QU( 7930571931248040822LLU), QU( 5645275869617023448LLU),
- QU( 7266107455295958487LLU), QU( 4363664528273524411LLU),
- QU(14313875763787479809LLU), QU(17059695613553486802LLU),
- QU( 9247761425889940932LLU), QU(13704726459237593128LLU),
- QU( 2701312427328909832LLU), QU(17235532008287243115LLU),
- QU(14093147761491729538LLU), QU( 6247352273768386516LLU),
- QU( 8268710048153268415LLU), QU( 7985295214477182083LLU),
- QU(15624495190888896807LLU), QU( 3772753430045262788LLU),
- QU( 9133991620474991698LLU), QU( 5665791943316256028LLU),
- QU( 7551996832462193473LLU), QU(13163729206798953877LLU),
- QU( 9263532074153846374LLU), QU( 1015460703698618353LLU),
- QU(17929874696989519390LLU), QU(18257884721466153847LLU),
- QU(16271867543011222991LLU), QU( 3905971519021791941LLU),
- QU(16814488397137052085LLU), QU( 1321197685504621613LLU),
- QU( 2870359191894002181LLU), QU(14317282970323395450LLU),
- QU(13663920845511074366LLU), QU( 2052463995796539594LLU),
- QU(14126345686431444337LLU), QU( 1727572121947022534LLU),
- QU(17793552254485594241LLU), QU( 6738857418849205750LLU),
- QU( 1282987123157442952LLU), QU(16655480021581159251LLU),
- QU( 6784587032080183866LLU), QU(14726758805359965162LLU),
- QU( 7577995933961987349LLU), QU(12539609320311114036LLU),
- QU(10789773033385439494LLU), QU( 8517001497411158227LLU),
- QU(10075543932136339710LLU), QU(14838152340938811081LLU),
- QU( 9560840631794044194LLU), QU(17445736541454117475LLU),
- QU(10633026464336393186LLU), QU(15705729708242246293LLU),
- QU( 1117517596891411098LLU), QU( 4305657943415886942LLU),
- QU( 4948856840533979263LLU), QU(16071681989041789593LLU),
- QU(13723031429272486527LLU), QU( 7639567622306509462LLU),
- QU(12670424537483090390LLU), QU( 9715223453097197134LLU),
- QU( 5457173389992686394LLU), QU( 289857129276135145LLU),
- QU(17048610270521972512LLU), QU( 692768013309835485LLU),
- QU(14823232360546632057LLU), QU(18218002361317895936LLU),
- QU( 3281724260212650204LLU), QU(16453957266549513795LLU),
- QU( 8592711109774511881LLU), QU( 929825123473369579LLU),
- QU(15966784769764367791LLU), QU( 9627344291450607588LLU),
- QU(10849555504977813287LLU), QU( 9234566913936339275LLU),
- QU( 6413807690366911210LLU), QU(10862389016184219267LLU),
- QU(13842504799335374048LLU), QU( 1531994113376881174LLU),
- QU( 2081314867544364459LLU), QU(16430628791616959932LLU),
- QU( 8314714038654394368LLU), QU( 9155473892098431813LLU),
- QU(12577843786670475704LLU), QU( 4399161106452401017LLU),
- QU( 1668083091682623186LLU), QU( 1741383777203714216LLU),
- QU( 2162597285417794374LLU), QU(15841980159165218736LLU),
- QU( 1971354603551467079LLU), QU( 1206714764913205968LLU),
- QU( 4790860439591272330LLU), QU(14699375615594055799LLU),
- QU( 8374423871657449988LLU), QU(10950685736472937738LLU),
- QU( 697344331343267176LLU), QU(10084998763118059810LLU),
- QU(12897369539795983124LLU), QU(12351260292144383605LLU),
- QU( 1268810970176811234LLU), QU( 7406287800414582768LLU),
- QU( 516169557043807831LLU), QU( 5077568278710520380LLU),
- QU( 3828791738309039304LLU), QU( 7721974069946943610LLU),
- QU( 3534670260981096460LLU), QU( 4865792189600584891LLU),
- QU(16892578493734337298LLU), QU( 9161499464278042590LLU),
- QU(11976149624067055931LLU), QU(13219479887277343990LLU),
- QU(14161556738111500680LLU), QU(14670715255011223056LLU),
- QU( 4671205678403576558LLU), QU(12633022931454259781LLU),
- QU(14821376219869187646LLU), QU( 751181776484317028LLU),
- QU( 2192211308839047070LLU), QU(11787306362361245189LLU),
- QU(10672375120744095707LLU), QU( 4601972328345244467LLU),
- QU(15457217788831125879LLU), QU( 8464345256775460809LLU),
- QU(10191938789487159478LLU), QU( 6184348739615197613LLU),
- QU(11425436778806882100LLU), QU( 2739227089124319793LLU),
- QU( 461464518456000551LLU), QU( 4689850170029177442LLU),
- QU( 6120307814374078625LLU), QU(11153579230681708671LLU),
- QU( 7891721473905347926LLU), QU(10281646937824872400LLU),
- QU( 3026099648191332248LLU), QU( 8666750296953273818LLU),
- QU(14978499698844363232LLU), QU(13303395102890132065LLU),
- QU( 8182358205292864080LLU), QU(10560547713972971291LLU),
- QU(11981635489418959093LLU), QU( 3134621354935288409LLU),
- QU(11580681977404383968LLU), QU(14205530317404088650LLU),
- QU( 5997789011854923157LLU), QU(13659151593432238041LLU),
- QU(11664332114338865086LLU), QU( 7490351383220929386LLU),
- QU( 7189290499881530378LLU), QU(15039262734271020220LLU),
- QU( 2057217285976980055LLU), QU( 555570804905355739LLU),
- QU(11235311968348555110LLU), QU(13824557146269603217LLU),
- QU(16906788840653099693LLU), QU( 7222878245455661677LLU),
- QU( 5245139444332423756LLU), QU( 4723748462805674292LLU),
- QU(12216509815698568612LLU), QU(17402362976648951187LLU),
- QU(17389614836810366768LLU), QU( 4880936484146667711LLU),
- QU( 9085007839292639880LLU), QU(13837353458498535449LLU),
- QU(11914419854360366677LLU), QU(16595890135313864103LLU),
- QU( 6313969847197627222LLU), QU(18296909792163910431LLU),
- QU(10041780113382084042LLU), QU( 2499478551172884794LLU),
- QU(11057894246241189489LLU), QU( 9742243032389068555LLU),
- QU(12838934582673196228LLU), QU(13437023235248490367LLU),
- QU(13372420669446163240LLU), QU( 6752564244716909224LLU),
- QU( 7157333073400313737LLU), QU(12230281516370654308LLU),
- QU( 1182884552219419117LLU), QU( 2955125381312499218LLU),
- QU(10308827097079443249LLU), QU( 1337648572986534958LLU),
- QU(16378788590020343939LLU), QU( 108619126514420935LLU),
- QU( 3990981009621629188LLU), QU( 5460953070230946410LLU),
- QU( 9703328329366531883LLU), QU(13166631489188077236LLU),
- QU( 1104768831213675170LLU), QU( 3447930458553877908LLU),
- QU( 8067172487769945676LLU), QU( 5445802098190775347LLU),
- QU( 3244840981648973873LLU), QU(17314668322981950060LLU),
- QU( 5006812527827763807LLU), QU(18158695070225526260LLU),
- QU( 2824536478852417853LLU), QU(13974775809127519886LLU),
- QU( 9814362769074067392LLU), QU(17276205156374862128LLU),
- QU(11361680725379306967LLU), QU( 3422581970382012542LLU),
- QU(11003189603753241266LLU), QU(11194292945277862261LLU),
- QU( 6839623313908521348LLU), QU(11935326462707324634LLU),
- QU( 1611456788685878444LLU), QU(13112620989475558907LLU),
- QU( 517659108904450427LLU), QU(13558114318574407624LLU),
- QU(15699089742731633077LLU), QU( 4988979278862685458LLU),
- QU( 8111373583056521297LLU), QU( 3891258746615399627LLU),
- QU( 8137298251469718086LLU), QU(12748663295624701649LLU),
- QU( 4389835683495292062LLU), QU( 5775217872128831729LLU),
- QU( 9462091896405534927LLU), QU( 8498124108820263989LLU),
- QU( 8059131278842839525LLU), QU(10503167994254090892LLU),
- QU(11613153541070396656LLU), QU(18069248738504647790LLU),
- QU( 570657419109768508LLU), QU( 3950574167771159665LLU),
- QU( 5514655599604313077LLU), QU( 2908460854428484165LLU),
- QU(10777722615935663114LLU), QU(12007363304839279486LLU),
- QU( 9800646187569484767LLU), QU( 8795423564889864287LLU),
- QU(14257396680131028419LLU), QU( 6405465117315096498LLU),
- QU( 7939411072208774878LLU), QU(17577572378528990006LLU),
- QU(14785873806715994850LLU), QU(16770572680854747390LLU),
- QU(18127549474419396481LLU), QU(11637013449455757750LLU),
- QU(14371851933996761086LLU), QU( 3601181063650110280LLU),
- QU( 4126442845019316144LLU), QU(10198287239244320669LLU),
- QU(18000169628555379659LLU), QU(18392482400739978269LLU),
- QU( 6219919037686919957LLU), QU( 3610085377719446052LLU),
- QU( 2513925039981776336LLU), QU(16679413537926716955LLU),
- QU(12903302131714909434LLU), QU( 5581145789762985009LLU),
- QU(12325955044293303233LLU), QU(17216111180742141204LLU),
- QU( 6321919595276545740LLU), QU( 3507521147216174501LLU),
- QU( 9659194593319481840LLU), QU(11473976005975358326LLU),
- QU(14742730101435987026LLU), QU( 492845897709954780LLU),
- QU(16976371186162599676LLU), QU(17712703422837648655LLU),
- QU( 9881254778587061697LLU), QU( 8413223156302299551LLU),
- QU( 1563841828254089168LLU), QU( 9996032758786671975LLU),
- QU( 138877700583772667LLU), QU(13003043368574995989LLU),
- QU( 4390573668650456587LLU), QU( 8610287390568126755LLU),
- QU(15126904974266642199LLU), QU( 6703637238986057662LLU),
- QU( 2873075592956810157LLU), QU( 6035080933946049418LLU),
- QU(13382846581202353014LLU), QU( 7303971031814642463LLU),
- QU(18418024405307444267LLU), QU( 5847096731675404647LLU),
- QU( 4035880699639842500LLU), QU(11525348625112218478LLU),
- QU( 3041162365459574102LLU), QU( 2604734487727986558LLU),
- QU(15526341771636983145LLU), QU(14556052310697370254LLU),
- QU(12997787077930808155LLU), QU( 9601806501755554499LLU),
- QU(11349677952521423389LLU), QU(14956777807644899350LLU),
- QU(16559736957742852721LLU), QU(12360828274778140726LLU),
- QU( 6685373272009662513LLU), QU(16932258748055324130LLU),
- QU(15918051131954158508LLU), QU( 1692312913140790144LLU),
- QU( 546653826801637367LLU), QU( 5341587076045986652LLU),
- QU(14975057236342585662LLU), QU(12374976357340622412LLU),
- QU(10328833995181940552LLU), QU(12831807101710443149LLU),
- QU(10548514914382545716LLU), QU( 2217806727199715993LLU),
- QU(12627067369242845138LLU), QU( 4598965364035438158LLU),
- QU( 150923352751318171LLU), QU(14274109544442257283LLU),
- QU( 4696661475093863031LLU), QU( 1505764114384654516LLU),
- QU(10699185831891495147LLU), QU( 2392353847713620519LLU),
- QU( 3652870166711788383LLU), QU( 8640653276221911108LLU),
- QU( 3894077592275889704LLU), QU( 4918592872135964845LLU),
- QU(16379121273281400789LLU), QU(12058465483591683656LLU),
- QU(11250106829302924945LLU), QU( 1147537556296983005LLU),
- QU( 6376342756004613268LLU), QU(14967128191709280506LLU),
- QU(18007449949790627628LLU), QU( 9497178279316537841LLU),
- QU( 7920174844809394893LLU), QU(10037752595255719907LLU),
- QU(15875342784985217697LLU), QU(15311615921712850696LLU),
- QU( 9552902652110992950LLU), QU(14054979450099721140LLU),
- QU( 5998709773566417349LLU), QU(18027910339276320187LLU),
- QU( 8223099053868585554LLU), QU( 7842270354824999767LLU),
- QU( 4896315688770080292LLU), QU(12969320296569787895LLU),
- QU( 2674321489185759961LLU), QU( 4053615936864718439LLU),
- QU(11349775270588617578LLU), QU( 4743019256284553975LLU),
- QU( 5602100217469723769LLU), QU(14398995691411527813LLU),
- QU( 7412170493796825470LLU), QU( 836262406131744846LLU),
- QU( 8231086633845153022LLU), QU( 5161377920438552287LLU),
- QU( 8828731196169924949LLU), QU(16211142246465502680LLU),
- QU( 3307990879253687818LLU), QU( 5193405406899782022LLU),
- QU( 8510842117467566693LLU), QU( 6070955181022405365LLU),
- QU(14482950231361409799LLU), QU(12585159371331138077LLU),
- QU( 3511537678933588148LLU), QU( 2041849474531116417LLU),
- QU(10944936685095345792LLU), QU(18303116923079107729LLU),
- QU( 2720566371239725320LLU), QU( 4958672473562397622LLU),
- QU( 3032326668253243412LLU), QU(13689418691726908338LLU),
- QU( 1895205511728843996LLU), QU( 8146303515271990527LLU),
- QU(16507343500056113480LLU), QU( 473996939105902919LLU),
- QU( 9897686885246881481LLU), QU(14606433762712790575LLU),
- QU( 6732796251605566368LLU), QU( 1399778120855368916LLU),
- QU( 935023885182833777LLU), QU(16066282816186753477LLU),
- QU( 7291270991820612055LLU), QU(17530230393129853844LLU),
- QU(10223493623477451366LLU), QU(15841725630495676683LLU),
- QU(17379567246435515824LLU), QU( 8588251429375561971LLU),
- QU(18339511210887206423LLU), QU(17349587430725976100LLU),
- QU(12244876521394838088LLU), QU( 6382187714147161259LLU),
- QU(12335807181848950831LLU), QU(16948885622305460665LLU),
- QU(13755097796371520506LLU), QU(14806740373324947801LLU),
- QU( 4828699633859287703LLU), QU( 8209879281452301604LLU),
- QU(12435716669553736437LLU), QU(13970976859588452131LLU),
- QU( 6233960842566773148LLU), QU(12507096267900505759LLU),
- QU( 1198713114381279421LLU), QU(14989862731124149015LLU),
- QU(15932189508707978949LLU), QU( 2526406641432708722LLU),
- QU( 29187427817271982LLU), QU( 1499802773054556353LLU),
- QU(10816638187021897173LLU), QU( 5436139270839738132LLU),
- QU( 6659882287036010082LLU), QU( 2154048955317173697LLU),
- QU(10887317019333757642LLU), QU(16281091802634424955LLU),
- QU(10754549879915384901LLU), QU(10760611745769249815LLU),
- QU( 2161505946972504002LLU), QU( 5243132808986265107LLU),
- QU(10129852179873415416LLU), QU( 710339480008649081LLU),
- QU( 7802129453068808528LLU), QU(17967213567178907213LLU),
- QU(15730859124668605599LLU), QU(13058356168962376502LLU),
- QU( 3701224985413645909LLU), QU(14464065869149109264LLU),
- QU( 9959272418844311646LLU), QU(10157426099515958752LLU),
- QU(14013736814538268528LLU), QU(17797456992065653951LLU),
- QU(17418878140257344806LLU), QU(15457429073540561521LLU),
- QU( 2184426881360949378LLU), QU( 2062193041154712416LLU),
- QU( 8553463347406931661LLU), QU( 4913057625202871854LLU),
- QU( 2668943682126618425LLU), QU(17064444737891172288LLU),
- QU( 4997115903913298637LLU), QU(12019402608892327416LLU),
- QU(17603584559765897352LLU), QU(11367529582073647975LLU),
- QU( 8211476043518436050LLU), QU( 8676849804070323674LLU),
- QU(18431829230394475730LLU), QU(10490177861361247904LLU),
- QU( 9508720602025651349LLU), QU( 7409627448555722700LLU),
- QU( 5804047018862729008LLU), QU(11943858176893142594LLU),
- QU(11908095418933847092LLU), QU( 5415449345715887652LLU),
- QU( 1554022699166156407LLU), QU( 9073322106406017161LLU),
- QU( 7080630967969047082LLU), QU(18049736940860732943LLU),
- QU(12748714242594196794LLU), QU( 1226992415735156741LLU),
- QU(17900981019609531193LLU), QU(11720739744008710999LLU),
- QU( 3006400683394775434LLU), QU(11347974011751996028LLU),
- QU( 3316999628257954608LLU), QU( 8384484563557639101LLU),
- QU(18117794685961729767LLU), QU( 1900145025596618194LLU),
- QU(17459527840632892676LLU), QU( 5634784101865710994LLU),
- QU( 7918619300292897158LLU), QU( 3146577625026301350LLU),
- QU( 9955212856499068767LLU), QU( 1873995843681746975LLU),
- QU( 1561487759967972194LLU), QU( 8322718804375878474LLU),
- QU(11300284215327028366LLU), QU( 4667391032508998982LLU),
- QU( 9820104494306625580LLU), QU(17922397968599970610LLU),
- QU( 1784690461886786712LLU), QU(14940365084341346821LLU),
- QU( 5348719575594186181LLU), QU(10720419084507855261LLU),
- QU(14210394354145143274LLU), QU( 2426468692164000131LLU),
- QU(16271062114607059202LLU), QU(14851904092357070247LLU),
- QU( 6524493015693121897LLU), QU( 9825473835127138531LLU),
- QU(14222500616268569578LLU), QU(15521484052007487468LLU),
- QU(14462579404124614699LLU), QU(11012375590820665520LLU),
- QU(11625327350536084927LLU), QU(14452017765243785417LLU),
- QU( 9989342263518766305LLU), QU( 3640105471101803790LLU),
- QU( 4749866455897513242LLU), QU(13963064946736312044LLU),
- QU(10007416591973223791LLU), QU(18314132234717431115LLU),
- QU( 3286596588617483450LLU), QU( 7726163455370818765LLU),
- QU( 7575454721115379328LLU), QU( 5308331576437663422LLU),
- QU(18288821894903530934LLU), QU( 8028405805410554106LLU),
- QU(15744019832103296628LLU), QU( 149765559630932100LLU),
- QU( 6137705557200071977LLU), QU(14513416315434803615LLU),
- QU(11665702820128984473LLU), QU( 218926670505601386LLU),
- QU( 6868675028717769519LLU), QU(15282016569441512302LLU),
- QU( 5707000497782960236LLU), QU( 6671120586555079567LLU),
- QU( 2194098052618985448LLU), QU(16849577895477330978LLU),
- QU(12957148471017466283LLU), QU( 1997805535404859393LLU),
- QU( 1180721060263860490LLU), QU(13206391310193756958LLU),
- QU(12980208674461861797LLU), QU( 3825967775058875366LLU),
- QU(17543433670782042631LLU), QU( 1518339070120322730LLU),
- QU(16344584340890991669LLU), QU( 2611327165318529819LLU),
- QU(11265022723283422529LLU), QU( 4001552800373196817LLU),
- QU(14509595890079346161LLU), QU( 3528717165416234562LLU),
- QU(18153222571501914072LLU), QU( 9387182977209744425LLU),
- QU(10064342315985580021LLU), QU(11373678413215253977LLU),
- QU( 2308457853228798099LLU), QU( 9729042942839545302LLU),
- QU( 7833785471140127746LLU), QU( 6351049900319844436LLU),
- QU(14454610627133496067LLU), QU(12533175683634819111LLU),
- QU(15570163926716513029LLU), QU(13356980519185762498LLU)
+ KQU( 2100341266307895239), KQU( 8344256300489757943),
+ KQU(15687933285484243894), KQU( 8268620370277076319),
+ KQU(12371852309826545459), KQU( 8800491541730110238),
+ KQU(18113268950100835773), KQU( 2886823658884438119),
+ KQU( 3293667307248180724), KQU( 9307928143300172731),
+ KQU( 7688082017574293629), KQU( 900986224735166665),
+ KQU( 9977972710722265039), KQU( 6008205004994830552),
+ KQU( 546909104521689292), KQU( 7428471521869107594),
+ KQU(14777563419314721179), KQU(16116143076567350053),
+ KQU( 5322685342003142329), KQU( 4200427048445863473),
+ KQU( 4693092150132559146), KQU(13671425863759338582),
+ KQU( 6747117460737639916), KQU( 4732666080236551150),
+ KQU( 5912839950611941263), KQU( 3903717554504704909),
+ KQU( 2615667650256786818), KQU(10844129913887006352),
+ KQU(13786467861810997820), KQU(14267853002994021570),
+ KQU(13767807302847237439), KQU(16407963253707224617),
+ KQU( 4802498363698583497), KQU( 2523802839317209764),
+ KQU( 3822579397797475589), KQU( 8950320572212130610),
+ KQU( 3745623504978342534), KQU(16092609066068482806),
+ KQU( 9817016950274642398), KQU(10591660660323829098),
+ KQU(11751606650792815920), KQU( 5122873818577122211),
+ KQU(17209553764913936624), KQU( 6249057709284380343),
+ KQU(15088791264695071830), KQU(15344673071709851930),
+ KQU( 4345751415293646084), KQU( 2542865750703067928),
+ KQU(13520525127852368784), KQU(18294188662880997241),
+ KQU( 3871781938044881523), KQU( 2873487268122812184),
+ KQU(15099676759482679005), KQU(15442599127239350490),
+ KQU( 6311893274367710888), KQU( 3286118760484672933),
+ KQU( 4146067961333542189), KQU(13303942567897208770),
+ KQU( 8196013722255630418), KQU( 4437815439340979989),
+ KQU(15433791533450605135), KQU( 4254828956815687049),
+ KQU( 1310903207708286015), KQU(10529182764462398549),
+ KQU(14900231311660638810), KQU( 9727017277104609793),
+ KQU( 1821308310948199033), KQU(11628861435066772084),
+ KQU( 9469019138491546924), KQU( 3145812670532604988),
+ KQU( 9938468915045491919), KQU( 1562447430672662142),
+ KQU(13963995266697989134), KQU( 3356884357625028695),
+ KQU( 4499850304584309747), KQU( 8456825817023658122),
+ KQU(10859039922814285279), KQU( 8099512337972526555),
+ KQU( 348006375109672149), KQU(11919893998241688603),
+ KQU( 1104199577402948826), KQU(16689191854356060289),
+ KQU(10992552041730168078), KQU( 7243733172705465836),
+ KQU( 5668075606180319560), KQU(18182847037333286970),
+ KQU( 4290215357664631322), KQU( 4061414220791828613),
+ KQU(13006291061652989604), KQU( 7140491178917128798),
+ KQU(12703446217663283481), KQU( 5500220597564558267),
+ KQU(10330551509971296358), KQU(15958554768648714492),
+ KQU( 5174555954515360045), KQU( 1731318837687577735),
+ KQU( 3557700801048354857), KQU(13764012341928616198),
+ KQU(13115166194379119043), KQU( 7989321021560255519),
+ KQU( 2103584280905877040), KQU( 9230788662155228488),
+ KQU(16396629323325547654), KQU( 657926409811318051),
+ KQU(15046700264391400727), KQU( 5120132858771880830),
+ KQU( 7934160097989028561), KQU( 6963121488531976245),
+ KQU(17412329602621742089), KQU(15144843053931774092),
+ KQU(17204176651763054532), KQU(13166595387554065870),
+ KQU( 8590377810513960213), KQU( 5834365135373991938),
+ KQU( 7640913007182226243), KQU( 3479394703859418425),
+ KQU(16402784452644521040), KQU( 4993979809687083980),
+ KQU(13254522168097688865), KQU(15643659095244365219),
+ KQU( 5881437660538424982), KQU(11174892200618987379),
+ KQU( 254409966159711077), KQU(17158413043140549909),
+ KQU( 3638048789290376272), KQU( 1376816930299489190),
+ KQU( 4622462095217761923), KQU(15086407973010263515),
+ KQU(13253971772784692238), KQU( 5270549043541649236),
+ KQU(11182714186805411604), KQU(12283846437495577140),
+ KQU( 5297647149908953219), KQU(10047451738316836654),
+ KQU( 4938228100367874746), KQU(12328523025304077923),
+ KQU( 3601049438595312361), KQU( 9313624118352733770),
+ KQU(13322966086117661798), KQU(16660005705644029394),
+ KQU(11337677526988872373), KQU(13869299102574417795),
+ KQU(15642043183045645437), KQU( 3021755569085880019),
+ KQU( 4979741767761188161), KQU(13679979092079279587),
+ KQU( 3344685842861071743), KQU(13947960059899588104),
+ KQU( 305806934293368007), KQU( 5749173929201650029),
+ KQU(11123724852118844098), KQU(15128987688788879802),
+ KQU(15251651211024665009), KQU( 7689925933816577776),
+ KQU(16732804392695859449), KQU(17087345401014078468),
+ KQU(14315108589159048871), KQU( 4820700266619778917),
+ KQU(16709637539357958441), KQU( 4936227875177351374),
+ KQU( 2137907697912987247), KQU(11628565601408395420),
+ KQU( 2333250549241556786), KQU( 5711200379577778637),
+ KQU( 5170680131529031729), KQU(12620392043061335164),
+ KQU( 95363390101096078), KQU( 5487981914081709462),
+ KQU( 1763109823981838620), KQU( 3395861271473224396),
+ KQU( 1300496844282213595), KQU( 6894316212820232902),
+ KQU(10673859651135576674), KQU( 5911839658857903252),
+ KQU(17407110743387299102), KQU( 8257427154623140385),
+ KQU(11389003026741800267), KQU( 4070043211095013717),
+ KQU(11663806997145259025), KQU(15265598950648798210),
+ KQU( 630585789434030934), KQU( 3524446529213587334),
+ KQU( 7186424168495184211), KQU(10806585451386379021),
+ KQU(11120017753500499273), KQU( 1586837651387701301),
+ KQU(17530454400954415544), KQU( 9991670045077880430),
+ KQU( 7550997268990730180), KQU( 8640249196597379304),
+ KQU( 3522203892786893823), KQU(10401116549878854788),
+ KQU(13690285544733124852), KQU( 8295785675455774586),
+ KQU(15535716172155117603), KQU( 3112108583723722511),
+ KQU(17633179955339271113), KQU(18154208056063759375),
+ KQU( 1866409236285815666), KQU(13326075895396412882),
+ KQU( 8756261842948020025), KQU( 6281852999868439131),
+ KQU(15087653361275292858), KQU(10333923911152949397),
+ KQU( 5265567645757408500), KQU(12728041843210352184),
+ KQU( 6347959327507828759), KQU( 154112802625564758),
+ KQU(18235228308679780218), KQU( 3253805274673352418),
+ KQU( 4849171610689031197), KQU(17948529398340432518),
+ KQU(13803510475637409167), KQU(13506570190409883095),
+ KQU(15870801273282960805), KQU( 8451286481299170773),
+ KQU( 9562190620034457541), KQU( 8518905387449138364),
+ KQU(12681306401363385655), KQU( 3788073690559762558),
+ KQU( 5256820289573487769), KQU( 2752021372314875467),
+ KQU( 6354035166862520716), KQU( 4328956378309739069),
+ KQU( 449087441228269600), KQU( 5533508742653090868),
+ KQU( 1260389420404746988), KQU(18175394473289055097),
+ KQU( 1535467109660399420), KQU( 8818894282874061442),
+ KQU(12140873243824811213), KQU(15031386653823014946),
+ KQU( 1286028221456149232), KQU( 6329608889367858784),
+ KQU( 9419654354945132725), KQU( 6094576547061672379),
+ KQU(17706217251847450255), KQU( 1733495073065878126),
+ KQU(16918923754607552663), KQU( 8881949849954945044),
+ KQU(12938977706896313891), KQU(14043628638299793407),
+ KQU(18393874581723718233), KQU( 6886318534846892044),
+ KQU(14577870878038334081), KQU(13541558383439414119),
+ KQU(13570472158807588273), KQU(18300760537910283361),
+ KQU( 818368572800609205), KQU( 1417000585112573219),
+ KQU(12337533143867683655), KQU(12433180994702314480),
+ KQU( 778190005829189083), KQU(13667356216206524711),
+ KQU( 9866149895295225230), KQU(11043240490417111999),
+ KQU( 1123933826541378598), KQU( 6469631933605123610),
+ KQU(14508554074431980040), KQU(13918931242962026714),
+ KQU( 2870785929342348285), KQU(14786362626740736974),
+ KQU(13176680060902695786), KQU( 9591778613541679456),
+ KQU( 9097662885117436706), KQU( 749262234240924947),
+ KQU( 1944844067793307093), KQU( 4339214904577487742),
+ KQU( 8009584152961946551), KQU(16073159501225501777),
+ KQU( 3335870590499306217), KQU(17088312653151202847),
+ KQU( 3108893142681931848), KQU(16636841767202792021),
+ KQU(10423316431118400637), KQU( 8008357368674443506),
+ KQU(11340015231914677875), KQU(17687896501594936090),
+ KQU(15173627921763199958), KQU( 542569482243721959),
+ KQU(15071714982769812975), KQU( 4466624872151386956),
+ KQU( 1901780715602332461), KQU( 9822227742154351098),
+ KQU( 1479332892928648780), KQU( 6981611948382474400),
+ KQU( 7620824924456077376), KQU(14095973329429406782),
+ KQU( 7902744005696185404), KQU(15830577219375036920),
+ KQU(10287076667317764416), KQU(12334872764071724025),
+ KQU( 4419302088133544331), KQU(14455842851266090520),
+ KQU(12488077416504654222), KQU( 7953892017701886766),
+ KQU( 6331484925529519007), KQU( 4902145853785030022),
+ KQU(17010159216096443073), KQU(11945354668653886087),
+ KQU(15112022728645230829), KQU(17363484484522986742),
+ KQU( 4423497825896692887), KQU( 8155489510809067471),
+ KQU( 258966605622576285), KQU( 5462958075742020534),
+ KQU( 6763710214913276228), KQU( 2368935183451109054),
+ KQU(14209506165246453811), KQU( 2646257040978514881),
+ KQU( 3776001911922207672), KQU( 1419304601390147631),
+ KQU(14987366598022458284), KQU( 3977770701065815721),
+ KQU( 730820417451838898), KQU( 3982991703612885327),
+ KQU( 2803544519671388477), KQU(17067667221114424649),
+ KQU( 2922555119737867166), KQU( 1989477584121460932),
+ KQU(15020387605892337354), KQU( 9293277796427533547),
+ KQU(10722181424063557247), KQU(16704542332047511651),
+ KQU( 5008286236142089514), KQU(16174732308747382540),
+ KQU(17597019485798338402), KQU(13081745199110622093),
+ KQU( 8850305883842258115), KQU(12723629125624589005),
+ KQU( 8140566453402805978), KQU(15356684607680935061),
+ KQU(14222190387342648650), KQU(11134610460665975178),
+ KQU( 1259799058620984266), KQU(13281656268025610041),
+ KQU( 298262561068153992), KQU(12277871700239212922),
+ KQU(13911297774719779438), KQU(16556727962761474934),
+ KQU(17903010316654728010), KQU( 9682617699648434744),
+ KQU(14757681836838592850), KQU( 1327242446558524473),
+ KQU(11126645098780572792), KQU( 1883602329313221774),
+ KQU( 2543897783922776873), KQU(15029168513767772842),
+ KQU(12710270651039129878), KQU(16118202956069604504),
+ KQU(15010759372168680524), KQU( 2296827082251923948),
+ KQU(10793729742623518101), KQU(13829764151845413046),
+ KQU(17769301223184451213), KQU( 3118268169210783372),
+ KQU(17626204544105123127), KQU( 7416718488974352644),
+ KQU(10450751996212925994), KQU( 9352529519128770586),
+ KQU( 259347569641110140), KQU( 8048588892269692697),
+ KQU( 1774414152306494058), KQU(10669548347214355622),
+ KQU(13061992253816795081), KQU(18432677803063861659),
+ KQU( 8879191055593984333), KQU(12433753195199268041),
+ KQU(14919392415439730602), KQU( 6612848378595332963),
+ KQU( 6320986812036143628), KQU(10465592420226092859),
+ KQU( 4196009278962570808), KQU( 3747816564473572224),
+ KQU(17941203486133732898), KQU( 2350310037040505198),
+ KQU( 5811779859134370113), KQU(10492109599506195126),
+ KQU( 7699650690179541274), KQU( 1954338494306022961),
+ KQU(14095816969027231152), KQU( 5841346919964852061),
+ KQU(14945969510148214735), KQU( 3680200305887550992),
+ KQU( 6218047466131695792), KQU( 8242165745175775096),
+ KQU(11021371934053307357), KQU( 1265099502753169797),
+ KQU( 4644347436111321718), KQU( 3609296916782832859),
+ KQU( 8109807992218521571), KQU(18387884215648662020),
+ KQU(14656324896296392902), KQU(17386819091238216751),
+ KQU(17788300878582317152), KQU( 7919446259742399591),
+ KQU( 4466613134576358004), KQU(12928181023667938509),
+ KQU(13147446154454932030), KQU(16552129038252734620),
+ KQU( 8395299403738822450), KQU(11313817655275361164),
+ KQU( 434258809499511718), KQU( 2074882104954788676),
+ KQU( 7929892178759395518), KQU( 9006461629105745388),
+ KQU( 5176475650000323086), KQU(11128357033468341069),
+ KQU(12026158851559118955), KQU(14699716249471156500),
+ KQU( 448982497120206757), KQU( 4156475356685519900),
+ KQU( 6063816103417215727), KQU(10073289387954971479),
+ KQU( 8174466846138590962), KQU( 2675777452363449006),
+ KQU( 9090685420572474281), KQU( 6659652652765562060),
+ KQU(12923120304018106621), KQU(11117480560334526775),
+ KQU( 937910473424587511), KQU( 1838692113502346645),
+ KQU(11133914074648726180), KQU( 7922600945143884053),
+ KQU(13435287702700959550), KQU( 5287964921251123332),
+ KQU(11354875374575318947), KQU(17955724760748238133),
+ KQU(13728617396297106512), KQU( 4107449660118101255),
+ KQU( 1210269794886589623), KQU(11408687205733456282),
+ KQU( 4538354710392677887), KQU(13566803319341319267),
+ KQU(17870798107734050771), KQU( 3354318982568089135),
+ KQU( 9034450839405133651), KQU(13087431795753424314),
+ KQU( 950333102820688239), KQU( 1968360654535604116),
+ KQU(16840551645563314995), KQU( 8867501803892924995),
+ KQU(11395388644490626845), KQU( 1529815836300732204),
+ KQU(13330848522996608842), KQU( 1813432878817504265),
+ KQU( 2336867432693429560), KQU(15192805445973385902),
+ KQU( 2528593071076407877), KQU( 128459777936689248),
+ KQU( 9976345382867214866), KQU( 6208885766767996043),
+ KQU(14982349522273141706), KQU( 3099654362410737822),
+ KQU(13776700761947297661), KQU( 8806185470684925550),
+ KQU( 8151717890410585321), KQU( 640860591588072925),
+ KQU(14592096303937307465), KQU( 9056472419613564846),
+ KQU(14861544647742266352), KQU(12703771500398470216),
+ KQU( 3142372800384138465), KQU( 6201105606917248196),
+ KQU(18337516409359270184), KQU(15042268695665115339),
+ KQU(15188246541383283846), KQU(12800028693090114519),
+ KQU( 5992859621101493472), KQU(18278043971816803521),
+ KQU( 9002773075219424560), KQU( 7325707116943598353),
+ KQU( 7930571931248040822), KQU( 5645275869617023448),
+ KQU( 7266107455295958487), KQU( 4363664528273524411),
+ KQU(14313875763787479809), KQU(17059695613553486802),
+ KQU( 9247761425889940932), KQU(13704726459237593128),
+ KQU( 2701312427328909832), KQU(17235532008287243115),
+ KQU(14093147761491729538), KQU( 6247352273768386516),
+ KQU( 8268710048153268415), KQU( 7985295214477182083),
+ KQU(15624495190888896807), KQU( 3772753430045262788),
+ KQU( 9133991620474991698), KQU( 5665791943316256028),
+ KQU( 7551996832462193473), KQU(13163729206798953877),
+ KQU( 9263532074153846374), KQU( 1015460703698618353),
+ KQU(17929874696989519390), KQU(18257884721466153847),
+ KQU(16271867543011222991), KQU( 3905971519021791941),
+ KQU(16814488397137052085), KQU( 1321197685504621613),
+ KQU( 2870359191894002181), KQU(14317282970323395450),
+ KQU(13663920845511074366), KQU( 2052463995796539594),
+ KQU(14126345686431444337), KQU( 1727572121947022534),
+ KQU(17793552254485594241), KQU( 6738857418849205750),
+ KQU( 1282987123157442952), KQU(16655480021581159251),
+ KQU( 6784587032080183866), KQU(14726758805359965162),
+ KQU( 7577995933961987349), KQU(12539609320311114036),
+ KQU(10789773033385439494), KQU( 8517001497411158227),
+ KQU(10075543932136339710), KQU(14838152340938811081),
+ KQU( 9560840631794044194), KQU(17445736541454117475),
+ KQU(10633026464336393186), KQU(15705729708242246293),
+ KQU( 1117517596891411098), KQU( 4305657943415886942),
+ KQU( 4948856840533979263), KQU(16071681989041789593),
+ KQU(13723031429272486527), KQU( 7639567622306509462),
+ KQU(12670424537483090390), KQU( 9715223453097197134),
+ KQU( 5457173389992686394), KQU( 289857129276135145),
+ KQU(17048610270521972512), KQU( 692768013309835485),
+ KQU(14823232360546632057), KQU(18218002361317895936),
+ KQU( 3281724260212650204), KQU(16453957266549513795),
+ KQU( 8592711109774511881), KQU( 929825123473369579),
+ KQU(15966784769764367791), KQU( 9627344291450607588),
+ KQU(10849555504977813287), KQU( 9234566913936339275),
+ KQU( 6413807690366911210), KQU(10862389016184219267),
+ KQU(13842504799335374048), KQU( 1531994113376881174),
+ KQU( 2081314867544364459), KQU(16430628791616959932),
+ KQU( 8314714038654394368), KQU( 9155473892098431813),
+ KQU(12577843786670475704), KQU( 4399161106452401017),
+ KQU( 1668083091682623186), KQU( 1741383777203714216),
+ KQU( 2162597285417794374), KQU(15841980159165218736),
+ KQU( 1971354603551467079), KQU( 1206714764913205968),
+ KQU( 4790860439591272330), KQU(14699375615594055799),
+ KQU( 8374423871657449988), KQU(10950685736472937738),
+ KQU( 697344331343267176), KQU(10084998763118059810),
+ KQU(12897369539795983124), KQU(12351260292144383605),
+ KQU( 1268810970176811234), KQU( 7406287800414582768),
+ KQU( 516169557043807831), KQU( 5077568278710520380),
+ KQU( 3828791738309039304), KQU( 7721974069946943610),
+ KQU( 3534670260981096460), KQU( 4865792189600584891),
+ KQU(16892578493734337298), KQU( 9161499464278042590),
+ KQU(11976149624067055931), KQU(13219479887277343990),
+ KQU(14161556738111500680), KQU(14670715255011223056),
+ KQU( 4671205678403576558), KQU(12633022931454259781),
+ KQU(14821376219869187646), KQU( 751181776484317028),
+ KQU( 2192211308839047070), KQU(11787306362361245189),
+ KQU(10672375120744095707), KQU( 4601972328345244467),
+ KQU(15457217788831125879), KQU( 8464345256775460809),
+ KQU(10191938789487159478), KQU( 6184348739615197613),
+ KQU(11425436778806882100), KQU( 2739227089124319793),
+ KQU( 461464518456000551), KQU( 4689850170029177442),
+ KQU( 6120307814374078625), KQU(11153579230681708671),
+ KQU( 7891721473905347926), KQU(10281646937824872400),
+ KQU( 3026099648191332248), KQU( 8666750296953273818),
+ KQU(14978499698844363232), KQU(13303395102890132065),
+ KQU( 8182358205292864080), KQU(10560547713972971291),
+ KQU(11981635489418959093), KQU( 3134621354935288409),
+ KQU(11580681977404383968), KQU(14205530317404088650),
+ KQU( 5997789011854923157), KQU(13659151593432238041),
+ KQU(11664332114338865086), KQU( 7490351383220929386),
+ KQU( 7189290499881530378), KQU(15039262734271020220),
+ KQU( 2057217285976980055), KQU( 555570804905355739),
+ KQU(11235311968348555110), KQU(13824557146269603217),
+ KQU(16906788840653099693), KQU( 7222878245455661677),
+ KQU( 5245139444332423756), KQU( 4723748462805674292),
+ KQU(12216509815698568612), KQU(17402362976648951187),
+ KQU(17389614836810366768), KQU( 4880936484146667711),
+ KQU( 9085007839292639880), KQU(13837353458498535449),
+ KQU(11914419854360366677), KQU(16595890135313864103),
+ KQU( 6313969847197627222), KQU(18296909792163910431),
+ KQU(10041780113382084042), KQU( 2499478551172884794),
+ KQU(11057894246241189489), KQU( 9742243032389068555),
+ KQU(12838934582673196228), KQU(13437023235248490367),
+ KQU(13372420669446163240), KQU( 6752564244716909224),
+ KQU( 7157333073400313737), KQU(12230281516370654308),
+ KQU( 1182884552219419117), KQU( 2955125381312499218),
+ KQU(10308827097079443249), KQU( 1337648572986534958),
+ KQU(16378788590020343939), KQU( 108619126514420935),
+ KQU( 3990981009621629188), KQU( 5460953070230946410),
+ KQU( 9703328329366531883), KQU(13166631489188077236),
+ KQU( 1104768831213675170), KQU( 3447930458553877908),
+ KQU( 8067172487769945676), KQU( 5445802098190775347),
+ KQU( 3244840981648973873), KQU(17314668322981950060),
+ KQU( 5006812527827763807), KQU(18158695070225526260),
+ KQU( 2824536478852417853), KQU(13974775809127519886),
+ KQU( 9814362769074067392), KQU(17276205156374862128),
+ KQU(11361680725379306967), KQU( 3422581970382012542),
+ KQU(11003189603753241266), KQU(11194292945277862261),
+ KQU( 6839623313908521348), KQU(11935326462707324634),
+ KQU( 1611456788685878444), KQU(13112620989475558907),
+ KQU( 517659108904450427), KQU(13558114318574407624),
+ KQU(15699089742731633077), KQU( 4988979278862685458),
+ KQU( 8111373583056521297), KQU( 3891258746615399627),
+ KQU( 8137298251469718086), KQU(12748663295624701649),
+ KQU( 4389835683495292062), KQU( 5775217872128831729),
+ KQU( 9462091896405534927), KQU( 8498124108820263989),
+ KQU( 8059131278842839525), KQU(10503167994254090892),
+ KQU(11613153541070396656), KQU(18069248738504647790),
+ KQU( 570657419109768508), KQU( 3950574167771159665),
+ KQU( 5514655599604313077), KQU( 2908460854428484165),
+ KQU(10777722615935663114), KQU(12007363304839279486),
+ KQU( 9800646187569484767), KQU( 8795423564889864287),
+ KQU(14257396680131028419), KQU( 6405465117315096498),
+ KQU( 7939411072208774878), KQU(17577572378528990006),
+ KQU(14785873806715994850), KQU(16770572680854747390),
+ KQU(18127549474419396481), KQU(11637013449455757750),
+ KQU(14371851933996761086), KQU( 3601181063650110280),
+ KQU( 4126442845019316144), KQU(10198287239244320669),
+ KQU(18000169628555379659), KQU(18392482400739978269),
+ KQU( 6219919037686919957), KQU( 3610085377719446052),
+ KQU( 2513925039981776336), KQU(16679413537926716955),
+ KQU(12903302131714909434), KQU( 5581145789762985009),
+ KQU(12325955044293303233), KQU(17216111180742141204),
+ KQU( 6321919595276545740), KQU( 3507521147216174501),
+ KQU( 9659194593319481840), KQU(11473976005975358326),
+ KQU(14742730101435987026), KQU( 492845897709954780),
+ KQU(16976371186162599676), KQU(17712703422837648655),
+ KQU( 9881254778587061697), KQU( 8413223156302299551),
+ KQU( 1563841828254089168), KQU( 9996032758786671975),
+ KQU( 138877700583772667), KQU(13003043368574995989),
+ KQU( 4390573668650456587), KQU( 8610287390568126755),
+ KQU(15126904974266642199), KQU( 6703637238986057662),
+ KQU( 2873075592956810157), KQU( 6035080933946049418),
+ KQU(13382846581202353014), KQU( 7303971031814642463),
+ KQU(18418024405307444267), KQU( 5847096731675404647),
+ KQU( 4035880699639842500), KQU(11525348625112218478),
+ KQU( 3041162365459574102), KQU( 2604734487727986558),
+ KQU(15526341771636983145), KQU(14556052310697370254),
+ KQU(12997787077930808155), KQU( 9601806501755554499),
+ KQU(11349677952521423389), KQU(14956777807644899350),
+ KQU(16559736957742852721), KQU(12360828274778140726),
+ KQU( 6685373272009662513), KQU(16932258748055324130),
+ KQU(15918051131954158508), KQU( 1692312913140790144),
+ KQU( 546653826801637367), KQU( 5341587076045986652),
+ KQU(14975057236342585662), KQU(12374976357340622412),
+ KQU(10328833995181940552), KQU(12831807101710443149),
+ KQU(10548514914382545716), KQU( 2217806727199715993),
+ KQU(12627067369242845138), KQU( 4598965364035438158),
+ KQU( 150923352751318171), KQU(14274109544442257283),
+ KQU( 4696661475093863031), KQU( 1505764114384654516),
+ KQU(10699185831891495147), KQU( 2392353847713620519),
+ KQU( 3652870166711788383), KQU( 8640653276221911108),
+ KQU( 3894077592275889704), KQU( 4918592872135964845),
+ KQU(16379121273281400789), KQU(12058465483591683656),
+ KQU(11250106829302924945), KQU( 1147537556296983005),
+ KQU( 6376342756004613268), KQU(14967128191709280506),
+ KQU(18007449949790627628), KQU( 9497178279316537841),
+ KQU( 7920174844809394893), KQU(10037752595255719907),
+ KQU(15875342784985217697), KQU(15311615921712850696),
+ KQU( 9552902652110992950), KQU(14054979450099721140),
+ KQU( 5998709773566417349), KQU(18027910339276320187),
+ KQU( 8223099053868585554), KQU( 7842270354824999767),
+ KQU( 4896315688770080292), KQU(12969320296569787895),
+ KQU( 2674321489185759961), KQU( 4053615936864718439),
+ KQU(11349775270588617578), KQU( 4743019256284553975),
+ KQU( 5602100217469723769), KQU(14398995691411527813),
+ KQU( 7412170493796825470), KQU( 836262406131744846),
+ KQU( 8231086633845153022), KQU( 5161377920438552287),
+ KQU( 8828731196169924949), KQU(16211142246465502680),
+ KQU( 3307990879253687818), KQU( 5193405406899782022),
+ KQU( 8510842117467566693), KQU( 6070955181022405365),
+ KQU(14482950231361409799), KQU(12585159371331138077),
+ KQU( 3511537678933588148), KQU( 2041849474531116417),
+ KQU(10944936685095345792), KQU(18303116923079107729),
+ KQU( 2720566371239725320), KQU( 4958672473562397622),
+ KQU( 3032326668253243412), KQU(13689418691726908338),
+ KQU( 1895205511728843996), KQU( 8146303515271990527),
+ KQU(16507343500056113480), KQU( 473996939105902919),
+ KQU( 9897686885246881481), KQU(14606433762712790575),
+ KQU( 6732796251605566368), KQU( 1399778120855368916),
+ KQU( 935023885182833777), KQU(16066282816186753477),
+ KQU( 7291270991820612055), KQU(17530230393129853844),
+ KQU(10223493623477451366), KQU(15841725630495676683),
+ KQU(17379567246435515824), KQU( 8588251429375561971),
+ KQU(18339511210887206423), KQU(17349587430725976100),
+ KQU(12244876521394838088), KQU( 6382187714147161259),
+ KQU(12335807181848950831), KQU(16948885622305460665),
+ KQU(13755097796371520506), KQU(14806740373324947801),
+ KQU( 4828699633859287703), KQU( 8209879281452301604),
+ KQU(12435716669553736437), KQU(13970976859588452131),
+ KQU( 6233960842566773148), KQU(12507096267900505759),
+ KQU( 1198713114381279421), KQU(14989862731124149015),
+ KQU(15932189508707978949), KQU( 2526406641432708722),
+ KQU( 29187427817271982), KQU( 1499802773054556353),
+ KQU(10816638187021897173), KQU( 5436139270839738132),
+ KQU( 6659882287036010082), KQU( 2154048955317173697),
+ KQU(10887317019333757642), KQU(16281091802634424955),
+ KQU(10754549879915384901), KQU(10760611745769249815),
+ KQU( 2161505946972504002), KQU( 5243132808986265107),
+ KQU(10129852179873415416), KQU( 710339480008649081),
+ KQU( 7802129453068808528), KQU(17967213567178907213),
+ KQU(15730859124668605599), KQU(13058356168962376502),
+ KQU( 3701224985413645909), KQU(14464065869149109264),
+ KQU( 9959272418844311646), KQU(10157426099515958752),
+ KQU(14013736814538268528), KQU(17797456992065653951),
+ KQU(17418878140257344806), KQU(15457429073540561521),
+ KQU( 2184426881360949378), KQU( 2062193041154712416),
+ KQU( 8553463347406931661), KQU( 4913057625202871854),
+ KQU( 2668943682126618425), KQU(17064444737891172288),
+ KQU( 4997115903913298637), KQU(12019402608892327416),
+ KQU(17603584559765897352), KQU(11367529582073647975),
+ KQU( 8211476043518436050), KQU( 8676849804070323674),
+ KQU(18431829230394475730), KQU(10490177861361247904),
+ KQU( 9508720602025651349), KQU( 7409627448555722700),
+ KQU( 5804047018862729008), KQU(11943858176893142594),
+ KQU(11908095418933847092), KQU( 5415449345715887652),
+ KQU( 1554022699166156407), KQU( 9073322106406017161),
+ KQU( 7080630967969047082), KQU(18049736940860732943),
+ KQU(12748714242594196794), KQU( 1226992415735156741),
+ KQU(17900981019609531193), KQU(11720739744008710999),
+ KQU( 3006400683394775434), KQU(11347974011751996028),
+ KQU( 3316999628257954608), KQU( 8384484563557639101),
+ KQU(18117794685961729767), KQU( 1900145025596618194),
+ KQU(17459527840632892676), KQU( 5634784101865710994),
+ KQU( 7918619300292897158), KQU( 3146577625026301350),
+ KQU( 9955212856499068767), KQU( 1873995843681746975),
+ KQU( 1561487759967972194), KQU( 8322718804375878474),
+ KQU(11300284215327028366), KQU( 4667391032508998982),
+ KQU( 9820104494306625580), KQU(17922397968599970610),
+ KQU( 1784690461886786712), KQU(14940365084341346821),
+ KQU( 5348719575594186181), KQU(10720419084507855261),
+ KQU(14210394354145143274), KQU( 2426468692164000131),
+ KQU(16271062114607059202), KQU(14851904092357070247),
+ KQU( 6524493015693121897), KQU( 9825473835127138531),
+ KQU(14222500616268569578), KQU(15521484052007487468),
+ KQU(14462579404124614699), KQU(11012375590820665520),
+ KQU(11625327350536084927), KQU(14452017765243785417),
+ KQU( 9989342263518766305), KQU( 3640105471101803790),
+ KQU( 4749866455897513242), KQU(13963064946736312044),
+ KQU(10007416591973223791), KQU(18314132234717431115),
+ KQU( 3286596588617483450), KQU( 7726163455370818765),
+ KQU( 7575454721115379328), KQU( 5308331576437663422),
+ KQU(18288821894903530934), KQU( 8028405805410554106),
+ KQU(15744019832103296628), KQU( 149765559630932100),
+ KQU( 6137705557200071977), KQU(14513416315434803615),
+ KQU(11665702820128984473), KQU( 218926670505601386),
+ KQU( 6868675028717769519), KQU(15282016569441512302),
+ KQU( 5707000497782960236), KQU( 6671120586555079567),
+ KQU( 2194098052618985448), KQU(16849577895477330978),
+ KQU(12957148471017466283), KQU( 1997805535404859393),
+ KQU( 1180721060263860490), KQU(13206391310193756958),
+ KQU(12980208674461861797), KQU( 3825967775058875366),
+ KQU(17543433670782042631), KQU( 1518339070120322730),
+ KQU(16344584340890991669), KQU( 2611327165318529819),
+ KQU(11265022723283422529), KQU( 4001552800373196817),
+ KQU(14509595890079346161), KQU( 3528717165416234562),
+ KQU(18153222571501914072), KQU( 9387182977209744425),
+ KQU(10064342315985580021), KQU(11373678413215253977),
+ KQU( 2308457853228798099), KQU( 9729042942839545302),
+ KQU( 7833785471140127746), KQU( 6351049900319844436),
+ KQU(14454610627133496067), KQU(12533175683634819111),
+ KQU(15570163926716513029), KQU(13356980519185762498)
};
TEST_BEGIN(test_gen_rand_32)
@@ -1543,13 +1543,13 @@ TEST_BEGIN(test_gen_rand_64)
}
r = gen_rand64(ctx);
assert_u64_eq(r, array64[i],
- "Mismatch at array64[%d]=%"PRIx64", gen=%"PRIx64, i,
+ "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i,
array64[i], r);
}
for (i = 0; i < COUNT_2; i++) {
r = gen_rand64(ctx);
assert_u64_eq(r, array64_2[i],
- "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64"", i,
+ "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i,
array64_2[i], r);
}
fini_gen_rand(ctx);
@@ -1580,13 +1580,13 @@ TEST_BEGIN(test_by_array_64)
}
r = gen_rand64(ctx);
assert_u64_eq(r, array64[i],
- "Mismatch at array64[%d]=%"PRIx64" gen=%"PRIx64, i,
+ "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i,
array64[i], r);
}
for (i = 0; i < COUNT_2; i++) {
r = gen_rand64(ctx);
assert_u64_eq(r, array64_2[i],
- "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64, i,
+ "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i,
array64_2[i], r);
}
fini_gen_rand(ctx);
diff --git a/deps/jemalloc/test/unit/atomic.c b/deps/jemalloc/test/unit/atomic.c
new file mode 100644
index 000000000..bdd74f659
--- /dev/null
+++ b/deps/jemalloc/test/unit/atomic.c
@@ -0,0 +1,122 @@
+#include "test/jemalloc_test.h"
+
+#define TEST_STRUCT(p, t) \
+struct p##_test_s { \
+ t accum0; \
+ t x; \
+ t s; \
+}; \
+typedef struct p##_test_s p##_test_t;
+
+#define TEST_BODY(p, t, tc, ta, FMT) do { \
+ const p##_test_t tests[] = { \
+ {(t)-1, (t)-1, (t)-2}, \
+ {(t)-1, (t) 0, (t)-2}, \
+ {(t)-1, (t) 1, (t)-2}, \
+ \
+ {(t) 0, (t)-1, (t)-2}, \
+ {(t) 0, (t) 0, (t)-2}, \
+ {(t) 0, (t) 1, (t)-2}, \
+ \
+ {(t) 1, (t)-1, (t)-2}, \
+ {(t) 1, (t) 0, (t)-2}, \
+ {(t) 1, (t) 1, (t)-2}, \
+ \
+ {(t)0, (t)-(1 << 22), (t)-2}, \
+ {(t)0, (t)(1 << 22), (t)-2}, \
+ {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \
+ {(t)(1 << 22), (t)(1 << 22), (t)-2} \
+ }; \
+ unsigned i; \
+ \
+ for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \
+ bool err; \
+ t accum = tests[i].accum0; \
+ assert_##ta##_eq(atomic_read_##p(&accum), \
+ tests[i].accum0, \
+ "Erroneous read, i=%u", i); \
+ \
+ assert_##ta##_eq(atomic_add_##p(&accum, tests[i].x), \
+ (t)((tc)tests[i].accum0 + (tc)tests[i].x), \
+ "i=%u, accum=%"FMT", x=%"FMT, \
+ i, tests[i].accum0, tests[i].x); \
+ assert_##ta##_eq(atomic_read_##p(&accum), accum, \
+ "Erroneous add, i=%u", i); \
+ \
+ accum = tests[i].accum0; \
+ assert_##ta##_eq(atomic_sub_##p(&accum, tests[i].x), \
+ (t)((tc)tests[i].accum0 - (tc)tests[i].x), \
+ "i=%u, accum=%"FMT", x=%"FMT, \
+ i, tests[i].accum0, tests[i].x); \
+ assert_##ta##_eq(atomic_read_##p(&accum), accum, \
+ "Erroneous sub, i=%u", i); \
+ \
+ accum = tests[i].accum0; \
+ err = atomic_cas_##p(&accum, tests[i].x, tests[i].s); \
+ assert_b_eq(err, tests[i].accum0 != tests[i].x, \
+ "Erroneous cas success/failure result"); \
+ assert_##ta##_eq(accum, err ? tests[i].accum0 : \
+ tests[i].s, "Erroneous cas effect, i=%u", i); \
+ \
+ accum = tests[i].accum0; \
+ atomic_write_##p(&accum, tests[i].s); \
+ assert_##ta##_eq(accum, tests[i].s, \
+ "Erroneous write, i=%u", i); \
+ } \
+} while (0)
+
+TEST_STRUCT(uint64, uint64_t)
+TEST_BEGIN(test_atomic_uint64)
+{
+
+#if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+ test_skip("64-bit atomic operations not supported");
+#else
+ TEST_BODY(uint64, uint64_t, uint64_t, u64, FMTx64);
+#endif
+}
+TEST_END
+
+TEST_STRUCT(uint32, uint32_t)
+TEST_BEGIN(test_atomic_uint32)
+{
+
+ TEST_BODY(uint32, uint32_t, uint32_t, u32, "#"FMTx32);
+}
+TEST_END
+
+TEST_STRUCT(p, void *)
+TEST_BEGIN(test_atomic_p)
+{
+
+ TEST_BODY(p, void *, uintptr_t, ptr, "p");
+}
+TEST_END
+
+TEST_STRUCT(z, size_t)
+TEST_BEGIN(test_atomic_z)
+{
+
+ TEST_BODY(z, size_t, size_t, zu, "#zx");
+}
+TEST_END
+
+TEST_STRUCT(u, unsigned)
+TEST_BEGIN(test_atomic_u)
+{
+
+ TEST_BODY(u, unsigned, unsigned, u, "#x");
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_atomic_uint64,
+ test_atomic_uint32,
+ test_atomic_p,
+ test_atomic_z,
+ test_atomic_u));
+}
diff --git a/deps/jemalloc/test/unit/bitmap.c b/deps/jemalloc/test/unit/bitmap.c
index 8086b8885..7da583d85 100644
--- a/deps/jemalloc/test/unit/bitmap.c
+++ b/deps/jemalloc/test/unit/bitmap.c
@@ -1,17 +1,11 @@
#include "test/jemalloc_test.h"
-#if (LG_BITMAP_MAXBITS > 12)
-# define MAXBITS 4500
-#else
-# define MAXBITS (1U << LG_BITMAP_MAXBITS)
-#endif
-
TEST_BEGIN(test_bitmap_size)
{
size_t i, prev_size;
prev_size = 0;
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 1; i <= BITMAP_MAXBITS; i++) {
size_t size = bitmap_size(i);
assert_true(size >= prev_size,
"Bitmap size is smaller than expected");
@@ -24,12 +18,12 @@ TEST_BEGIN(test_bitmap_init)
{
size_t i;
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 1; i <= BITMAP_MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+ bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
@@ -47,12 +41,12 @@ TEST_BEGIN(test_bitmap_set)
{
size_t i;
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 1; i <= BITMAP_MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+ bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
@@ -70,12 +64,12 @@ TEST_BEGIN(test_bitmap_unset)
{
size_t i;
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 1; i <= BITMAP_MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+ bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
@@ -99,12 +93,12 @@ TEST_BEGIN(test_bitmap_sfu)
{
size_t i;
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 1; i <= BITMAP_MAXBITS; i++) {
bitmap_info_t binfo;
bitmap_info_init(&binfo, i);
{
ssize_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+ bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) *
bitmap_info_ngroups(&binfo));
bitmap_init(bitmap, &binfo);
diff --git a/deps/jemalloc/test/unit/ckh.c b/deps/jemalloc/test/unit/ckh.c
index b214c279a..b11759599 100644
--- a/deps/jemalloc/test/unit/ckh.c
+++ b/deps/jemalloc/test/unit/ckh.c
@@ -2,20 +2,24 @@
TEST_BEGIN(test_new_delete)
{
+ tsd_t *tsd;
ckh_t ckh;
- assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp),
- "Unexpected ckh_new() error");
- ckh_delete(&ckh);
+ tsd = tsd_fetch();
- assert_false(ckh_new(&ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp),
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
"Unexpected ckh_new() error");
- ckh_delete(&ckh);
+ ckh_delete(tsd, &ckh);
+
+ assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash,
+ ckh_pointer_keycomp), "Unexpected ckh_new() error");
+ ckh_delete(tsd, &ckh);
}
TEST_END
TEST_BEGIN(test_count_insert_search_remove)
{
+ tsd_t *tsd;
ckh_t ckh;
const char *strs[] = {
"a string",
@@ -26,7 +30,9 @@ TEST_BEGIN(test_count_insert_search_remove)
const char *missing = "A string not in the hash table.";
size_t i;
- assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp),
+ tsd = tsd_fetch();
+
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp),
"Unexpected ckh_new() error");
assert_zu_eq(ckh_count(&ckh), 0,
"ckh_count() should return %zu, but it returned %zu", ZU(0),
@@ -34,7 +40,7 @@ TEST_BEGIN(test_count_insert_search_remove)
/* Insert. */
for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
- ckh_insert(&ckh, strs[i], strs[i]);
+ ckh_insert(tsd, &ckh, strs[i], strs[i]);
assert_zu_eq(ckh_count(&ckh), i+1,
"ckh_count() should return %zu, but it returned %zu", i+1,
ckh_count(&ckh));
@@ -58,10 +64,10 @@ TEST_BEGIN(test_count_insert_search_remove)
ks = (i & 1) ? strs[i] : (const char *)NULL;
vs = (i & 2) ? strs[i] : (const char *)NULL;
- assert_ptr_eq((void *)ks, (void *)k.s,
- "Key mismatch, i=%zu", i);
- assert_ptr_eq((void *)vs, (void *)v.s,
- "Value mismatch, i=%zu", i);
+ assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
+ i);
+ assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
+ i);
}
assert_true(ckh_search(&ckh, missing, NULL, NULL),
"Unexpected ckh_search() success");
@@ -79,36 +85,39 @@ TEST_BEGIN(test_count_insert_search_remove)
vp = (i & 2) ? &v.p : NULL;
k.p = NULL;
v.p = NULL;
- assert_false(ckh_remove(&ckh, strs[i], kp, vp),
+ assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp),
"Unexpected ckh_remove() error");
ks = (i & 1) ? strs[i] : (const char *)NULL;
vs = (i & 2) ? strs[i] : (const char *)NULL;
- assert_ptr_eq((void *)ks, (void *)k.s,
- "Key mismatch, i=%zu", i);
- assert_ptr_eq((void *)vs, (void *)v.s,
- "Value mismatch, i=%zu", i);
+ assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
+ i);
+ assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
+ i);
assert_zu_eq(ckh_count(&ckh),
sizeof(strs)/sizeof(const char *) - i - 1,
"ckh_count() should return %zu, but it returned %zu",
- sizeof(strs)/sizeof(const char *) - i - 1,
+ sizeof(strs)/sizeof(const char *) - i - 1,
ckh_count(&ckh));
}
- ckh_delete(&ckh);
+ ckh_delete(tsd, &ckh);
}
TEST_END
TEST_BEGIN(test_insert_iter_remove)
{
#define NITEMS ZU(1000)
+ tsd_t *tsd;
ckh_t ckh;
void **p[NITEMS];
void *q, *r;
size_t i;
- assert_false(ckh_new(&ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp),
- "Unexpected ckh_new() error");
+ tsd = tsd_fetch();
+
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash,
+ ckh_pointer_keycomp), "Unexpected ckh_new() error");
for (i = 0; i < NITEMS; i++) {
p[i] = mallocx(i+1, 0);
@@ -119,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove)
size_t j;
for (j = i; j < NITEMS; j++) {
- assert_false(ckh_insert(&ckh, p[j], p[j]),
+ assert_false(ckh_insert(tsd, &ckh, p[j], p[j]),
"Unexpected ckh_insert() failure");
assert_false(ckh_search(&ckh, p[j], &q, &r),
"Unexpected ckh_search() failure");
@@ -134,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove)
for (j = i + 1; j < NITEMS; j++) {
assert_false(ckh_search(&ckh, p[j], NULL, NULL),
"Unexpected ckh_search() failure");
- assert_false(ckh_remove(&ckh, p[j], &q, &r),
+ assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r),
"Unexpected ckh_remove() failure");
assert_ptr_eq(p[j], q, "Key pointer mismatch");
assert_ptr_eq(p[j], r, "Value pointer mismatch");
assert_true(ckh_search(&ckh, p[j], NULL, NULL),
"Unexpected ckh_search() success");
- assert_true(ckh_remove(&ckh, p[j], &q, &r),
+ assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r),
"Unexpected ckh_remove() success");
}
@@ -150,8 +159,7 @@ TEST_BEGIN(test_insert_iter_remove)
memset(seen, 0, sizeof(seen));
- for (tabind = 0; ckh_iter(&ckh, &tabind, &q, &r) ==
- false;) {
+ for (tabind = 0; !ckh_iter(&ckh, &tabind, &q, &r);) {
size_t k;
assert_ptr_eq(q, r, "Key and val not equal");
@@ -176,21 +184,21 @@ TEST_BEGIN(test_insert_iter_remove)
for (i = 0; i < NITEMS; i++) {
assert_false(ckh_search(&ckh, p[i], NULL, NULL),
"Unexpected ckh_search() failure");
- assert_false(ckh_remove(&ckh, p[i], &q, &r),
+ assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r),
"Unexpected ckh_remove() failure");
assert_ptr_eq(p[i], q, "Key pointer mismatch");
assert_ptr_eq(p[i], r, "Value pointer mismatch");
assert_true(ckh_search(&ckh, p[i], NULL, NULL),
"Unexpected ckh_search() success");
- assert_true(ckh_remove(&ckh, p[i], &q, &r),
+ assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r),
"Unexpected ckh_remove() success");
dallocx(p[i], 0);
}
assert_zu_eq(ckh_count(&ckh), 0,
- "ckh_count() should return %zu, but it returned %zu", ZU(0),
- ckh_count(&ckh));
- ckh_delete(&ckh);
+ "ckh_count() should return %zu, but it returned %zu",
+ ZU(0), ckh_count(&ckh));
+ ckh_delete(tsd, &ckh);
#undef NITEMS
}
TEST_END
diff --git a/deps/jemalloc/test/unit/hash.c b/deps/jemalloc/test/unit/hash.c
index abb394ac0..77a8cede9 100644
--- a/deps/jemalloc/test/unit/hash.c
+++ b/deps/jemalloc/test/unit/hash.c
@@ -64,8 +64,8 @@ hash_variant_verify(hash_variant_t variant)
{
const size_t hashbytes = hash_variant_bits(variant) / 8;
uint8_t key[256];
- uint8_t hashes[hashbytes * 256];
- uint8_t final[hashbytes];
+ VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256);
+ VARIABLE_ARRAY(uint8_t, final, hashbytes);
unsigned i;
uint32_t computed, expected;
diff --git a/deps/jemalloc/test/unit/junk.c b/deps/jemalloc/test/unit/junk.c
index 85bbf9e2b..b23dd1e95 100644
--- a/deps/jemalloc/test/unit/junk.c
+++ b/deps/jemalloc/test/unit/junk.c
@@ -1,14 +1,26 @@
#include "test/jemalloc_test.h"
#ifdef JEMALLOC_FILL
+# ifndef JEMALLOC_TEST_JUNK_OPT
+# define JEMALLOC_TEST_JUNK_OPT "junk:true"
+# endif
const char *malloc_conf =
- "abort:false,junk:true,zero:false,redzone:true,quarantine:0";
+ "abort:false,zero:false,redzone:true,quarantine:0," JEMALLOC_TEST_JUNK_OPT;
#endif
static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig;
static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig;
static huge_dalloc_junk_t *huge_dalloc_junk_orig;
-static void *most_recently_junked;
+static void *watch_for_junking;
+static bool saw_junking;
+
+static void
+watch_junking(void *p)
+{
+
+ watch_for_junking = p;
+ saw_junking = false;
+}
static void
arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
@@ -21,7 +33,8 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
"Missing junk fill for byte %zu/%zu of deallocated region",
i, bin_info->reg_size);
}
- most_recently_junked = ptr;
+ if (ptr == watch_for_junking)
+ saw_junking = true;
}
static void
@@ -35,7 +48,8 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize)
"Missing junk fill for byte %zu/%zu of deallocated region",
i, usize);
}
- most_recently_junked = ptr;
+ if (ptr == watch_for_junking)
+ saw_junking = true;
}
static void
@@ -48,7 +62,8 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize)
* enough that it doesn't make sense to duplicate the decision logic in
* test code, so don't actually check that the region is junk-filled.
*/
- most_recently_junked = ptr;
+ if (ptr == watch_for_junking)
+ saw_junking = true;
}
static void
@@ -57,12 +72,14 @@ test_junk(size_t sz_min, size_t sz_max)
char *s;
size_t sz_prev, sz, i;
- arena_dalloc_junk_small_orig = arena_dalloc_junk_small;
- arena_dalloc_junk_small = arena_dalloc_junk_small_intercept;
- arena_dalloc_junk_large_orig = arena_dalloc_junk_large;
- arena_dalloc_junk_large = arena_dalloc_junk_large_intercept;
- huge_dalloc_junk_orig = huge_dalloc_junk;
- huge_dalloc_junk = huge_dalloc_junk_intercept;
+ if (opt_junk_free) {
+ arena_dalloc_junk_small_orig = arena_dalloc_junk_small;
+ arena_dalloc_junk_small = arena_dalloc_junk_small_intercept;
+ arena_dalloc_junk_large_orig = arena_dalloc_junk_large;
+ arena_dalloc_junk_large = arena_dalloc_junk_large_intercept;
+ huge_dalloc_junk_orig = huge_dalloc_junk;
+ huge_dalloc_junk = huge_dalloc_junk_intercept;
+ }
sz_prev = 0;
s = (char *)mallocx(sz_min, 0);
@@ -80,34 +97,35 @@ test_junk(size_t sz_min, size_t sz_max)
}
for (i = sz_prev; i < sz; i++) {
- assert_c_eq(s[i], 0xa5,
- "Newly allocated byte %zu/%zu isn't junk-filled",
- i, sz);
+ if (opt_junk_alloc) {
+ assert_c_eq(s[i], 0xa5,
+ "Newly allocated byte %zu/%zu isn't "
+ "junk-filled", i, sz);
+ }
s[i] = 'a';
}
if (xallocx(s, sz+1, 0, 0) == sz) {
- void *junked = (void *)s;
-
+ watch_junking(s);
s = (char *)rallocx(s, sz+1, 0);
assert_ptr_not_null((void *)s,
"Unexpected rallocx() failure");
- if (!config_mremap || sz+1 <= arena_maxclass) {
- assert_ptr_eq(most_recently_junked, junked,
- "Expected region of size %zu to be "
- "junk-filled",
- sz);
- }
+ assert_true(!opt_junk_free || saw_junking,
+ "Expected region of size %zu to be junk-filled",
+ sz);
}
}
+ watch_junking(s);
dallocx(s, 0);
- assert_ptr_eq(most_recently_junked, (void *)s,
+ assert_true(!opt_junk_free || saw_junking,
"Expected region of size %zu to be junk-filled", sz);
- arena_dalloc_junk_small = arena_dalloc_junk_small_orig;
- arena_dalloc_junk_large = arena_dalloc_junk_large_orig;
- huge_dalloc_junk = huge_dalloc_junk_orig;
+ if (opt_junk_free) {
+ arena_dalloc_junk_small = arena_dalloc_junk_small_orig;
+ arena_dalloc_junk_large = arena_dalloc_junk_large_orig;
+ huge_dalloc_junk = huge_dalloc_junk_orig;
+ }
}
TEST_BEGIN(test_junk_small)
@@ -122,7 +140,7 @@ TEST_BEGIN(test_junk_large)
{
test_skip_if(!config_fill);
- test_junk(SMALL_MAXCLASS+1, arena_maxclass);
+ test_junk(SMALL_MAXCLASS+1, large_maxclass);
}
TEST_END
@@ -130,20 +148,32 @@ TEST_BEGIN(test_junk_huge)
{
test_skip_if(!config_fill);
- test_junk(arena_maxclass+1, chunksize*2);
+ test_junk(large_maxclass+1, chunksize*2);
}
TEST_END
arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig;
static void *most_recently_trimmed;
+static size_t
+shrink_size(size_t size)
+{
+ size_t shrink_size;
+
+ for (shrink_size = size - 1; nallocx(shrink_size, 0) == size;
+ shrink_size--)
+ ; /* Do nothing. */
+
+ return (shrink_size);
+}
+
static void
arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize)
{
arena_ralloc_junk_large_orig(ptr, old_usize, usize);
- assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize");
- assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize");
+ assert_zu_eq(old_usize, large_maxclass, "Unexpected old_usize");
+ assert_zu_eq(usize, shrink_size(large_maxclass), "Unexpected usize");
most_recently_trimmed = ptr;
}
@@ -151,13 +181,13 @@ TEST_BEGIN(test_junk_large_ralloc_shrink)
{
void *p1, *p2;
- p1 = mallocx(arena_maxclass, 0);
+ p1 = mallocx(large_maxclass, 0);
assert_ptr_not_null(p1, "Unexpected mallocx() failure");
arena_ralloc_junk_large_orig = arena_ralloc_junk_large;
arena_ralloc_junk_large = arena_ralloc_junk_large_intercept;
- p2 = rallocx(p1, arena_maxclass-PAGE, 0);
+ p2 = rallocx(p1, shrink_size(large_maxclass), 0);
assert_ptr_eq(p1, p2, "Unexpected move during shrink");
arena_ralloc_junk_large = arena_ralloc_junk_large_orig;
@@ -183,6 +213,7 @@ TEST_BEGIN(test_junk_redzone)
arena_redzone_corruption_t *arena_redzone_corruption_orig;
test_skip_if(!config_fill);
+ test_skip_if(!opt_junk_alloc || !opt_junk_free);
arena_redzone_corruption_orig = arena_redzone_corruption;
arena_redzone_corruption = arena_redzone_corruption_replacement;
@@ -213,6 +244,7 @@ int
main(void)
{
+ assert(!config_fill || opt_junk_alloc || opt_junk_free);
return (test(
test_junk_small,
test_junk_large,
diff --git a/deps/jemalloc/test/unit/junk_alloc.c b/deps/jemalloc/test/unit/junk_alloc.c
new file mode 100644
index 000000000..8db3331d2
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_alloc.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_TEST_JUNK_OPT "junk:alloc"
+#include "junk.c"
+#undef JEMALLOC_TEST_JUNK_OPT
diff --git a/deps/jemalloc/test/unit/junk_free.c b/deps/jemalloc/test/unit/junk_free.c
new file mode 100644
index 000000000..482a61d07
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_free.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_TEST_JUNK_OPT "junk:free"
+#include "junk.c"
+#undef JEMALLOC_TEST_JUNK_OPT
diff --git a/deps/jemalloc/test/unit/lg_chunk.c b/deps/jemalloc/test/unit/lg_chunk.c
new file mode 100644
index 000000000..7e5df3814
--- /dev/null
+++ b/deps/jemalloc/test/unit/lg_chunk.c
@@ -0,0 +1,26 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Make sure that opt.lg_chunk clamping is sufficient. In practice, this test
+ * program will fail a debug assertion during initialization and abort (rather
+ * than the test soft-failing) if clamping is insufficient.
+ */
+const char *malloc_conf = "lg_chunk:0";
+
+TEST_BEGIN(test_lg_chunk_clamp)
+{
+ void *p;
+
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ dallocx(p, 0);
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_lg_chunk_clamp));
+}
diff --git a/deps/jemalloc/test/unit/mallctl.c b/deps/jemalloc/test/unit/mallctl.c
index 31fb81057..31e354ca7 100644
--- a/deps/jemalloc/test/unit/mallctl.c
+++ b/deps/jemalloc/test/unit/mallctl.c
@@ -126,11 +126,10 @@ TEST_BEGIN(test_mallctl_config)
assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \
} while (0)
+ TEST_MALLCTL_CONFIG(cache_oblivious);
TEST_MALLCTL_CONFIG(debug);
- TEST_MALLCTL_CONFIG(dss);
TEST_MALLCTL_CONFIG(fill);
TEST_MALLCTL_CONFIG(lazy_lock);
- TEST_MALLCTL_CONFIG(mremap);
TEST_MALLCTL_CONFIG(munmap);
TEST_MALLCTL_CONFIG(prof);
TEST_MALLCTL_CONFIG(prof_libgcc);
@@ -166,12 +165,11 @@ TEST_BEGIN(test_mallctl_opt)
TEST_MALLCTL_OPT(size_t, narenas, always);
TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always);
TEST_MALLCTL_OPT(bool, stats_print, always);
- TEST_MALLCTL_OPT(bool, junk, fill);
+ TEST_MALLCTL_OPT(const char *, junk, fill);
TEST_MALLCTL_OPT(size_t, quarantine, fill);
TEST_MALLCTL_OPT(bool, redzone, fill);
TEST_MALLCTL_OPT(bool, zero, fill);
TEST_MALLCTL_OPT(bool, utrace, utrace);
- TEST_MALLCTL_OPT(bool, valgrind, valgrind);
TEST_MALLCTL_OPT(bool, xmalloc, xmalloc);
TEST_MALLCTL_OPT(bool, tcache, tcache);
TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache);
@@ -214,6 +212,126 @@ TEST_BEGIN(test_manpage_example)
}
TEST_END
+TEST_BEGIN(test_tcache_none)
+{
+ void *p0, *q, *p1;
+
+ test_skip_if(!config_tcache);
+
+ /* Allocate p and q. */
+ p0 = mallocx(42, 0);
+ assert_ptr_not_null(p0, "Unexpected mallocx() failure");
+ q = mallocx(42, 0);
+ assert_ptr_not_null(q, "Unexpected mallocx() failure");
+
+ /* Deallocate p and q, but bypass the tcache for q. */
+ dallocx(p0, 0);
+ dallocx(q, MALLOCX_TCACHE_NONE);
+
+ /* Make sure that tcache-based allocation returns p, not q. */
+ p1 = mallocx(42, 0);
+ assert_ptr_not_null(p1, "Unexpected mallocx() failure");
+ assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region");
+
+ /* Clean up. */
+ dallocx(p1, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+TEST_BEGIN(test_tcache)
+{
+#define NTCACHES 10
+ unsigned tis[NTCACHES];
+ void *ps[NTCACHES];
+ void *qs[NTCACHES];
+ unsigned i;
+ size_t sz, psz, qsz;
+
+ test_skip_if(!config_tcache);
+
+ psz = 42;
+ qsz = nallocx(psz, 0) + 1;
+
+ /* Create tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure, i=%u", i);
+ }
+
+ /* Exercise tcache ID recycling. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+ for (i = 0; i < NTCACHES; i++) {
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure, i=%u", i);
+ }
+
+ /* Flush empty tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+
+ /* Cache some allocations. */
+ for (i = 0; i < NTCACHES; i++) {
+ ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ dallocx(ps[i], MALLOCX_TCACHE(tis[i]));
+
+ qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+ }
+
+ /* Verify that tcaches allocate cached regions. */
+ for (i = 0; i < NTCACHES; i++) {
+ void *p0 = ps[i];
+ ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ assert_ptr_eq(ps[i], p0,
+ "Expected mallocx() to allocate cached region, i=%u", i);
+ }
+
+ /* Verify that reallocation uses cached regions. */
+ for (i = 0; i < NTCACHES; i++) {
+ void *q0 = qs[i];
+ qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u",
+ i);
+ assert_ptr_eq(qs[i], q0,
+ "Expected rallocx() to allocate cached region, i=%u", i);
+ /* Avoid undefined behavior in case of test failure. */
+ if (qs[i] == NULL)
+ qs[i] = ps[i];
+ }
+ for (i = 0; i < NTCACHES; i++)
+ dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+
+ /* Flush some non-empty tcaches. */
+ for (i = 0; i < NTCACHES/2; i++) {
+ assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+
+ /* Destroy tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+}
+TEST_END
+
TEST_BEGIN(test_thread_arena)
{
unsigned arena_old, arena_new, narenas;
@@ -231,6 +349,38 @@ TEST_BEGIN(test_thread_arena)
}
TEST_END
+TEST_BEGIN(test_arena_i_lg_dirty_mult)
+{
+ ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult;
+ size_t sz = sizeof(ssize_t);
+
+ assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+
+ lg_dirty_mult = -2;
+ assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL,
+ &lg_dirty_mult, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ lg_dirty_mult = (sizeof(size_t) << 3);
+ assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL,
+ &lg_dirty_mult, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1;
+ lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult
+ = lg_dirty_mult, lg_dirty_mult++) {
+ ssize_t old_lg_dirty_mult;
+
+ assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult,
+ &sz, &lg_dirty_mult, sizeof(ssize_t)), 0,
+ "Unexpected mallctl() failure");
+ assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult,
+ "Unexpected old arena.0.lg_dirty_mult");
+ }
+}
+TEST_END
+
TEST_BEGIN(test_arena_i_purge)
{
unsigned narenas;
@@ -255,27 +405,41 @@ TEST_BEGIN(test_arena_i_dss)
{
const char *dss_prec_old, *dss_prec_new;
size_t sz = sizeof(dss_prec_old);
+ size_t mib[3];
+ size_t miblen;
- dss_prec_new = "primary";
- assert_d_eq(mallctl("arena.0.dss", &dss_prec_old, &sz, &dss_prec_new,
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+
+ dss_prec_new = "disabled";
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new,
sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure");
assert_str_ne(dss_prec_old, "primary",
"Unexpected default for dss precedence");
- assert_d_eq(mallctl("arena.0.dss", &dss_prec_new, &sz, &dss_prec_old,
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old,
sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure");
-}
-TEST_END
-TEST_BEGIN(test_arenas_purge)
-{
- unsigned arena = 0;
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected value for dss precedence");
- assert_d_eq(mallctl("arenas.purge", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
+ mib[1] = narenas_total_get();
+ dss_prec_new = "disabled";
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new,
+ sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected default for dss precedence");
- assert_d_eq(mallctl("arenas.purge", NULL, NULL, NULL, 0), 0,
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old,
+ sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure");
+
+ assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected value for dss precedence");
}
TEST_END
@@ -287,7 +451,7 @@ TEST_BEGIN(test_arenas_initialized)
assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
{
- bool initialized[narenas];
+ VARIABLE_ARRAY(bool, initialized, narenas);
sz = narenas * sizeof(bool);
assert_d_eq(mallctl("arenas.initialized", initialized, &sz,
@@ -296,6 +460,38 @@ TEST_BEGIN(test_arenas_initialized)
}
TEST_END
+TEST_BEGIN(test_arenas_lg_dirty_mult)
+{
+ ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult;
+ size_t sz = sizeof(ssize_t);
+
+ assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+
+ lg_dirty_mult = -2;
+ assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL,
+ &lg_dirty_mult, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ lg_dirty_mult = (sizeof(size_t) << 3);
+ assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL,
+ &lg_dirty_mult, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1;
+ lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult =
+ lg_dirty_mult, lg_dirty_mult++) {
+ ssize_t old_lg_dirty_mult;
+
+ assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult,
+ &sz, &lg_dirty_mult, sizeof(ssize_t)), 0,
+ "Unexpected mallctl() failure");
+ assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult,
+ "Unexpected old arenas.lg_dirty_mult");
+ }
+}
+TEST_END
+
TEST_BEGIN(test_arenas_constants)
{
@@ -310,7 +506,8 @@ TEST_BEGIN(test_arenas_constants)
TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM);
TEST_ARENAS_CONSTANT(size_t, page, PAGE);
TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS);
- TEST_ARENAS_CONSTANT(size_t, nlruns, nlclasses);
+ TEST_ARENAS_CONSTANT(unsigned, nlruns, nlclasses);
+ TEST_ARENAS_CONSTANT(unsigned, nhchunks, nhclasses);
#undef TEST_ARENAS_CONSTANT
}
@@ -346,12 +543,29 @@ TEST_BEGIN(test_arenas_lrun_constants)
assert_zu_eq(name, expected, "Incorrect "#name" size"); \
} while (0)
- TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE));
+ TEST_ARENAS_LRUN_CONSTANT(size_t, size, LARGE_MINCLASS);
#undef TEST_ARENAS_LRUN_CONSTANT
}
TEST_END
+TEST_BEGIN(test_arenas_hchunk_constants)
+{
+
+#define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \
+ t name; \
+ size_t sz = sizeof(t); \
+ assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \
+ 0), 0, "Unexpected mallctl() failure"); \
+ assert_zu_eq(name, expected, "Incorrect "#name" size"); \
+} while (0)
+
+ TEST_ARENAS_HCHUNK_CONSTANT(size_t, size, chunksize);
+
+#undef TEST_ARENAS_HCHUNK_CONSTANT
+}
+TEST_END
+
TEST_BEGIN(test_arenas_extend)
{
unsigned narenas_before, arena, narenas_after;
@@ -402,14 +616,18 @@ main(void)
test_mallctl_config,
test_mallctl_opt,
test_manpage_example,
+ test_tcache_none,
+ test_tcache,
test_thread_arena,
+ test_arena_i_lg_dirty_mult,
test_arena_i_purge,
test_arena_i_dss,
- test_arenas_purge,
test_arenas_initialized,
+ test_arenas_lg_dirty_mult,
test_arenas_constants,
test_arenas_bin_constants,
test_arenas_lrun_constants,
+ test_arenas_hchunk_constants,
test_arenas_extend,
test_stats_arenas));
}
diff --git a/deps/jemalloc/test/unit/math.c b/deps/jemalloc/test/unit/math.c
index a1b288ea1..ebec77a62 100644
--- a/deps/jemalloc/test/unit/math.c
+++ b/deps/jemalloc/test/unit/math.c
@@ -3,6 +3,12 @@
#define MAX_REL_ERR 1.0e-9
#define MAX_ABS_ERR 1.0e-9
+#include <float.h>
+
+#ifndef INFINITY
+#define INFINITY (DBL_MAX + DBL_MAX)
+#endif
+
static bool
double_eq_rel(double a, double b, double max_rel_err, double max_abs_err)
{
diff --git a/deps/jemalloc/test/unit/mq.c b/deps/jemalloc/test/unit/mq.c
index f57e96af1..bde2a480b 100644
--- a/deps/jemalloc/test/unit/mq.c
+++ b/deps/jemalloc/test/unit/mq.c
@@ -54,7 +54,7 @@ thd_sender_start(void *arg)
mq_msg_t *msg;
void *p;
p = mallocx(sizeof(mq_msg_t), 0);
- assert_ptr_not_null(p, "Unexpected allocm() failure");
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
msg = (mq_msg_t *)p;
mq_put(mq, msg);
}
@@ -85,6 +85,7 @@ TEST_END
int
main(void)
{
+
return (test(
test_mq_basic,
test_mq_threaded));
diff --git a/deps/jemalloc/test/unit/prof_accum.c b/deps/jemalloc/test/unit/prof_accum.c
index 050a8a7ee..fd229e0fd 100644
--- a/deps/jemalloc/test/unit/prof_accum.c
+++ b/deps/jemalloc/test/unit/prof_accum.c
@@ -1,4 +1,9 @@
-#include "prof_accum.h"
+#include "test/jemalloc_test.h"
+
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD 50
+#define DUMP_INTERVAL 1
+#define BT_COUNT_CHECK_INTERVAL 5
#ifdef JEMALLOC_PROF
const char *malloc_conf =
@@ -20,7 +25,7 @@ static void *
alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration)
{
- return (alloc_0(thd_ind*NALLOCS_PER_THREAD + iteration));
+ return (btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration));
}
static void *
diff --git a/deps/jemalloc/test/unit/prof_accum.h b/deps/jemalloc/test/unit/prof_accum.h
deleted file mode 100644
index 109d86b59..000000000
--- a/deps/jemalloc/test/unit/prof_accum.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "test/jemalloc_test.h"
-
-#define NTHREADS 4
-#define NALLOCS_PER_THREAD 50
-#define DUMP_INTERVAL 1
-#define BT_COUNT_CHECK_INTERVAL 5
-
-#define alloc_n_proto(n) \
-void *alloc_##n(unsigned bits);
-alloc_n_proto(0)
-alloc_n_proto(1)
-
-#define alloc_n_gen(n) \
-void * \
-alloc_##n(unsigned bits) \
-{ \
- void *p; \
- \
- if (bits == 0) \
- p = mallocx(1, 0); \
- else { \
- switch (bits & 0x1U) { \
- case 0: \
- p = (alloc_0(bits >> 1)); \
- break; \
- case 1: \
- p = (alloc_1(bits >> 1)); \
- break; \
- default: not_reached(); \
- } \
- } \
- /* Intentionally sabotage tail call optimization. */ \
- assert_ptr_not_null(p, "Unexpected mallocx() failure"); \
- return (p); \
-}
diff --git a/deps/jemalloc/test/unit/prof_accum_a.c b/deps/jemalloc/test/unit/prof_accum_a.c
deleted file mode 100644
index 42ad521d8..000000000
--- a/deps/jemalloc/test/unit/prof_accum_a.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "prof_accum.h"
-
-alloc_n_gen(0)
diff --git a/deps/jemalloc/test/unit/prof_accum_b.c b/deps/jemalloc/test/unit/prof_accum_b.c
deleted file mode 100644
index 60d9dab6a..000000000
--- a/deps/jemalloc/test/unit/prof_accum_b.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "prof_accum.h"
-
-alloc_n_gen(1)
diff --git a/deps/jemalloc/test/unit/prof_active.c b/deps/jemalloc/test/unit/prof_active.c
new file mode 100644
index 000000000..814909572
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_active.c
@@ -0,0 +1,136 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_PROF
+const char *malloc_conf =
+ "prof:true,prof_thread_active_init:false,lg_prof_sample:0";
+#endif
+
+static void
+mallctl_bool_get(const char *name, bool expected, const char *func, int line)
+{
+ bool old;
+ size_t sz;
+
+ sz = sizeof(old);
+ assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0,
+ "%s():%d: Unexpected mallctl failure reading %s", func, line, name);
+ assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line,
+ name);
+}
+
+static void
+mallctl_bool_set(const char *name, bool old_expected, bool val_new,
+ const char *func, int line)
+{
+ bool old;
+ size_t sz;
+
+ sz = sizeof(old);
+ assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0,
+ "%s():%d: Unexpected mallctl failure reading/writing %s", func,
+ line, name);
+ assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func,
+ line, name);
+}
+
+static void
+mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func,
+ int line)
+{
+
+ mallctl_bool_get("prof.active", prof_active_old_expected, func, line);
+}
+#define mallctl_prof_active_get(a) \
+ mallctl_prof_active_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_prof_active_set_impl(bool prof_active_old_expected,
+ bool prof_active_new, const char *func, int line)
+{
+
+ mallctl_bool_set("prof.active", prof_active_old_expected,
+ prof_active_new, func, line);
+}
+#define mallctl_prof_active_set(a, b) \
+ mallctl_prof_active_set_impl(a, b, __func__, __LINE__)
+
+static void
+mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected,
+ const char *func, int line)
+{
+
+ mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected,
+ func, line);
+}
+#define mallctl_thread_prof_active_get(a) \
+ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected,
+ bool thread_prof_active_new, const char *func, int line)
+{
+
+ mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected,
+ thread_prof_active_new, func, line);
+}
+#define mallctl_thread_prof_active_set(a, b) \
+ mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__)
+
+static void
+prof_sampling_probe_impl(bool expect_sample, const char *func, int line)
+{
+ void *p;
+ size_t expected_backtraces = expect_sample ? 1 : 0;
+
+ assert_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func,
+ line);
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ assert_zu_eq(prof_bt_count(), expected_backtraces,
+ "%s():%d: Unexpected backtrace count", func, line);
+ dallocx(p, 0);
+}
+#define prof_sampling_probe(a) \
+ prof_sampling_probe_impl(a, __func__, __LINE__)
+
+TEST_BEGIN(test_prof_active)
+{
+
+ test_skip_if(!config_prof);
+
+ mallctl_prof_active_get(true);
+ mallctl_thread_prof_active_get(false);
+
+ mallctl_prof_active_set(true, true);
+ mallctl_thread_prof_active_set(false, false);
+ /* prof.active, !thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(true, false);
+ mallctl_thread_prof_active_set(false, false);
+ /* !prof.active, !thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(false, false);
+ mallctl_thread_prof_active_set(false, true);
+ /* !prof.active, thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(false, true);
+ mallctl_thread_prof_active_set(true, true);
+ /* prof.active, thread.prof.active. */
+ prof_sampling_probe(true);
+
+ /* Restore settings. */
+ mallctl_prof_active_set(true, true);
+ mallctl_thread_prof_active_set(true, false);
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_prof_active));
+}
diff --git a/deps/jemalloc/test/unit/prof_gdump.c b/deps/jemalloc/test/unit/prof_gdump.c
index a00b1054f..a0e6ee921 100644
--- a/deps/jemalloc/test/unit/prof_gdump.c
+++ b/deps/jemalloc/test/unit/prof_gdump.c
@@ -21,8 +21,9 @@ prof_dump_open_intercept(bool propagate_err, const char *filename)
TEST_BEGIN(test_gdump)
{
- bool active;
- void *p, *q;
+ bool active, gdump, gdump_old;
+ void *p, *q, *r, *s;
+ size_t sz;
test_skip_if(!config_prof);
@@ -42,8 +43,32 @@ TEST_BEGIN(test_gdump)
assert_ptr_not_null(q, "Unexpected mallocx() failure");
assert_true(did_prof_dump_open, "Expected a profile dump");
+ gdump = false;
+ sz = sizeof(gdump_old);
+ assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump,
+ sizeof(gdump)), 0,
+ "Unexpected mallctl failure while disabling prof.gdump");
+ assert(gdump_old);
+ did_prof_dump_open = false;
+ r = mallocx(chunksize, 0);
+ assert_ptr_not_null(r, "Unexpected mallocx() failure");
+ assert_false(did_prof_dump_open, "Unexpected profile dump");
+
+ gdump = true;
+ sz = sizeof(gdump_old);
+ assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump,
+ sizeof(gdump)), 0,
+ "Unexpected mallctl failure while enabling prof.gdump");
+ assert(!gdump_old);
+ did_prof_dump_open = false;
+ s = mallocx(chunksize, 0);
+ assert_ptr_not_null(s, "Unexpected mallocx() failure");
+ assert_true(did_prof_dump_open, "Expected a profile dump");
+
dallocx(p, 0);
dallocx(q, 0);
+ dallocx(r, 0);
+ dallocx(s, 0);
}
TEST_END
diff --git a/deps/jemalloc/test/unit/prof_reset.c b/deps/jemalloc/test/unit/prof_reset.c
new file mode 100644
index 000000000..69983e5e5
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_reset.c
@@ -0,0 +1,302 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_PROF
+const char *malloc_conf =
+ "prof:true,prof_active:false,lg_prof_sample:0";
+#endif
+
+static int
+prof_dump_open_intercept(bool propagate_err, const char *filename)
+{
+ int fd;
+
+ fd = open("/dev/null", O_WRONLY);
+ assert_d_ne(fd, -1, "Unexpected open() failure");
+
+ return (fd);
+}
+
+static void
+set_prof_active(bool active)
+{
+
+ assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
+ 0, "Unexpected mallctl failure");
+}
+
+static size_t
+get_lg_prof_sample(void)
+{
+ size_t lg_prof_sample;
+ size_t sz = sizeof(size_t);
+
+ assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0,
+ "Unexpected mallctl failure while reading profiling sample rate");
+ return (lg_prof_sample);
+}
+
+static void
+do_prof_reset(size_t lg_prof_sample)
+{
+ assert_d_eq(mallctl("prof.reset", NULL, NULL,
+ &lg_prof_sample, sizeof(size_t)), 0,
+ "Unexpected mallctl failure while resetting profile data");
+ assert_zu_eq(lg_prof_sample, get_lg_prof_sample(),
+ "Expected profile sample rate change");
+}
+
+TEST_BEGIN(test_prof_reset_basic)
+{
+ size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next;
+ size_t sz;
+ unsigned i;
+
+ test_skip_if(!config_prof);
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz,
+ NULL, 0), 0,
+ "Unexpected mallctl failure while reading profiling sample rate");
+ assert_zu_eq(lg_prof_sample_orig, 0,
+ "Unexpected profiling sample rate");
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected disagreement between \"opt.lg_prof_sample\" and "
+ "\"prof.lg_sample\"");
+
+ /* Test simple resets. */
+ for (i = 0; i < 2; i++) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl failure while resetting profile data");
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected profile sample rate change");
+ }
+
+ /* Test resets with prof.lg_sample changes. */
+ lg_prof_sample_next = 1;
+ for (i = 0; i < 2; i++) {
+ do_prof_reset(lg_prof_sample_next);
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample, lg_prof_sample_next,
+ "Expected profile sample rate change");
+ lg_prof_sample_next = lg_prof_sample_orig;
+ }
+
+ /* Make sure the test code restored prof.lg_sample. */
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected disagreement between \"opt.lg_prof_sample\" and "
+ "\"prof.lg_sample\"");
+}
+TEST_END
+
+bool prof_dump_header_intercepted = false;
+prof_cnt_t cnt_all_copy = {0, 0, 0, 0};
+static bool
+prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all)
+{
+
+ prof_dump_header_intercepted = true;
+ memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t));
+
+ return (false);
+}
+
+TEST_BEGIN(test_prof_reset_cleanup)
+{
+ void *p;
+ prof_dump_header_t *prof_dump_header_orig;
+
+ test_skip_if(!config_prof);
+
+ set_prof_active(true);
+
+ assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces");
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace");
+
+ prof_dump_header_orig = prof_dump_header;
+ prof_dump_header = prof_dump_header_intercept;
+ assert_false(prof_dump_header_intercepted, "Unexpected intercept");
+
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ assert_true(prof_dump_header_intercepted, "Expected intercept");
+ assert_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation");
+
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected error while resetting heap profile data");
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ assert_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations");
+ assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace");
+
+ prof_dump_header = prof_dump_header_orig;
+
+ dallocx(p, 0);
+ assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces");
+
+ set_prof_active(false);
+}
+TEST_END
+
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD (1U << 13)
+#define OBJ_RING_BUF_COUNT 1531
+#define RESET_INTERVAL (1U << 10)
+#define DUMP_INTERVAL 3677
+static void *
+thd_start(void *varg)
+{
+ unsigned thd_ind = *(unsigned *)varg;
+ unsigned i;
+ void *objs[OBJ_RING_BUF_COUNT];
+
+ memset(objs, 0, sizeof(objs));
+
+ for (i = 0; i < NALLOCS_PER_THREAD; i++) {
+ if (i % RESET_INTERVAL == 0) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while resetting heap profile "
+ "data");
+ }
+
+ if (i % DUMP_INTERVAL == 0) {
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ }
+
+ {
+ void **pp = &objs[i % OBJ_RING_BUF_COUNT];
+ if (*pp != NULL) {
+ dallocx(*pp, 0);
+ *pp = NULL;
+ }
+ *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i);
+ assert_ptr_not_null(*pp,
+ "Unexpected btalloc() failure");
+ }
+ }
+
+ /* Clean up any remaining objects. */
+ for (i = 0; i < OBJ_RING_BUF_COUNT; i++) {
+ void **pp = &objs[i % OBJ_RING_BUF_COUNT];
+ if (*pp != NULL) {
+ dallocx(*pp, 0);
+ *pp = NULL;
+ }
+ }
+
+ return (NULL);
+}
+
+TEST_BEGIN(test_prof_reset)
+{
+ size_t lg_prof_sample_orig;
+ thd_t thds[NTHREADS];
+ unsigned thd_args[NTHREADS];
+ unsigned i;
+ size_t bt_count, tdata_count;
+
+ test_skip_if(!config_prof);
+
+ bt_count = prof_bt_count();
+ assert_zu_eq(bt_count, 0,
+ "Unexpected pre-existing tdata structures");
+ tdata_count = prof_tdata_count();
+
+ lg_prof_sample_orig = get_lg_prof_sample();
+ do_prof_reset(5);
+
+ set_prof_active(true);
+
+ for (i = 0; i < NTHREADS; i++) {
+ thd_args[i] = i;
+ thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
+ }
+ for (i = 0; i < NTHREADS; i++)
+ thd_join(thds[i], NULL);
+
+ assert_zu_eq(prof_bt_count(), bt_count,
+ "Unexpected backtrace count change");
+ assert_zu_eq(prof_tdata_count(), tdata_count,
+ "Unexpected remaining tdata structures");
+
+ set_prof_active(false);
+
+ do_prof_reset(lg_prof_sample_orig);
+}
+TEST_END
+#undef NTHREADS
+#undef NALLOCS_PER_THREAD
+#undef OBJ_RING_BUF_COUNT
+#undef RESET_INTERVAL
+#undef DUMP_INTERVAL
+
+/* Test sampling at the same allocation site across resets. */
+#define NITER 10
+TEST_BEGIN(test_xallocx)
+{
+ size_t lg_prof_sample_orig;
+ unsigned i;
+ void *ptrs[NITER];
+
+ test_skip_if(!config_prof);
+
+ lg_prof_sample_orig = get_lg_prof_sample();
+ set_prof_active(true);
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ for (i = 0; i < NITER; i++) {
+ void *p;
+ size_t sz, nsz;
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ /* Allocate small object (which will be promoted). */
+ p = ptrs[i] = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ /* Perform successful xallocx(). */
+ sz = sallocx(p, 0);
+ assert_zu_eq(xallocx(p, sz, 0, 0), sz,
+ "Unexpected xallocx() failure");
+
+ /* Perform unsuccessful xallocx(). */
+ nsz = nallocx(sz+1, 0);
+ assert_zu_eq(xallocx(p, nsz, 0, 0), sz,
+ "Unexpected xallocx() success");
+ }
+
+ for (i = 0; i < NITER; i++) {
+ /* dallocx. */
+ dallocx(ptrs[i], 0);
+ }
+
+ set_prof_active(false);
+ do_prof_reset(lg_prof_sample_orig);
+}
+TEST_END
+#undef NITER
+
+int
+main(void)
+{
+
+ /* Intercept dumping prior to running any tests. */
+ prof_dump_open = prof_dump_open_intercept;
+
+ return (test(
+ test_prof_reset_basic,
+ test_prof_reset_cleanup,
+ test_prof_reset,
+ test_xallocx));
+}
diff --git a/deps/jemalloc/test/unit/prof_thread_name.c b/deps/jemalloc/test/unit/prof_thread_name.c
new file mode 100644
index 000000000..f501158d7
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_thread_name.c
@@ -0,0 +1,129 @@
+#include "test/jemalloc_test.h"
+
+#ifdef JEMALLOC_PROF
+const char *malloc_conf = "prof:true,prof_active:false";
+#endif
+
+static void
+mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func,
+ int line)
+{
+ const char *thread_name_old;
+ size_t sz;
+
+ sz = sizeof(thread_name_old);
+ assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0),
+ 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name",
+ func, line);
+ assert_str_eq(thread_name_old, thread_name_expected,
+ "%s():%d: Unexpected thread.prof.name value", func, line);
+}
+#define mallctl_thread_name_get(a) \
+ mallctl_thread_name_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_thread_name_set_impl(const char *thread_name, const char *func,
+ int line)
+{
+
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name,
+ sizeof(thread_name)), 0,
+ "%s():%d: Unexpected mallctl failure reading thread.prof.name",
+ func, line);
+ mallctl_thread_name_get_impl(thread_name, func, line);
+}
+#define mallctl_thread_name_set(a) \
+ mallctl_thread_name_set_impl(a, __func__, __LINE__)
+
+TEST_BEGIN(test_prof_thread_name_validation)
+{
+ const char *thread_name;
+
+ test_skip_if(!config_prof);
+
+ mallctl_thread_name_get("");
+ mallctl_thread_name_set("hi there");
+
+ /* NULL input shouldn't be allowed. */
+ thread_name = NULL;
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name,
+ sizeof(thread_name)), EFAULT,
+ "Unexpected mallctl result writing \"%s\" to thread.prof.name",
+ thread_name);
+
+ /* '\n' shouldn't be allowed. */
+ thread_name = "hi\nthere";
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name,
+ sizeof(thread_name)), EFAULT,
+ "Unexpected mallctl result writing \"%s\" to thread.prof.name",
+ thread_name);
+
+ /* Simultaneous read/write shouldn't be allowed. */
+ {
+ const char *thread_name_old;
+ size_t sz;
+
+ sz = sizeof(thread_name_old);
+ assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz,
+ &thread_name, sizeof(thread_name)), EPERM,
+ "Unexpected mallctl result writing \"%s\" to "
+ "thread.prof.name", thread_name);
+ }
+
+ mallctl_thread_name_set("");
+}
+TEST_END
+
+#define NTHREADS 4
+#define NRESET 25
+static void *
+thd_start(void *varg)
+{
+ unsigned thd_ind = *(unsigned *)varg;
+ char thread_name[16] = "";
+ unsigned i;
+
+ malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind);
+
+ mallctl_thread_name_get("");
+ mallctl_thread_name_set(thread_name);
+
+ for (i = 0; i < NRESET; i++) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected error while resetting heap profile data");
+ mallctl_thread_name_get(thread_name);
+ }
+
+ mallctl_thread_name_set(thread_name);
+ mallctl_thread_name_set("");
+
+ return (NULL);
+}
+
+TEST_BEGIN(test_prof_thread_name_threaded)
+{
+ thd_t thds[NTHREADS];
+ unsigned thd_args[NTHREADS];
+ unsigned i;
+
+ test_skip_if(!config_prof);
+
+ for (i = 0; i < NTHREADS; i++) {
+ thd_args[i] = i;
+ thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
+ }
+ for (i = 0; i < NTHREADS; i++)
+ thd_join(thds[i], NULL);
+}
+TEST_END
+#undef NTHREADS
+#undef NRESET
+
+int
+main(void)
+{
+
+ return (test(
+ test_prof_thread_name_validation,
+ test_prof_thread_name_threaded));
+}
diff --git a/deps/jemalloc/test/unit/rb.c b/deps/jemalloc/test/unit/rb.c
index b737485a7..b38eb0e33 100644
--- a/deps/jemalloc/test/unit/rb.c
+++ b/deps/jemalloc/test/unit/rb.c
@@ -5,7 +5,7 @@
for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \
rbp_bh_t != &(a_rbt)->rbt_nil; \
rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \
- if (rbtn_red_get(a_type, a_field, rbp_bh_t) == false) { \
+ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \
(r_height)++; \
} \
} \
@@ -49,6 +49,7 @@ TEST_BEGIN(test_rb_empty)
tree_new(&tree);
+ assert_true(tree_empty(&tree), "Tree should be empty");
assert_ptr_null(tree_first(&tree), "Unexpected node");
assert_ptr_null(tree_last(&tree), "Unexpected node");
@@ -74,7 +75,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth,
node_t *left_node = rbtn_left_get(node_t, link, node);
node_t *right_node = rbtn_right_get(node_t, link, node);
- if (rbtn_red_get(node_t, link, node) == false)
+ if (!rbtn_red_get(node_t, link, node))
black_depth++;
/* Red nodes must be interleaved with black nodes. */
@@ -265,6 +266,8 @@ TEST_BEGIN(test_rb_random)
assert_u_eq(tree_iterate_reverse(&tree), k+1,
"Unexpected node iteration count");
+ assert_false(tree_empty(&tree),
+ "Tree should not be empty");
assert_ptr_not_null(tree_first(&tree),
"Tree should not be empty");
assert_ptr_not_null(tree_last(&tree),
diff --git a/deps/jemalloc/test/unit/rtree.c b/deps/jemalloc/test/unit/rtree.c
index 5463055fe..b54b3e86f 100644
--- a/deps/jemalloc/test/unit/rtree.c
+++ b/deps/jemalloc/test/unit/rtree.c
@@ -1,14 +1,30 @@
#include "test/jemalloc_test.h"
+static rtree_node_elm_t *
+node_alloc(size_t nelms)
+{
+
+ return ((rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)));
+}
+
+static void
+node_dalloc(rtree_node_elm_t *node)
+{
+
+ free(node);
+}
+
TEST_BEGIN(test_rtree_get_empty)
{
unsigned i;
for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
- assert_u_eq(rtree_get(rtree, 0), 0,
+ rtree_t rtree;
+ assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc),
+ "Unexpected rtree_new() failure");
+ assert_ptr_null(rtree_get(&rtree, 0, false),
"rtree_get() should return NULL for empty tree");
- rtree_delete(rtree);
+ rtree_delete(&rtree);
}
}
TEST_END
@@ -16,19 +32,24 @@ TEST_END
TEST_BEGIN(test_rtree_extrema)
{
unsigned i;
+ extent_node_t node_a, node_b;
for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
+ rtree_t rtree;
+ assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc),
+ "Unexpected rtree_new() failure");
- rtree_set(rtree, 0, 1);
- assert_u_eq(rtree_get(rtree, 0), 1,
+ assert_false(rtree_set(&rtree, 0, &node_a),
+ "Unexpected rtree_set() failure");
+ assert_ptr_eq(rtree_get(&rtree, 0, true), &node_a,
"rtree_get() should return previously set value");
- rtree_set(rtree, ~((uintptr_t)0), 1);
- assert_u_eq(rtree_get(rtree, ~((uintptr_t)0)), 1,
+ assert_false(rtree_set(&rtree, ~((uintptr_t)0), &node_b),
+ "Unexpected rtree_set() failure");
+ assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0), true), &node_b,
"rtree_get() should return previously set value");
- rtree_delete(rtree);
+ rtree_delete(&rtree);
}
}
TEST_END
@@ -40,26 +61,32 @@ TEST_BEGIN(test_rtree_bits)
for (i = 1; i < (sizeof(uintptr_t) << 3); i++) {
uintptr_t keys[] = {0, 1,
(((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1};
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
+ extent_node_t node;
+ rtree_t rtree;
+
+ assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc),
+ "Unexpected rtree_new() failure");
for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
- rtree_set(rtree, keys[j], 1);
+ assert_false(rtree_set(&rtree, keys[j], &node),
+ "Unexpected rtree_set() failure");
for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) {
- assert_u_eq(rtree_get(rtree, keys[k]), 1,
- "rtree_get() should return previously set "
- "value and ignore insignificant key bits; "
- "i=%u, j=%u, k=%u, set key=%#"PRIxPTR", "
- "get key=%#"PRIxPTR, i, j, k, keys[j],
- keys[k]);
+ assert_ptr_eq(rtree_get(&rtree, keys[k], true),
+ &node, "rtree_get() should return "
+ "previously set value and ignore "
+ "insignificant key bits; i=%u, j=%u, k=%u, "
+ "set key=%#"FMTxPTR", get key=%#"FMTxPTR, i,
+ j, k, keys[j], keys[k]);
}
- assert_u_eq(rtree_get(rtree,
- (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0,
+ assert_ptr_null(rtree_get(&rtree,
+ (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false),
"Only leftmost rtree leaf should be set; "
"i=%u, j=%u", i, j);
- rtree_set(rtree, keys[j], 0);
+ assert_false(rtree_set(&rtree, keys[j], NULL),
+ "Unexpected rtree_set() failure");
}
- rtree_delete(rtree);
+ rtree_delete(&rtree);
}
}
TEST_END
@@ -68,37 +95,43 @@ TEST_BEGIN(test_rtree_random)
{
unsigned i;
sfmt_t *sfmt;
-#define NSET 100
+#define NSET 16
#define SEED 42
sfmt = init_gen_rand(SEED);
for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
uintptr_t keys[NSET];
+ extent_node_t node;
unsigned j;
+ rtree_t rtree;
+
+ assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc),
+ "Unexpected rtree_new() failure");
for (j = 0; j < NSET; j++) {
keys[j] = (uintptr_t)gen_rand64(sfmt);
- rtree_set(rtree, keys[j], 1);
- assert_u_eq(rtree_get(rtree, keys[j]), 1,
+ assert_false(rtree_set(&rtree, keys[j], &node),
+ "Unexpected rtree_set() failure");
+ assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node,
"rtree_get() should return previously set value");
}
for (j = 0; j < NSET; j++) {
- assert_u_eq(rtree_get(rtree, keys[j]), 1,
+ assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node,
"rtree_get() should return previously set value");
}
for (j = 0; j < NSET; j++) {
- rtree_set(rtree, keys[j], 0);
- assert_u_eq(rtree_get(rtree, keys[j]), 0,
+ assert_false(rtree_set(&rtree, keys[j], NULL),
+ "Unexpected rtree_set() failure");
+ assert_ptr_null(rtree_get(&rtree, keys[j], true),
"rtree_get() should return previously set value");
}
for (j = 0; j < NSET; j++) {
- assert_u_eq(rtree_get(rtree, keys[j]), 0,
+ assert_ptr_null(rtree_get(&rtree, keys[j], true),
"rtree_get() should return previously set value");
}
- rtree_delete(rtree);
+ rtree_delete(&rtree);
}
fini_gen_rand(sfmt);
#undef NSET
diff --git a/deps/jemalloc/test/unit/size_classes.c b/deps/jemalloc/test/unit/size_classes.c
new file mode 100644
index 000000000..d3aaebd77
--- /dev/null
+++ b/deps/jemalloc/test/unit/size_classes.c
@@ -0,0 +1,89 @@
+#include "test/jemalloc_test.h"
+
+static size_t
+get_max_size_class(void)
+{
+ unsigned nhchunks;
+ size_t mib[4];
+ size_t sz, miblen, max_size_class;
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0,
+ "Unexpected mallctl() error");
+
+ miblen = sizeof(mib) / sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+ mib[2] = nhchunks - 1;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0,
+ "Unexpected mallctlbymib() error");
+
+ return (max_size_class);
+}
+
+TEST_BEGIN(test_size_classes)
+{
+ size_t size_class, max_size_class;
+ szind_t index, max_index;
+
+ max_size_class = get_max_size_class();
+ max_index = size2index(max_size_class);
+
+ for (index = 0, size_class = index2size(index); index < max_index ||
+ size_class < max_size_class; index++, size_class =
+ index2size(index)) {
+ assert_true(index < max_index,
+ "Loop conditionals should be equivalent; index=%u, "
+ "size_class=%zu (%#zx)", index, size_class, size_class);
+ assert_true(size_class < max_size_class,
+ "Loop conditionals should be equivalent; index=%u, "
+ "size_class=%zu (%#zx)", index, size_class, size_class);
+
+ assert_u_eq(index, size2index(size_class),
+ "size2index() does not reverse index2size(): index=%u -->"
+ " size_class=%zu --> index=%u --> size_class=%zu", index,
+ size_class, size2index(size_class),
+ index2size(size2index(size_class)));
+ assert_zu_eq(size_class, index2size(size2index(size_class)),
+ "index2size() does not reverse size2index(): index=%u -->"
+ " size_class=%zu --> index=%u --> size_class=%zu", index,
+ size_class, size2index(size_class),
+ index2size(size2index(size_class)));
+
+ assert_u_eq(index+1, size2index(size_class+1),
+ "Next size_class does not round up properly");
+
+ assert_zu_eq(size_class, (index > 0) ?
+ s2u(index2size(index-1)+1) : s2u(1),
+ "s2u() does not round up to size class");
+ assert_zu_eq(size_class, s2u(size_class-1),
+ "s2u() does not round up to size class");
+ assert_zu_eq(size_class, s2u(size_class),
+ "s2u() does not compute same size class");
+ assert_zu_eq(s2u(size_class+1), index2size(index+1),
+ "s2u() does not round up to next size class");
+ }
+
+ assert_u_eq(index, size2index(index2size(index)),
+ "size2index() does not reverse index2size()");
+ assert_zu_eq(max_size_class, index2size(size2index(max_size_class)),
+ "index2size() does not reverse size2index()");
+
+ assert_zu_eq(size_class, s2u(index2size(index-1)+1),
+ "s2u() does not round up to size class");
+ assert_zu_eq(size_class, s2u(size_class-1),
+ "s2u() does not round up to size class");
+ assert_zu_eq(size_class, s2u(size_class),
+ "s2u() does not compute same size class");
+}
+TEST_END
+
+int
+main(void)
+{
+
+ return (test(
+ test_size_classes));
+}
diff --git a/deps/jemalloc/test/unit/stats.c b/deps/jemalloc/test/unit/stats.c
index 03a55c7fd..8e4bc631e 100644
--- a/deps/jemalloc/test/unit/stats.c
+++ b/deps/jemalloc/test/unit/stats.c
@@ -3,7 +3,7 @@
TEST_BEGIN(test_stats_summary)
{
size_t *cactive;
- size_t sz, allocated, active, mapped;
+ size_t sz, allocated, active, resident, mapped;
int expected = config_stats ? 0 : ENOENT;
sz = sizeof(cactive);
@@ -15,6 +15,8 @@ TEST_BEGIN(test_stats_summary)
expected, "Unexpected mallctl() result");
assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected,
"Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.resident", &resident, &sz, NULL, 0),
+ expected, "Unexpected mallctl() result");
assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected,
"Unexpected mallctl() result");
@@ -23,34 +25,10 @@ TEST_BEGIN(test_stats_summary)
"active should be no larger than cactive");
assert_zu_le(allocated, active,
"allocated should be no larger than active");
- assert_zu_le(active, mapped,
- "active should be no larger than mapped");
- }
-}
-TEST_END
-
-TEST_BEGIN(test_stats_chunks)
-{
- size_t current, high;
- uint64_t total;
- size_t sz;
- int expected = config_stats ? 0 : ENOENT;
-
- sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.chunks.current", &current, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
- sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
- sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected,
- "Unexpected mallctl() result");
-
- if (config_stats) {
- assert_zu_le(current, high,
- "current should be no larger than high");
- assert_u64_le((uint64_t)high, total,
- "high should be no larger than total");
+ assert_zu_lt(active, resident,
+ "active should be less than resident");
+ assert_zu_lt(active, mapped,
+ "active should be less than mapped");
}
}
TEST_END
@@ -60,30 +38,34 @@ TEST_BEGIN(test_stats_huge)
void *p;
uint64_t epoch;
size_t allocated;
- uint64_t nmalloc, ndalloc;
+ uint64_t nmalloc, ndalloc, nrequests;
size_t sz;
int expected = config_stats ? 0 : ENOENT;
- p = mallocx(arena_maxclass+1, 0);
+ p = mallocx(large_maxclass+1, 0);
assert_ptr_not_null(p, "Unexpected mallocx() failure");
assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
"Unexpected mallctl() failure");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.huge.allocated", &allocated, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.huge.nmalloc", &nmalloc, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.huge.ndalloc", &ndalloc, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL,
+ 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL,
+ 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", &nrequests, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
if (config_stats) {
assert_zu_gt(allocated, 0,
"allocated should be greater than zero");
assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
+ assert_u64_le(nmalloc, nrequests,
+ "nmalloc should be no larger than nrequests");
}
dallocx(p, 0);
@@ -93,7 +75,7 @@ TEST_END
TEST_BEGIN(test_stats_arenas_summary)
{
unsigned arena;
- void *small, *large;
+ void *little, *large, *huge;
uint64_t epoch;
size_t sz;
int expected = config_stats ? 0 : ENOENT;
@@ -104,10 +86,12 @@ TEST_BEGIN(test_stats_arenas_summary)
assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
0, "Unexpected mallctl() failure");
- small = mallocx(SMALL_MAXCLASS, 0);
- assert_ptr_not_null(small, "Unexpected mallocx() failure");
- large = mallocx(arena_maxclass, 0);
+ little = mallocx(SMALL_MAXCLASS, 0);
+ assert_ptr_not_null(little, "Unexpected mallocx() failure");
+ large = mallocx(large_maxclass, 0);
assert_ptr_not_null(large, "Unexpected mallocx() failure");
+ huge = mallocx(chunksize, 0);
+ assert_ptr_not_null(huge, "Unexpected mallocx() failure");
assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
"Unexpected mallctl() failure");
@@ -133,8 +117,9 @@ TEST_BEGIN(test_stats_arenas_summary)
"nmadvise should be no greater than purged");
}
- dallocx(small, 0);
+ dallocx(little, 0);
dallocx(large, 0);
+ dallocx(huge, 0);
}
TEST_END
@@ -215,7 +200,7 @@ TEST_BEGIN(test_stats_arenas_large)
assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
0, "Unexpected mallctl() failure");
- p = mallocx(arena_maxclass, 0);
+ p = mallocx(large_maxclass, 0);
assert_ptr_not_null(p, "Unexpected mallocx() failure");
assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
@@ -247,11 +232,51 @@ TEST_BEGIN(test_stats_arenas_large)
}
TEST_END
+TEST_BEGIN(test_stats_arenas_huge)
+{
+ unsigned arena;
+ void *p;
+ size_t sz, allocated;
+ uint64_t epoch, nmalloc, ndalloc;
+ int expected = config_stats ? 0 : ENOENT;
+
+ arena = 0;
+ assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
+ 0, "Unexpected mallctl() failure");
+
+ p = mallocx(chunksize, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+
+ assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
+ "Unexpected mallctl() failure");
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+ sz = sizeof(uint64_t);
+ assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+
+ if (config_stats) {
+ assert_zu_gt(allocated, 0,
+ "allocated should be greater than zero");
+ assert_u64_gt(nmalloc, 0,
+ "nmalloc should be greater than zero");
+ assert_u64_ge(nmalloc, ndalloc,
+ "nmalloc should be at least as large as ndalloc");
+ }
+
+ dallocx(p, 0);
+}
+TEST_END
+
TEST_BEGIN(test_stats_arenas_bins)
{
unsigned arena;
void *p;
- size_t sz, allocated, curruns;
+ size_t sz, curruns, curregs;
uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
uint64_t nruns, nreruns;
int expected = config_stats ? 0 : ENOENT;
@@ -269,9 +294,6 @@ TEST_BEGIN(test_stats_arenas_bins)
assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
"Unexpected mallctl() failure");
- sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.bins.0.allocated", &allocated, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
sz = sizeof(uint64_t);
assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz,
NULL, 0), expected, "Unexpected mallctl() result");
@@ -279,7 +301,11 @@ TEST_BEGIN(test_stats_arenas_bins)
NULL, 0), expected, "Unexpected mallctl() result");
assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz,
NULL, 0), expected, "Unexpected mallctl() result");
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+ sz = sizeof(uint64_t);
assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz,
NULL, 0), config_tcache ? expected : ENOENT,
"Unexpected mallctl() result");
@@ -296,14 +322,14 @@ TEST_BEGIN(test_stats_arenas_bins)
NULL, 0), expected, "Unexpected mallctl() result");
if (config_stats) {
- assert_zu_gt(allocated, 0,
- "allocated should be greater than zero");
assert_u64_gt(nmalloc, 0,
"nmalloc should be greater than zero");
assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
assert_u64_gt(nrequests, 0,
"nrequests should be greater than zero");
+ assert_zu_gt(curregs, 0,
+ "curregs should be greater than zero");
if (config_tcache) {
assert_u64_gt(nfills, 0,
"At least one fill should have occurred");
@@ -332,7 +358,7 @@ TEST_BEGIN(test_stats_arenas_lruns)
assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
0, "Unexpected mallctl() failure");
- p = mallocx(SMALL_MAXCLASS+1, 0);
+ p = mallocx(LARGE_MINCLASS, 0);
assert_ptr_not_null(p, "Unexpected mallocx() failure");
assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
@@ -364,17 +390,58 @@ TEST_BEGIN(test_stats_arenas_lruns)
}
TEST_END
+TEST_BEGIN(test_stats_arenas_hchunks)
+{
+ unsigned arena;
+ void *p;
+ uint64_t epoch, nmalloc, ndalloc;
+ size_t curhchunks, sz;
+ int expected = config_stats ? 0 : ENOENT;
+
+ arena = 0;
+ assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
+ 0, "Unexpected mallctl() failure");
+
+ p = mallocx(chunksize, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+
+ assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
+ "Unexpected mallctl() failure");
+
+ sz = sizeof(uint64_t);
+ assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz,
+ NULL, 0), expected, "Unexpected mallctl() result");
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+
+ if (config_stats) {
+ assert_u64_gt(nmalloc, 0,
+ "nmalloc should be greater than zero");
+ assert_u64_ge(nmalloc, ndalloc,
+ "nmalloc should be at least as large as ndalloc");
+ assert_zu_gt(curhchunks, 0,
+ "At least one chunk should be currently allocated");
+ }
+
+ dallocx(p, 0);
+}
+TEST_END
+
int
main(void)
{
return (test(
test_stats_summary,
- test_stats_chunks,
test_stats_huge,
test_stats_arenas_summary,
test_stats_arenas_small,
test_stats_arenas_large,
+ test_stats_arenas_huge,
test_stats_arenas_bins,
- test_stats_arenas_lruns));
+ test_stats_arenas_lruns,
+ test_stats_arenas_hchunks));
}
diff --git a/deps/jemalloc/test/unit/tsd.c b/deps/jemalloc/test/unit/tsd.c
index f421c1a3c..8be787fda 100644
--- a/deps/jemalloc/test/unit/tsd.c
+++ b/deps/jemalloc/test/unit/tsd.c
@@ -6,29 +6,64 @@ typedef unsigned int data_t;
static bool data_cleanup_executed;
+malloc_tsd_types(data_, data_t)
+malloc_tsd_protos(, data_, data_t)
+
void
data_cleanup(void *arg)
{
data_t *data = (data_t *)arg;
- assert_x_eq(*data, THREAD_DATA,
- "Argument passed into cleanup function should match tsd value");
+ if (!data_cleanup_executed) {
+ assert_x_eq(*data, THREAD_DATA,
+ "Argument passed into cleanup function should match tsd "
+ "value");
+ }
data_cleanup_executed = true;
+
+ /*
+ * Allocate during cleanup for two rounds, in order to assure that
+ * jemalloc's internal tsd reinitialization happens.
+ */
+ switch (*data) {
+ case THREAD_DATA:
+ *data = 1;
+ data_tsd_set(data);
+ break;
+ case 1:
+ *data = 2;
+ data_tsd_set(data);
+ break;
+ case 2:
+ return;
+ default:
+ not_reached();
+ }
+
+ {
+ void *p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpeced mallocx() failure");
+ dallocx(p, 0);
+ }
}
-malloc_tsd_protos(, data, data_t)
-malloc_tsd_externs(data, data_t)
+malloc_tsd_externs(data_, data_t)
#define DATA_INIT 0x12345678
-malloc_tsd_data(, data, data_t, DATA_INIT)
-malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup)
+malloc_tsd_data(, data_, data_t, DATA_INIT)
+malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup)
static void *
thd_start(void *arg)
{
data_t d = (data_t)(uintptr_t)arg;
+ void *p;
+
assert_x_eq(*data_tsd_get(), DATA_INIT,
"Initial tsd get should return initialization value");
+ p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+
data_tsd_set(&d);
assert_x_eq(*data_tsd_get(), d,
"After tsd set, tsd get should return value that was set");
@@ -37,6 +72,7 @@ thd_start(void *arg)
assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg,
"Resetting local data should have no effect on tsd");
+ free(p);
return (NULL);
}
diff --git a/deps/jemalloc/test/unit/util.c b/deps/jemalloc/test/unit/util.c
index dc3cfe8a9..8ab39a458 100644
--- a/deps/jemalloc/test/unit/util.c
+++ b/deps/jemalloc/test/unit/util.c
@@ -52,8 +52,8 @@ TEST_BEGIN(test_malloc_strtoumax)
const char *expected_errno_name;
uintmax_t expected_x;
};
-#define ERR(e) e, #e
-#define UMAX(x) ((uintmax_t)x##ULL)
+#define ERR(e) e, #e
+#define KUMAX(x) ((uintmax_t)x##ULL)
struct test_s tests[] = {
{"0", "0", -1, ERR(EINVAL), UINTMAX_MAX},
{"0", "0", 1, ERR(EINVAL), UINTMAX_MAX},
@@ -64,51 +64,51 @@ TEST_BEGIN(test_malloc_strtoumax)
{"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX},
{"-", "-", 0, ERR(EINVAL), UINTMAX_MAX},
- {"42", "", 0, ERR(0), UMAX(42)},
- {"+42", "", 0, ERR(0), UMAX(42)},
- {"-42", "", 0, ERR(0), UMAX(-42)},
- {"042", "", 0, ERR(0), UMAX(042)},
- {"+042", "", 0, ERR(0), UMAX(042)},
- {"-042", "", 0, ERR(0), UMAX(-042)},
- {"0x42", "", 0, ERR(0), UMAX(0x42)},
- {"+0x42", "", 0, ERR(0), UMAX(0x42)},
- {"-0x42", "", 0, ERR(0), UMAX(-0x42)},
-
- {"0", "", 0, ERR(0), UMAX(0)},
- {"1", "", 0, ERR(0), UMAX(1)},
-
- {"42", "", 0, ERR(0), UMAX(42)},
- {" 42", "", 0, ERR(0), UMAX(42)},
- {"42 ", " ", 0, ERR(0), UMAX(42)},
- {"0x", "x", 0, ERR(0), UMAX(0)},
- {"42x", "x", 0, ERR(0), UMAX(42)},
-
- {"07", "", 0, ERR(0), UMAX(7)},
- {"010", "", 0, ERR(0), UMAX(8)},
- {"08", "8", 0, ERR(0), UMAX(0)},
- {"0_", "_", 0, ERR(0), UMAX(0)},
-
- {"0x", "x", 0, ERR(0), UMAX(0)},
- {"0X", "X", 0, ERR(0), UMAX(0)},
- {"0xg", "xg", 0, ERR(0), UMAX(0)},
- {"0XA", "", 0, ERR(0), UMAX(10)},
-
- {"010", "", 10, ERR(0), UMAX(10)},
- {"0x3", "x3", 10, ERR(0), UMAX(0)},
-
- {"12", "2", 2, ERR(0), UMAX(1)},
- {"78", "8", 8, ERR(0), UMAX(7)},
- {"9a", "a", 10, ERR(0), UMAX(9)},
- {"9A", "A", 10, ERR(0), UMAX(9)},
- {"fg", "g", 16, ERR(0), UMAX(15)},
- {"FG", "G", 16, ERR(0), UMAX(15)},
- {"0xfg", "g", 16, ERR(0), UMAX(15)},
- {"0XFG", "G", 16, ERR(0), UMAX(15)},
- {"z_", "_", 36, ERR(0), UMAX(35)},
- {"Z_", "_", 36, ERR(0), UMAX(35)}
+ {"42", "", 0, ERR(0), KUMAX(42)},
+ {"+42", "", 0, ERR(0), KUMAX(42)},
+ {"-42", "", 0, ERR(0), KUMAX(-42)},
+ {"042", "", 0, ERR(0), KUMAX(042)},
+ {"+042", "", 0, ERR(0), KUMAX(042)},
+ {"-042", "", 0, ERR(0), KUMAX(-042)},
+ {"0x42", "", 0, ERR(0), KUMAX(0x42)},
+ {"+0x42", "", 0, ERR(0), KUMAX(0x42)},
+ {"-0x42", "", 0, ERR(0), KUMAX(-0x42)},
+
+ {"0", "", 0, ERR(0), KUMAX(0)},
+ {"1", "", 0, ERR(0), KUMAX(1)},
+
+ {"42", "", 0, ERR(0), KUMAX(42)},
+ {" 42", "", 0, ERR(0), KUMAX(42)},
+ {"42 ", " ", 0, ERR(0), KUMAX(42)},
+ {"0x", "x", 0, ERR(0), KUMAX(0)},
+ {"42x", "x", 0, ERR(0), KUMAX(42)},
+
+ {"07", "", 0, ERR(0), KUMAX(7)},
+ {"010", "", 0, ERR(0), KUMAX(8)},
+ {"08", "8", 0, ERR(0), KUMAX(0)},
+ {"0_", "_", 0, ERR(0), KUMAX(0)},
+
+ {"0x", "x", 0, ERR(0), KUMAX(0)},
+ {"0X", "X", 0, ERR(0), KUMAX(0)},
+ {"0xg", "xg", 0, ERR(0), KUMAX(0)},
+ {"0XA", "", 0, ERR(0), KUMAX(10)},
+
+ {"010", "", 10, ERR(0), KUMAX(10)},
+ {"0x3", "x3", 10, ERR(0), KUMAX(0)},
+
+ {"12", "2", 2, ERR(0), KUMAX(1)},
+ {"78", "8", 8, ERR(0), KUMAX(7)},
+ {"9a", "a", 10, ERR(0), KUMAX(9)},
+ {"9A", "A", 10, ERR(0), KUMAX(9)},
+ {"fg", "g", 16, ERR(0), KUMAX(15)},
+ {"FG", "G", 16, ERR(0), KUMAX(15)},
+ {"0xfg", "g", 16, ERR(0), KUMAX(15)},
+ {"0XFG", "G", 16, ERR(0), KUMAX(15)},
+ {"z_", "_", 36, ERR(0), KUMAX(35)},
+ {"Z_", "_", 36, ERR(0), KUMAX(35)}
};
#undef ERR
-#undef UMAX
+#undef KUMAX
unsigned i;
for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) {
@@ -141,8 +141,8 @@ TEST_BEGIN(test_malloc_snprintf_truncated)
char buf[BUFLEN];
int result;
size_t len;
-#define TEST(expected_str_untruncated, fmt...) do { \
- result = malloc_snprintf(buf, len, fmt); \
+#define TEST(expected_str_untruncated, ...) do { \
+ result = malloc_snprintf(buf, len, __VA_ARGS__); \
assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \
"Unexpected string inequality (\"%s\" vs \"%s\")", \
buf, expected_str_untruncated); \
@@ -173,8 +173,8 @@ TEST_BEGIN(test_malloc_snprintf)
#define BUFLEN 128
char buf[BUFLEN];
int result;
-#define TEST(expected_str, fmt...) do { \
- result = malloc_snprintf(buf, sizeof(buf), fmt); \
+#define TEST(expected_str, ...) do { \
+ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \
assert_str_eq(buf, expected_str, "Unexpected output"); \
assert_d_eq(result, strlen(expected_str), "Unexpected result"); \
} while (0)
diff --git a/deps/jemalloc/test/unit/zero.c b/deps/jemalloc/test/unit/zero.c
index 65a8f0c9c..93afc2b87 100644
--- a/deps/jemalloc/test/unit/zero.c
+++ b/deps/jemalloc/test/unit/zero.c
@@ -55,7 +55,7 @@ TEST_BEGIN(test_zero_large)
{
test_skip_if(!config_fill);
- test_zero(SMALL_MAXCLASS+1, arena_maxclass);
+ test_zero(SMALL_MAXCLASS+1, large_maxclass);
}
TEST_END
@@ -63,7 +63,7 @@ TEST_BEGIN(test_zero_huge)
{
test_skip_if(!config_fill);
- test_zero(arena_maxclass+1, chunksize*2);
+ test_zero(large_maxclass+1, chunksize*2);
}
TEST_END
diff --git a/deps/linenoise/.gitignore b/deps/linenoise/.gitignore
index 28f258a30..7ab7825f5 100644
--- a/deps/linenoise/.gitignore
+++ b/deps/linenoise/.gitignore
@@ -1 +1,3 @@
-linenoise_example*
+linenoise_example
+*.dSYM
+history.txt
diff --git a/deps/linenoise/README.markdown b/deps/linenoise/README.markdown
index 9612da47f..e01642cf8 100644
--- a/deps/linenoise/README.markdown
+++ b/deps/linenoise/README.markdown
@@ -1,8 +1,14 @@
# Linenoise
-A minimal, zero-config, BSD licensed, readline replacement.
+A minimal, zero-config, BSD licensed, readline replacement used in Redis,
+MongoDB, and Android.
-News: linenoise is now part of [Android](http://android.git.kernel.org/?p=platform/system/core.git;a=tree;f=liblinenoise;h=56450eaed7f783760e5e6a5993ef75cde2e29dea;hb=HEAD Android)!
+* Single and multi line editing mode with the usual key bindings implemented.
+* History handling.
+* Completion.
+* Hints (suggestions at the right of the prompt as you type).
+* About 1,100 lines of BSD license source code.
+* Only uses a subset of VT100 escapes (ANSI.SYS compatible).
## Can a line editing library be 20k lines of code?
@@ -10,36 +16,209 @@ Line editing with some support for history is a really important feature for com
So what usually happens is either:
- * Large programs with configure scripts disabling line editing if readline is not present in the system, or not supporting it at all since readline is GPL licensed and libedit (the BSD clone) is not as known and available as readline is (Readl world example of this problem: Tclsh).
+ * Large programs with configure scripts disabling line editing if readline is not present in the system, or not supporting it at all since readline is GPL licensed and libedit (the BSD clone) is not as known and available as readline is (Real world example of this problem: Tclsh).
* Smaller programs not using a configure script not supporting line editing at all (A problem we had with Redis-cli for instance).
The result is a pollution of binaries without line editing support.
-So I spent more or less two hours doing a reality check resulting in this little library: is it *really* needed for a line editing library to be 20k lines of code? Apparently not, it is possibe to get a very small, zero configuration, trivial to embed library, that solves the problem. Smaller programs will just include this, supporing line editing out of the box. Larger programs may use this little library or just checking with configure if readline/libedit is available and resorting to linenoise if not.
+So I spent more or less two hours doing a reality check resulting in this little library: is it *really* needed for a line editing library to be 20k lines of code? Apparently not, it is possibe to get a very small, zero configuration, trivial to embed library, that solves the problem. Smaller programs will just include this, supporing line editing out of the box. Larger programs may use this little library or just checking with configure if readline/libedit is available and resorting to Linenoise if not.
## Terminals, in 2010.
-Apparently almost every terminal you can happen to use today has some kind of support for VT100 alike escape sequences. So I tried to write a lib using just very basic VT100 features. The resulting library appears to work everywhere I tried to use it.
+Apparently almost every terminal you can happen to use today has some kind of support for basic VT100 escape sequences. So I tried to write a lib using just very basic VT100 features. The resulting library appears to work everywhere I tried to use it, and now can work even on ANSI.SYS compatible terminals, since no
+VT220 specific sequences are used anymore.
-Since it's so young I guess there are a few bugs, or the lib may not compile or work with some operating system, but it's a matter of a few weeks and eventually we'll get it right, and there will be no excuses for not shipping command line tools without built-in line editing support.
-
-The library is currently less than 400 lines of code. In order to use it in your project just look at the *example.c* file in the source distribution, it is trivial. Linenoise is BSD code, so you can use both in free software and commercial software.
+The library is currently about 1100 lines of code. In order to use it in your project just look at the *example.c* file in the source distribution, it is trivial. Linenoise is BSD code, so you can use both in free software and commercial software.
## Tested with...
* Linux text only console ($TERM = linux)
* Linux KDE terminal application ($TERM = xterm)
* Linux xterm ($TERM = xterm)
+ * Linux Buildroot ($TERM = vt100)
* Mac OS X iTerm ($TERM = xterm)
* Mac OS X default Terminal.app ($TERM = xterm)
* OpenBSD 4.5 through an OSX Terminal.app ($TERM = screen)
* IBM AIX 6.1
* FreeBSD xterm ($TERM = xterm)
+ * ANSI.SYS
+ * Emacs comint mode ($TERM = dumb)
Please test it everywhere you can and report back!
## Let's push this forward!
-Please fork it and add something interesting and send me a pull request. What's especially interesting are fixes, new key bindings, completion.
+Patches should be provided in the respect of Linenoise sensibility for small
+easy to understand code.
Send feedbacks to antirez at gmail
+
+# The API
+
+Linenoise is very easy to use, and reading the example shipped with the
+library should get you up to speed ASAP. Here is a list of API calls
+and how to use them.
+
+ char *linenoise(const char *prompt);
+
+This is the main Linenoise call: it shows the user a prompt with line editing
+and history capabilities. The prompt you specify is used as a prompt, that is,
+it will be printed to the left of the cursor. The library returns a buffer
+with the line composed by the user, or NULL on end of file or when there
+is an out of memory condition.
+
+When a tty is detected (the user is actually typing into a terminal session)
+the maximum editable line length is `LINENOISE_MAX_LINE`. When instead the
+standard input is not a tty, which happens every time you redirect a file
+to a program, or use it in an Unix pipeline, there are no limits to the
+length of the line that can be returned.
+
+The returned line should be freed with the `free()` standard system call.
+However sometimes it could happen that your program uses a different dynamic
+allocation library, so you may also used `linenoiseFree` to make sure the
+line is freed with the same allocator it was created.
+
+The canonical loop used by a program using Linenoise will be something like
+this:
+
+ while((line = linenoise("hello> ")) != NULL) {
+ printf("You wrote: %s\n", line);
+ linenoiseFree(line); /* Or just free(line) if you use libc malloc. */
+ }
+
+## Single line VS multi line editing
+
+By default, Linenoise uses single line editing, that is, a single row on the
+screen will be used, and as the user types more, the text will scroll towards
+left to make room. This works if your program is one where the user is
+unlikely to write a lot of text, otherwise multi line editing, where multiple
+screens rows are used, can be a lot more comfortable.
+
+In order to enable multi line editing use the following API call:
+
+ linenoiseSetMultiLine(1);
+
+You can disable it using `0` as argument.
+
+## History
+
+Linenoise supporst history, so that the user does not have to retype
+again and again the same things, but can use the down and up arrows in order
+to search and re-edit already inserted lines of text.
+
+The followings are the history API calls:
+
+ int linenoiseHistoryAdd(const char *line);
+ int linenoiseHistorySetMaxLen(int len);
+ int linenoiseHistorySave(const char *filename);
+ int linenoiseHistoryLoad(const char *filename);
+
+Use `linenoiseHistoryAdd` every time you want to add a new element
+to the top of the history (it will be the first the user will see when
+using the up arrow).
+
+Note that for history to work, you have to set a length for the history
+(which is zero by default, so history will be disabled if you don't set
+a proper one). This is accomplished using the `linenoiseHistorySetMaxLen`
+function.
+
+Linenoise has direct support for persisting the history into an history
+file. The functions `linenoiseHistorySave` and `linenoiseHistoryLoad` do
+just that. Both functions return -1 on error and 0 on success.
+
+## Completion
+
+Linenoise supports completion, which is the ability to complete the user
+input when she or he presses the `<TAB>` key.
+
+In order to use completion, you need to register a completion callback, which
+is called every time the user presses `<TAB>`. Your callback will return a
+list of items that are completions for the current string.
+
+The following is an example of registering a completion callback:
+
+ linenoiseSetCompletionCallback(completion);
+
+The completion must be a function returning `void` and getting as input
+a `const char` pointer, which is the line the user has typed so far, and
+a `linenoiseCompletions` object pointer, which is used as argument of
+`linenoiseAddCompletion` in order to add completions inside the callback.
+An example will make it more clear:
+
+ void completion(const char *buf, linenoiseCompletions *lc) {
+ if (buf[0] == 'h') {
+ linenoiseAddCompletion(lc,"hello");
+ linenoiseAddCompletion(lc,"hello there");
+ }
+ }
+
+Basically in your completion callback, you inspect the input, and return
+a list of items that are good completions by using `linenoiseAddCompletion`.
+
+If you want to test the completion feature, compile the example program
+with `make`, run it, type `h` and press `<TAB>`.
+
+## Hints
+
+Linenoise has a feature called *hints* which is very useful when you
+use Linenoise in order to implement a REPL (Read Eval Print Loop) for
+a program that accepts commands and arguments, but may also be useful in
+other conditions.
+
+The feature shows, on the right of the cursor, as the user types, hints that
+may be useful. The hints can be displayed using a different color compared
+to the color the user is typing, and can also be bold.
+
+For example as the user starts to type `"git remote add"`, with hints it's
+possible to show on the right of the prompt a string `<name> <url>`.
+
+The feature works similarly to the history feature, using a callback.
+To register the callback we use:
+
+ linenoiseSetHintsCallback(hints);
+
+The callback itself is implemented like this:
+
+ char *hints(const char *buf, int *color, int *bold) {
+ if (!strcasecmp(buf,"git remote add")) {
+ *color = 35;
+ *bold = 0;
+ return " <name> <url>";
+ }
+ return NULL;
+ }
+
+The callback function returns the string that should be displayed or NULL
+if no hint is available for the text the user currently typed. The returned
+string will be trimmed as needed depending on the number of columns available
+on the screen.
+
+It is possible to return a string allocated in dynamic way, by also registering
+a function to deallocate the hint string once used:
+
+ void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
+
+The free hint callback will just receive the pointer and free the string
+as needed (depending on how the hits callback allocated it).
+
+As you can see in the example above, a `color` (in xterm color terminal codes)
+can be provided together with a `bold` attribute. If no color is set, the
+current terminal foreground color is used. If no bold attribute is set,
+non-bold text is printed.
+
+Color codes are:
+
+ red = 31
+ green = 32
+ yellow = 33
+ blue = 34
+ magenta = 35
+ cyan = 36
+ white = 37;
+
+## Screen handling
+
+Sometimes you may want to clear the screen as a result of something the
+user typed. You can do this by calling the following function:
+
+ void linenoiseClearScreen(void);
diff --git a/deps/linenoise/example.c b/deps/linenoise/example.c
index ea0b515c1..3a544d3c6 100644
--- a/deps/linenoise/example.c
+++ b/deps/linenoise/example.c
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "linenoise.h"
@@ -10,16 +11,62 @@ void completion(const char *buf, linenoiseCompletions *lc) {
}
}
-int main(void) {
+char *hints(const char *buf, int *color, int *bold) {
+ if (!strcasecmp(buf,"hello")) {
+ *color = 35;
+ *bold = 0;
+ return " World";
+ }
+ return NULL;
+}
+
+int main(int argc, char **argv) {
char *line;
+ char *prgname = argv[0];
+
+ /* Parse options, with --multiline we enable multi line editing. */
+ while(argc > 1) {
+ argc--;
+ argv++;
+ if (!strcmp(*argv,"--multiline")) {
+ linenoiseSetMultiLine(1);
+ printf("Multi-line mode enabled.\n");
+ } else if (!strcmp(*argv,"--keycodes")) {
+ linenoisePrintKeyCodes();
+ exit(0);
+ } else {
+ fprintf(stderr, "Usage: %s [--multiline] [--keycodes]\n", prgname);
+ exit(1);
+ }
+ }
+ /* Set the completion callback. This will be called every time the
+ * user uses the <tab> key. */
linenoiseSetCompletionCallback(completion);
+ linenoiseSetHintsCallback(hints);
+
+ /* Load history from file. The history file is just a plain text file
+ * where entries are separated by newlines. */
linenoiseHistoryLoad("history.txt"); /* Load the history at startup */
+
+ /* Now this is the main loop of the typical linenoise-based application.
+ * The call to linenoise() will block as long as the user types something
+ * and presses enter.
+ *
+ * The typed string is returned as a malloc() allocated string by
+ * linenoise, so the user needs to free() it. */
while((line = linenoise("hello> ")) != NULL) {
- if (line[0] != '\0') {
+ /* Do something with the string. */
+ if (line[0] != '\0' && line[0] != '/') {
printf("echo: '%s'\n", line);
- linenoiseHistoryAdd(line);
- linenoiseHistorySave("history.txt"); /* Save every new entry */
+ linenoiseHistoryAdd(line); /* Add to the history. */
+ linenoiseHistorySave("history.txt"); /* Save the history on disk. */
+ } else if (!strncmp(line,"/historylen",11)) {
+ /* The "/historylen" command will change the history len. */
+ int len = atoi(line+11);
+ linenoiseHistorySetMaxLen(len);
+ } else if (line[0] == '/') {
+ printf("Unreconized command: %s\n", line);
}
free(line);
}
diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c
index aef5cdd24..fce14a7c5 100644
--- a/deps/linenoise/linenoise.c
+++ b/deps/linenoise/linenoise.c
@@ -2,7 +2,7 @@
* line editing lib needs to be 20,000 lines of C code.
*
* You can find the latest source code at:
- *
+ *
* http://github.com/antirez/linenoise
*
* Does a number of crazy assumptions that happen to be true in 99.9999% of
@@ -10,22 +10,22 @@
*
* ------------------------------------------------------------------------
*
- * Copyright (c) 2010-2013, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
*
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
+ *
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -37,7 +37,7 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ *
* ------------------------------------------------------------------------
*
* References:
@@ -56,10 +56,6 @@
* flickering effect with some slow terminal, but the lesser sequences
* the more compatible.
*
- * CHA (Cursor Horizontal Absolute)
- * Sequence: ESC [ n G
- * Effect: moves cursor to column n
- *
* EL (Erase Line)
* Sequence: ESC [ n K
* Effect: if n is 0 or missing, clear from cursor to end of line
@@ -68,7 +64,19 @@
*
* CUF (CUrsor Forward)
* Sequence: ESC [ n C
- * Effect: moves cursor forward of n chars
+ * Effect: moves cursor forward n chars
+ *
+ * CUB (CUrsor Backward)
+ * Sequence: ESC [ n D
+ * Effect: moves cursor backward n chars
+ *
+ * The following is used to get the terminal width if getting
+ * the width with the TIOCGWINSZ ioctl fails
+ *
+ * DSR (Device Status Report)
+ * Sequence: ESC [ 6 n
+ * Effect: reports the current cusor position as ESC [ n ; m R
+ * where n is the row and m is the column
*
* When multi line mode is enabled, we also use an additional escape
* sequence. However multi line editing is disabled by default.
@@ -81,17 +89,18 @@
* Sequence: ESC [ n B
* Effect: moves cursor down of n chars.
*
- * The following are used to clear the screen: ESC [ H ESC [ 2 J
- * This is actually composed of two sequences:
+ * When linenoiseClearScreen() is called, two additional escape sequences
+ * are used in order to clear the screen and position the cursor at home
+ * position.
*
- * cursorhome
+ * CUP (Cursor position)
* Sequence: ESC [ H
* Effect: moves the cursor to upper left corner
*
- * ED2 (Clear entire screen)
+ * ED (Erase display)
* Sequence: ESC [ 2 J
* Effect: clear the whole screen
- *
+ *
*/
#include <termios.h>
@@ -102,6 +111,7 @@
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <unistd.h>
@@ -111,6 +121,8 @@
#define LINENOISE_MAX_LINE 4096
static char *unsupported_term[] = {"dumb","cons25","emacs",NULL};
static linenoiseCompletionCallback *completionCallback = NULL;
+static linenoiseHintsCallback *hintsCallback = NULL;
+static linenoiseFreeHintsCallback *freeHintsCallback = NULL;
static struct termios orig_termios; /* In order to restore at exit.*/
static int rawmode = 0; /* For atexit() function to check if restore is needed*/
@@ -332,7 +344,7 @@ static void freeCompletions(linenoiseCompletions *lc) {
/* This is an helper function for linenoiseEdit() and is called when the
* user types the <tab> key in order to complete the string currently in the
* input.
- *
+ *
* The state of the editing is encapsulated into the pointed linenoiseState
* structure as described in the structure definition. */
static int completeLine(struct linenoiseState *ls) {
@@ -398,6 +410,18 @@ void linenoiseSetCompletionCallback(linenoiseCompletionCallback *fn) {
completionCallback = fn;
}
+/* Register a hits function to be called to show hits to the user at the
+ * right of the prompt. */
+void linenoiseSetHintsCallback(linenoiseHintsCallback *fn) {
+ hintsCallback = fn;
+}
+
+/* Register a function to free the hints returned by the hints callback
+ * registered with linenoiseSetHintsCallback(). */
+void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *fn) {
+ freeHintsCallback = fn;
+}
+
/* This function is used by the callback function registered by the user
* in order to add completion options given the input string when the
* user typed <tab>. See the example.c source code for a very easy to
@@ -447,6 +471,30 @@ static void abFree(struct abuf *ab) {
free(ab->b);
}
+/* Helper of refreshSingleLine() and refreshMultiLine() to show hints
+ * to the right of the prompt. */
+void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) {
+ char seq[64];
+ if (hintsCallback && plen+l->len < l->cols) {
+ int color = -1, bold = 0;
+ char *hint = hintsCallback(l->buf,&color,&bold);
+ if (hint) {
+ int hintlen = strlen(hint);
+ int hintmaxlen = l->cols-(plen+l->len);
+ if (hintlen > hintmaxlen) hintlen = hintmaxlen;
+ if (bold == 1 && color == -1) color = 37;
+ if (color != -1 || bold != 0)
+ snprintf(seq,64,"\033[%d;%d;49m",bold,color);
+ abAppend(ab,seq,strlen(seq));
+ abAppend(ab,hint,hintlen);
+ if (color != -1 || bold != 0)
+ abAppend(ab,"\033[0m",4);
+ /* Call the function to free the hint returned. */
+ if (freeHintsCallback) freeHintsCallback(hint);
+ }
+ }
+}
+
/* Single line low level line refresh.
*
* Rewrite the currently edited line accordingly to the buffer content,
@@ -459,7 +507,7 @@ static void refreshSingleLine(struct linenoiseState *l) {
size_t len = l->len;
size_t pos = l->pos;
struct abuf ab;
-
+
while((plen+pos) >= l->cols) {
buf++;
len--;
@@ -471,16 +519,18 @@ static void refreshSingleLine(struct linenoiseState *l) {
abInit(&ab);
/* Cursor to left edge */
- snprintf(seq,64,"\x1b[0G");
+ snprintf(seq,64,"\r");
abAppend(&ab,seq,strlen(seq));
/* Write the prompt and the current buffer content */
abAppend(&ab,l->prompt,strlen(l->prompt));
abAppend(&ab,buf,len);
+ /* Show hits if any. */
+ refreshShowHints(&ab,l,plen);
/* Erase to right */
snprintf(seq,64,"\x1b[0K");
abAppend(&ab,seq,strlen(seq));
/* Move cursor to original position. */
- snprintf(seq,64,"\x1b[0G\x1b[%dC", (int)(pos+plen));
+ snprintf(seq,64,"\r\x1b[%dC", (int)(pos+plen));
abAppend(&ab,seq,strlen(seq));
if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */
abFree(&ab);
@@ -496,6 +546,7 @@ static void refreshMultiLine(struct linenoiseState *l) {
int rows = (plen+l->len+l->cols-1)/l->cols; /* rows used by current buf. */
int rpos = (plen+l->oldpos+l->cols)/l->cols; /* cursor relative row. */
int rpos2; /* rpos after refresh. */
+ int col; /* colum position, zero-based. */
int old_rows = l->maxrows;
int fd = l->ofd, j;
struct abuf ab;
@@ -515,19 +566,22 @@ static void refreshMultiLine(struct linenoiseState *l) {
/* Now for every row clear it, go up. */
for (j = 0; j < old_rows-1; j++) {
lndebug("clear+up");
- snprintf(seq,64,"\x1b[0G\x1b[0K\x1b[1A");
+ snprintf(seq,64,"\r\x1b[0K\x1b[1A");
abAppend(&ab,seq,strlen(seq));
}
/* Clean the top line. */
lndebug("clear");
- snprintf(seq,64,"\x1b[0G\x1b[0K");
+ snprintf(seq,64,"\r\x1b[0K");
abAppend(&ab,seq,strlen(seq));
-
+
/* Write the prompt and the current buffer content */
abAppend(&ab,l->prompt,strlen(l->prompt));
abAppend(&ab,l->buf,l->len);
+ /* Show hits if any. */
+ refreshShowHints(&ab,l,plen);
+
/* If we are at the very end of the screen with our prompt, we need to
* emit a newline and move the prompt to the first column. */
if (l->pos &&
@@ -536,7 +590,7 @@ static void refreshMultiLine(struct linenoiseState *l) {
{
lndebug("<newline>");
abAppend(&ab,"\n",1);
- snprintf(seq,64,"\x1b[0G");
+ snprintf(seq,64,"\r");
abAppend(&ab,seq,strlen(seq));
rows++;
if (rows > (int)l->maxrows) l->maxrows = rows;
@@ -554,8 +608,12 @@ static void refreshMultiLine(struct linenoiseState *l) {
}
/* Set column. */
- lndebug("set col %d", 1+((plen+(int)l->pos) % (int)l->cols));
- snprintf(seq,64,"\x1b[%dG", 1+((plen+(int)l->pos) % (int)l->cols));
+ col = (plen+(int)l->pos) % (int)l->cols;
+ lndebug("set col %d", 1+col);
+ if (col)
+ snprintf(seq,64,"\r\x1b[%dC", col);
+ else
+ snprintf(seq,64,"\r");
abAppend(&ab,seq,strlen(seq));
lndebug("\n");
@@ -584,7 +642,7 @@ int linenoiseEditInsert(struct linenoiseState *l, char c) {
l->pos++;
l->len++;
l->buf[l->len] = '\0';
- if ((!mlmode && l->plen+l->len < l->cols) /* || mlmode */) {
+ if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) {
/* Avoid a full update of the line in the
* trivial case. */
if (write(l->ofd,&c,1) == -1) return -1;
@@ -732,7 +790,7 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen,
/* The latest history entry is always our current buffer, that
* initially is just an empty string. */
linenoiseHistoryAdd("");
-
+
if (write(l.ofd,prompt,l.plen) == -1) return -1;
while(1) {
char c;
@@ -757,6 +815,15 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen,
case ENTER: /* enter */
history_len--;
free(history[history_len]);
+ if (mlmode) linenoiseEditMoveEnd(&l);
+ if (hintsCallback) {
+ /* Force a refresh without hints to leave the previous
+ * line as the user typed it after a newline. */
+ linenoiseHintsCallback *hc = hintsCallback;
+ hintsCallback = NULL;
+ refreshLine(&l);
+ hintsCallback = hc;
+ }
return (int)l.len;
case CTRL_C: /* ctrl-c */
errno = EAGAIN;
@@ -765,8 +832,8 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen,
case 8: /* ctrl-h */
linenoiseEditBackspace(&l);
break;
- case CTRL_D: /* ctrl-d, remove char at right of cursor, or of the
- line is empty, act as end-of-file. */
+ case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the
+ line is empty, act as end-of-file. */
if (l.len > 0) {
linenoiseEditDelete(&l);
} else {
@@ -904,7 +971,7 @@ void linenoisePrintKeyCodes(void) {
printf("'%c' %02x (%d) (type quit to exit)\n",
isprint(c) ? c : '?', (int)c, (int)c);
- printf("\x1b[0G"); /* Go left edge manually, we are in raw mode. */
+ printf("\r"); /* Go left edge manually, we are in raw mode. */
fflush(stdout);
}
disableRawMode(STDIN_FILENO);
@@ -919,22 +986,48 @@ static int linenoiseRaw(char *buf, size_t buflen, const char *prompt) {
errno = EINVAL;
return -1;
}
- if (!isatty(STDIN_FILENO)) {
- /* Not a tty: read from file / pipe. */
- if (fgets(buf, buflen, stdin) == NULL) return -1;
- count = strlen(buf);
- if (count && buf[count-1] == '\n') {
- count--;
- buf[count] = '\0';
+
+ if (enableRawMode(STDIN_FILENO) == -1) return -1;
+ count = linenoiseEdit(STDIN_FILENO, STDOUT_FILENO, buf, buflen, prompt);
+ disableRawMode(STDIN_FILENO);
+ printf("\n");
+ return count;
+}
+
+/* This function is called when linenoise() is called with the standard
+ * input file descriptor not attached to a TTY. So for example when the
+ * program using linenoise is called in pipe or with a file redirected
+ * to its standard input. In this case, we want to be able to return the
+ * line regardless of its length (by default we are limited to 4k). */
+static char *linenoiseNoTTY(void) {
+ char *line = NULL;
+ size_t len = 0, maxlen = 0;
+
+ while(1) {
+ if (len == maxlen) {
+ if (maxlen == 0) maxlen = 16;
+ maxlen *= 2;
+ char *oldval = line;
+ line = realloc(line,maxlen);
+ if (line == NULL) {
+ if (oldval) free(oldval);
+ return NULL;
+ }
+ }
+ int c = fgetc(stdin);
+ if (c == EOF || c == '\n') {
+ if (c == EOF && len == 0) {
+ free(line);
+ return NULL;
+ } else {
+ line[len] = '\0';
+ return line;
+ }
+ } else {
+ line[len] = c;
+ len++;
}
- } else {
- /* Interactive editing. */
- if (enableRawMode(STDIN_FILENO) == -1) return -1;
- count = linenoiseEdit(STDIN_FILENO, STDOUT_FILENO, buf, buflen, prompt);
- disableRawMode(STDIN_FILENO);
- printf("\n");
}
- return count;
}
/* The high level function that is the main API of the linenoise library.
@@ -946,7 +1039,11 @@ char *linenoise(const char *prompt) {
char buf[LINENOISE_MAX_LINE];
int count;
- if (isUnsupportedTerm()) {
+ if (!isatty(STDIN_FILENO)) {
+ /* Not a tty: read from file / pipe. In this mode we don't want any
+ * limit to the line size, so we call a function to handle that. */
+ return linenoiseNoTTY();
+ } else if (isUnsupportedTerm()) {
size_t len;
printf("%s",prompt);
@@ -965,6 +1062,14 @@ char *linenoise(const char *prompt) {
}
}
+/* This is just a wrapper the user may want to call in order to make sure
+ * the linenoise returned buffer is freed with the same allocator it was
+ * created with. Useful when the main program is using an alternative
+ * allocator. */
+void linenoiseFree(void *ptr) {
+ free(ptr);
+}
+
/* ================================ History ================================= */
/* Free the history, but does not reset it. Only used when we have to
@@ -1056,10 +1161,14 @@ int linenoiseHistorySetMaxLen(int len) {
/* Save the history in the specified file. On success 0 is returned
* otherwise -1 is returned. */
int linenoiseHistorySave(const char *filename) {
- FILE *fp = fopen(filename,"w");
+ mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO);
+ FILE *fp;
int j;
-
+
+ fp = fopen(filename,"w");
+ umask(old_umask);
if (fp == NULL) return -1;
+ chmod(filename,S_IRUSR|S_IWUSR);
for (j = 0; j < history_len; j++)
fprintf(fp,"%s\n",history[j]);
fclose(fp);
@@ -1074,12 +1183,12 @@ int linenoiseHistorySave(const char *filename) {
int linenoiseHistoryLoad(const char *filename) {
FILE *fp = fopen(filename,"r");
char buf[LINENOISE_MAX_LINE];
-
+
if (fp == NULL) return -1;
while (fgets(buf,LINENOISE_MAX_LINE,fp) != NULL) {
char *p;
-
+
p = strchr(buf,'\r');
if (!p) p = strchr(buf,'\n');
if (p) *p = '\0';
diff --git a/deps/linenoise/linenoise.h b/deps/linenoise/linenoise.h
index e22ebd3fd..ed20232c5 100644
--- a/deps/linenoise/linenoise.h
+++ b/deps/linenoise/linenoise.h
@@ -1,26 +1,28 @@
-/* linenoise.h -- guerrilla line editing library against the idea that a
- * line editing lib needs to be 20,000 lines of C code.
+/* linenoise.h -- VERSION 1.0
+ *
+ * Guerrilla line editing library against the idea that a line editing lib
+ * needs to be 20,000 lines of C code.
*
* See linenoise.c for more information.
*
* ------------------------------------------------------------------------
*
- * Copyright (c) 2010, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
*
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
+ *
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -47,10 +49,15 @@ typedef struct linenoiseCompletions {
} linenoiseCompletions;
typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *);
+typedef char*(linenoiseHintsCallback)(const char *, int *color, int *bold);
+typedef void(linenoiseFreeHintsCallback)(void *);
void linenoiseSetCompletionCallback(linenoiseCompletionCallback *);
+void linenoiseSetHintsCallback(linenoiseHintsCallback *);
+void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
void linenoiseAddCompletion(linenoiseCompletions *, const char *);
char *linenoise(const char *prompt);
+void linenoiseFree(void *ptr);
int linenoiseHistoryAdd(const char *line);
int linenoiseHistorySetMaxLen(int len);
int linenoiseHistorySave(const char *filename);
diff --git a/deps/lua/src/Makefile b/deps/lua/src/Makefile
index 34b0c3617..f3bba2f81 100644
--- a/deps/lua/src/Makefile
+++ b/deps/lua/src/Makefile
@@ -25,9 +25,10 @@ PLATS= aix ansi bsd freebsd generic linux macosx mingw posix solaris
LUA_A= liblua.a
CORE_O= lapi.o lcode.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o \
lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o \
- lundump.o lvm.o lzio.o strbuf.o
+ lundump.o lvm.o lzio.o strbuf.o fpconv.o
LIB_O= lauxlib.o lbaselib.o ldblib.o liolib.o lmathlib.o loslib.o ltablib.o \
- lstrlib.o loadlib.o linit.o lua_cjson.o lua_struct.o lua_cmsgpack.o
+ lstrlib.o loadlib.o linit.o lua_cjson.o lua_struct.o lua_cmsgpack.o \
+ lua_bit.o
LUA_T= lua
LUA_O= lua.o
diff --git a/deps/lua/src/fpconv.c b/deps/lua/src/fpconv.c
new file mode 100644
index 000000000..79908317a
--- /dev/null
+++ b/deps/lua/src/fpconv.c
@@ -0,0 +1,205 @@
+/* fpconv - Floating point conversion routines
+ *
+ * Copyright (c) 2011-2012 Mark Pulford <mark@kyne.com.au>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* JSON uses a '.' decimal separator. strtod() / sprintf() under C libraries
+ * with locale support will break when the decimal separator is a comma.
+ *
+ * fpconv_* will around these issues with a translation buffer if required.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#include "fpconv.h"
+
+/* Lua CJSON assumes the locale is the same for all threads within a
+ * process and doesn't change after initialisation.
+ *
+ * This avoids the need for per thread storage or expensive checks
+ * for call. */
+static char locale_decimal_point = '.';
+
+/* In theory multibyte decimal_points are possible, but
+ * Lua CJSON only supports UTF-8 and known locales only have
+ * single byte decimal points ([.,]).
+ *
+ * localconv() may not be thread safe (=>crash), and nl_langinfo() is
+ * not supported on some platforms. Use sprintf() instead - if the
+ * locale does change, at least Lua CJSON won't crash. */
+static void fpconv_update_locale()
+{
+ char buf[8];
+
+ snprintf(buf, sizeof(buf), "%g", 0.5);
+
+ /* Failing this test might imply the platform has a buggy dtoa
+ * implementation or wide characters */
+ if (buf[0] != '0' || buf[2] != '5' || buf[3] != 0) {
+ fprintf(stderr, "Error: wide characters found or printf() bug.");
+ abort();
+ }
+
+ locale_decimal_point = buf[1];
+}
+
+/* Check for a valid number character: [-+0-9a-yA-Y.]
+ * Eg: -0.6e+5, infinity, 0xF0.F0pF0
+ *
+ * Used to find the probable end of a number. It doesn't matter if
+ * invalid characters are counted - strtod() will find the valid
+ * number if it exists. The risk is that slightly more memory might
+ * be allocated before a parse error occurs. */
+static inline int valid_number_character(char ch)
+{
+ char lower_ch;
+
+ if ('0' <= ch && ch <= '9')
+ return 1;
+ if (ch == '-' || ch == '+' || ch == '.')
+ return 1;
+
+ /* Hex digits, exponent (e), base (p), "infinity",.. */
+ lower_ch = ch | 0x20;
+ if ('a' <= lower_ch && lower_ch <= 'y')
+ return 1;
+
+ return 0;
+}
+
+/* Calculate the size of the buffer required for a strtod locale
+ * conversion. */
+static int strtod_buffer_size(const char *s)
+{
+ const char *p = s;
+
+ while (valid_number_character(*p))
+ p++;
+
+ return p - s;
+}
+
+/* Similar to strtod(), but must be passed the current locale's decimal point
+ * character. Guaranteed to be called at the start of any valid number in a string */
+double fpconv_strtod(const char *nptr, char **endptr)
+{
+ char localbuf[FPCONV_G_FMT_BUFSIZE];
+ char *buf, *endbuf, *dp;
+ int buflen;
+ double value;
+
+ /* System strtod() is fine when decimal point is '.' */
+ if (locale_decimal_point == '.')
+ return strtod(nptr, endptr);
+
+ buflen = strtod_buffer_size(nptr);
+ if (!buflen) {
+ /* No valid characters found, standard strtod() return */
+ *endptr = (char *)nptr;
+ return 0;
+ }
+
+ /* Duplicate number into buffer */
+ if (buflen >= FPCONV_G_FMT_BUFSIZE) {
+ /* Handle unusually large numbers */
+ buf = malloc(buflen + 1);
+ if (!buf) {
+ fprintf(stderr, "Out of memory");
+ abort();
+ }
+ } else {
+ /* This is the common case.. */
+ buf = localbuf;
+ }
+ memcpy(buf, nptr, buflen);
+ buf[buflen] = 0;
+
+ /* Update decimal point character if found */
+ dp = strchr(buf, '.');
+ if (dp)
+ *dp = locale_decimal_point;
+
+ value = strtod(buf, &endbuf);
+ *endptr = (char *)&nptr[endbuf - buf];
+ if (buflen >= FPCONV_G_FMT_BUFSIZE)
+ free(buf);
+
+ return value;
+}
+
+/* "fmt" must point to a buffer of at least 6 characters */
+static void set_number_format(char *fmt, int precision)
+{
+ int d1, d2, i;
+
+ assert(1 <= precision && precision <= 14);
+
+ /* Create printf format (%.14g) from precision */
+ d1 = precision / 10;
+ d2 = precision % 10;
+ fmt[0] = '%';
+ fmt[1] = '.';
+ i = 2;
+ if (d1) {
+ fmt[i++] = '0' + d1;
+ }
+ fmt[i++] = '0' + d2;
+ fmt[i++] = 'g';
+ fmt[i] = 0;
+}
+
+/* Assumes there is always at least 32 characters available in the target buffer */
+int fpconv_g_fmt(char *str, double num, int precision)
+{
+ char buf[FPCONV_G_FMT_BUFSIZE];
+ char fmt[6];
+ int len;
+ char *b;
+
+ set_number_format(fmt, precision);
+
+ /* Pass through when decimal point character is dot. */
+ if (locale_decimal_point == '.')
+ return snprintf(str, FPCONV_G_FMT_BUFSIZE, fmt, num);
+
+ /* snprintf() to a buffer then translate for other decimal point characters */
+ len = snprintf(buf, FPCONV_G_FMT_BUFSIZE, fmt, num);
+
+ /* Copy into target location. Translate decimal point if required */
+ b = buf;
+ do {
+ *str++ = (*b == locale_decimal_point ? '.' : *b);
+ } while(*b++);
+
+ return len;
+}
+
+void fpconv_init()
+{
+ fpconv_update_locale();
+}
+
+/* vi:ai et sw=4 ts=4:
+ */
diff --git a/deps/lua/src/fpconv.h b/deps/lua/src/fpconv.h
new file mode 100644
index 000000000..7b0d0ee31
--- /dev/null
+++ b/deps/lua/src/fpconv.h
@@ -0,0 +1,22 @@
+/* Lua CJSON floating point conversion routines */
+
+/* Buffer required to store the largest string representation of a double.
+ *
+ * Longest double printed with %.14g is 21 characters long:
+ * -1.7976931348623e+308 */
+# define FPCONV_G_FMT_BUFSIZE 32
+
+#ifdef USE_INTERNAL_FPCONV
+static inline void fpconv_init()
+{
+ /* Do nothing - not required */
+}
+#else
+extern void fpconv_init();
+#endif
+
+extern int fpconv_g_fmt(char*, double, int);
+extern double fpconv_strtod(const char*, char**);
+
+/* vi:ai et sw=4 ts=4:
+ */
diff --git a/deps/lua/src/ldo.c b/deps/lua/src/ldo.c
index d1bf786cb..514f7a2a3 100644
--- a/deps/lua/src/ldo.c
+++ b/deps/lua/src/ldo.c
@@ -495,7 +495,7 @@ static void f_parser (lua_State *L, void *ud) {
struct SParser *p = cast(struct SParser *, ud);
int c = luaZ_lookahead(p->z);
luaC_checkGC(L);
- tf = ((c == LUA_SIGNATURE[0]) ? luaU_undump : luaY_parser)(L, p->z,
+ tf = (luaY_parser)(L, p->z,
&p->buff, p->name);
cl = luaF_newLclosure(L, tf->nups, hvalue(gt(L)));
cl->l.p = tf;
diff --git a/deps/lua/src/lua_bit.c b/deps/lua/src/lua_bit.c
new file mode 100644
index 000000000..690df7d3c
--- /dev/null
+++ b/deps/lua/src/lua_bit.c
@@ -0,0 +1,189 @@
+/*
+** Lua BitOp -- a bit operations library for Lua 5.1/5.2.
+** http://bitop.luajit.org/
+**
+** Copyright (C) 2008-2012 Mike Pall. All rights reserved.
+**
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+*/
+
+#define LUA_BITOP_VERSION "1.0.2"
+
+#define LUA_LIB
+#include "lua.h"
+#include "lauxlib.h"
+
+#ifdef _MSC_VER
+/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+typedef int32_t SBits;
+typedef uint32_t UBits;
+
+typedef union {
+ lua_Number n;
+#ifdef LUA_NUMBER_DOUBLE
+ uint64_t b;
+#else
+ UBits b;
+#endif
+} BitNum;
+
+/* Convert argument to bit type. */
+static UBits barg(lua_State *L, int idx)
+{
+ BitNum bn;
+ UBits b;
+#if LUA_VERSION_NUM < 502
+ bn.n = lua_tonumber(L, idx);
+#else
+ bn.n = luaL_checknumber(L, idx);
+#endif
+#if defined(LUA_NUMBER_DOUBLE)
+ bn.n += 6755399441055744.0; /* 2^52+2^51 */
+#ifdef SWAPPED_DOUBLE
+ b = (UBits)(bn.b >> 32);
+#else
+ b = (UBits)bn.b;
+#endif
+#elif defined(LUA_NUMBER_INT) || defined(LUA_NUMBER_LONG) || \
+ defined(LUA_NUMBER_LONGLONG) || defined(LUA_NUMBER_LONG_LONG) || \
+ defined(LUA_NUMBER_LLONG)
+ if (sizeof(UBits) == sizeof(lua_Number))
+ b = bn.b;
+ else
+ b = (UBits)(SBits)bn.n;
+#elif defined(LUA_NUMBER_FLOAT)
+#error "A 'float' lua_Number type is incompatible with this library"
+#else
+#error "Unknown number type, check LUA_NUMBER_* in luaconf.h"
+#endif
+#if LUA_VERSION_NUM < 502
+ if (b == 0 && !lua_isnumber(L, idx)) {
+ luaL_typerror(L, idx, "number");
+ }
+#endif
+ return b;
+}
+
+/* Return bit type. */
+#define BRET(b) lua_pushnumber(L, (lua_Number)(SBits)(b)); return 1;
+
+static int bit_tobit(lua_State *L) { BRET(barg(L, 1)) }
+static int bit_bnot(lua_State *L) { BRET(~barg(L, 1)) }
+
+#define BIT_OP(func, opr) \
+ static int func(lua_State *L) { int i; UBits b = barg(L, 1); \
+ for (i = lua_gettop(L); i > 1; i--) b opr barg(L, i); BRET(b) }
+BIT_OP(bit_band, &=)
+BIT_OP(bit_bor, |=)
+BIT_OP(bit_bxor, ^=)
+
+#define bshl(b, n) (b << n)
+#define bshr(b, n) (b >> n)
+#define bsar(b, n) ((SBits)b >> n)
+#define brol(b, n) ((b << n) | (b >> (32-n)))
+#define bror(b, n) ((b << (32-n)) | (b >> n))
+#define BIT_SH(func, fn) \
+ static int func(lua_State *L) { \
+ UBits b = barg(L, 1); UBits n = barg(L, 2) & 31; BRET(fn(b, n)) }
+BIT_SH(bit_lshift, bshl)
+BIT_SH(bit_rshift, bshr)
+BIT_SH(bit_arshift, bsar)
+BIT_SH(bit_rol, brol)
+BIT_SH(bit_ror, bror)
+
+static int bit_bswap(lua_State *L)
+{
+ UBits b = barg(L, 1);
+ b = (b >> 24) | ((b >> 8) & 0xff00) | ((b & 0xff00) << 8) | (b << 24);
+ BRET(b)
+}
+
+static int bit_tohex(lua_State *L)
+{
+ UBits b = barg(L, 1);
+ SBits n = lua_isnone(L, 2) ? 8 : (SBits)barg(L, 2);
+ const char *hexdigits = "0123456789abcdef";
+ char buf[8];
+ int i;
+ if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
+ if (n > 8) n = 8;
+ for (i = (int)n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
+ lua_pushlstring(L, buf, (size_t)n);
+ return 1;
+}
+
+static const struct luaL_Reg bit_funcs[] = {
+ { "tobit", bit_tobit },
+ { "bnot", bit_bnot },
+ { "band", bit_band },
+ { "bor", bit_bor },
+ { "bxor", bit_bxor },
+ { "lshift", bit_lshift },
+ { "rshift", bit_rshift },
+ { "arshift", bit_arshift },
+ { "rol", bit_rol },
+ { "ror", bit_ror },
+ { "bswap", bit_bswap },
+ { "tohex", bit_tohex },
+ { NULL, NULL }
+};
+
+/* Signed right-shifts are implementation-defined per C89/C99.
+** But the de facto standard are arithmetic right-shifts on two's
+** complement CPUs. This behaviour is required here, so test for it.
+*/
+#define BAD_SAR (bsar(-8, 2) != (SBits)-2)
+
+LUALIB_API int luaopen_bit(lua_State *L)
+{
+ UBits b;
+ lua_pushnumber(L, (lua_Number)1437217655L);
+ b = barg(L, -1);
+ if (b != (UBits)1437217655L || BAD_SAR) { /* Perform a simple self-test. */
+ const char *msg = "compiled with incompatible luaconf.h";
+#ifdef LUA_NUMBER_DOUBLE
+#ifdef _WIN32
+ if (b == (UBits)1610612736L)
+ msg = "use D3DCREATE_FPU_PRESERVE with DirectX";
+#endif
+ if (b == (UBits)1127743488L)
+ msg = "not compiled with SWAPPED_DOUBLE";
+#endif
+ if (BAD_SAR)
+ msg = "arithmetic right-shift broken";
+ luaL_error(L, "bit library self-test failed (%s)", msg);
+ }
+#if LUA_VERSION_NUM < 502
+ luaL_register(L, "bit", bit_funcs);
+#else
+ luaL_newlib(L, bit_funcs);
+#endif
+ return 1;
+}
+
diff --git a/deps/lua/src/lua_cjson.c b/deps/lua/src/lua_cjson.c
index 2e272b007..c26c0d7b8 100644
--- a/deps/lua/src/lua_cjson.c
+++ b/deps/lua/src/lua_cjson.c
@@ -1,8 +1,6 @@
-#define VERSION "1.0.3"
-
-/* CJSON - JSON support for Lua
+/* Lua CJSON - JSON support for Lua
*
- * Copyright (c) 2010-2011 Mark Pulford <mark@kyne.com.au>
+ * Copyright (c) 2010-2012 Mark Pulford <mark@kyne.com.au>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
@@ -41,22 +39,42 @@
#include <assert.h>
#include <string.h>
#include <math.h>
+#include <limits.h>
#include "lua.h"
#include "lauxlib.h"
#include "strbuf.h"
+#include "fpconv.h"
+
+#include "../../../src/solarisfixes.h"
+
+#ifndef CJSON_MODNAME
+#define CJSON_MODNAME "cjson"
+#endif
+
+#ifndef CJSON_VERSION
+#define CJSON_VERSION "2.1.0"
+#endif
-#ifdef MISSING_ISINF
+/* Workaround for Solaris platforms missing isinf() */
+#if !defined(isinf) && (defined(USE_INTERNAL_ISINF) || defined(MISSING_ISINF))
#define isinf(x) (!isnan(x) && isnan((x) - (x)))
#endif
#define DEFAULT_SPARSE_CONVERT 0
#define DEFAULT_SPARSE_RATIO 2
#define DEFAULT_SPARSE_SAFE 10
-#define DEFAULT_MAX_DEPTH 20
-#define DEFAULT_ENCODE_REFUSE_BADNUM 1
-#define DEFAULT_DECODE_REFUSE_BADNUM 0
+#define DEFAULT_ENCODE_MAX_DEPTH 1000
+#define DEFAULT_DECODE_MAX_DEPTH 1000
+#define DEFAULT_ENCODE_INVALID_NUMBERS 0
+#define DEFAULT_DECODE_INVALID_NUMBERS 1
#define DEFAULT_ENCODE_KEEP_BUFFER 1
+#define DEFAULT_ENCODE_NUMBER_PRECISION 14
+
+#ifdef DISABLE_INVALID_NUMBERS
+#undef DEFAULT_DECODE_INVALID_NUMBERS
+#define DEFAULT_DECODE_INVALID_NUMBERS 0
+#endif
typedef enum {
T_OBJ_BEGIN,
@@ -96,29 +114,29 @@ static const char *json_token_type_name[] = {
typedef struct {
json_token_type_t ch2token[256];
char escape2char[256]; /* Decoding */
-#if 0
- char escapes[35][8]; /* Pre-generated escape string buffer */
- char *char2escape[256]; /* Encoding */
-#endif
+
+ /* encode_buf is only allocated and used when
+ * encode_keep_buffer is set */
strbuf_t encode_buf;
- char number_fmt[8]; /* "%.XXg\0" */
- int current_depth;
int encode_sparse_convert;
int encode_sparse_ratio;
int encode_sparse_safe;
int encode_max_depth;
- int encode_refuse_badnum;
- int decode_refuse_badnum;
- int encode_keep_buffer;
+ int encode_invalid_numbers; /* 2 => Encode as "null" */
int encode_number_precision;
+ int encode_keep_buffer;
+
+ int decode_invalid_numbers;
+ int decode_max_depth;
} json_config_t;
typedef struct {
const char *data;
- int index;
+ const char *ptr;
strbuf_t *tmp; /* Temporary storage for strings */
json_config_t *cfg;
+ int current_depth;
} json_parse_t;
typedef struct {
@@ -171,29 +189,76 @@ static const char *char2escape[256] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
-static int json_config_key;
-
/* ===== CONFIGURATION ===== */
static json_config_t *json_fetch_config(lua_State *l)
{
json_config_t *cfg;
- lua_pushlightuserdata(l, &json_config_key);
- lua_gettable(l, LUA_REGISTRYINDEX);
- cfg = lua_touserdata(l, -1);
+ cfg = lua_touserdata(l, lua_upvalueindex(1));
if (!cfg)
luaL_error(l, "BUG: Unable to fetch CJSON configuration");
- lua_pop(l, 1);
-
return cfg;
}
-static void json_verify_arg_count(lua_State *l, int args)
+/* Ensure the correct number of arguments have been provided.
+ * Pad with nil to allow other functions to simply check arg[i]
+ * to find whether an argument was provided */
+static json_config_t *json_arg_init(lua_State *l, int args)
{
luaL_argcheck(l, lua_gettop(l) <= args, args + 1,
"found too many arguments");
+
+ while (lua_gettop(l) < args)
+ lua_pushnil(l);
+
+ return json_fetch_config(l);
+}
+
+/* Process integer options for configuration functions */
+static int json_integer_option(lua_State *l, int optindex, int *setting,
+ int min, int max)
+{
+ char errmsg[64];
+ int value;
+
+ if (!lua_isnil(l, optindex)) {
+ value = luaL_checkinteger(l, optindex);
+ snprintf(errmsg, sizeof(errmsg), "expected integer between %d and %d", min, max);
+ luaL_argcheck(l, min <= value && value <= max, 1, errmsg);
+ *setting = value;
+ }
+
+ lua_pushinteger(l, *setting);
+
+ return 1;
+}
+
+/* Process enumerated arguments for a configuration function */
+static int json_enum_option(lua_State *l, int optindex, int *setting,
+ const char **options, int bool_true)
+{
+ static const char *bool_options[] = { "off", "on", NULL };
+
+ if (!options) {
+ options = bool_options;
+ bool_true = 1;
+ }
+
+ if (!lua_isnil(l, optindex)) {
+ if (bool_true && lua_isboolean(l, optindex))
+ *setting = lua_toboolean(l, optindex) * bool_true;
+ else
+ *setting = luaL_checkoption(l, optindex, NULL, options);
+ }
+
+ if (bool_true && (*setting == 0 || *setting == bool_true))
+ lua_pushboolean(l, *setting);
+ else
+ lua_pushstring(l, options[*setting]);
+
+ return 1;
}
/* Configures handling of extremely sparse arrays:
@@ -202,29 +267,11 @@ static void json_verify_arg_count(lua_State *l, int args)
* safe: Always use an array when the max index <= safe */
static int json_cfg_encode_sparse_array(lua_State *l)
{
- json_config_t *cfg;
- int val;
-
- json_verify_arg_count(l, 3);
- cfg = json_fetch_config(l);
-
- switch (lua_gettop(l)) {
- case 3:
- val = luaL_checkinteger(l, 3);
- luaL_argcheck(l, val >= 0, 3, "expected integer >= 0");
- cfg->encode_sparse_safe = val;
- case 2:
- val = luaL_checkinteger(l, 2);
- luaL_argcheck(l, val >= 0, 2, "expected integer >= 0");
- cfg->encode_sparse_ratio = val;
- case 1:
- luaL_argcheck(l, lua_isboolean(l, 1), 1, "expected boolean");
- cfg->encode_sparse_convert = lua_toboolean(l, 1);
- }
+ json_config_t *cfg = json_arg_init(l, 3);
- lua_pushboolean(l, cfg->encode_sparse_convert);
- lua_pushinteger(l, cfg->encode_sparse_ratio);
- lua_pushinteger(l, cfg->encode_sparse_safe);
+ json_enum_option(l, 1, &cfg->encode_sparse_convert, NULL, 1);
+ json_integer_option(l, 2, &cfg->encode_sparse_ratio, 0, INT_MAX);
+ json_integer_option(l, 3, &cfg->encode_sparse_safe, 0, INT_MAX);
return 3;
}
@@ -233,108 +280,80 @@ static int json_cfg_encode_sparse_array(lua_State *l)
* encoding */
static int json_cfg_encode_max_depth(lua_State *l)
{
- json_config_t *cfg;
- int depth;
+ json_config_t *cfg = json_arg_init(l, 1);
- json_verify_arg_count(l, 1);
- cfg = json_fetch_config(l);
-
- if (lua_gettop(l)) {
- depth = luaL_checkinteger(l, 1);
- luaL_argcheck(l, depth > 0, 1, "expected positive integer");
- cfg->encode_max_depth = depth;
- }
-
- lua_pushinteger(l, cfg->encode_max_depth);
-
- return 1;
+ return json_integer_option(l, 1, &cfg->encode_max_depth, 1, INT_MAX);
}
-static void json_set_number_precision(json_config_t *cfg, int prec)
+/* Configures the maximum number of nested arrays/objects allowed when
+ * encoding */
+static int json_cfg_decode_max_depth(lua_State *l)
{
- cfg->encode_number_precision = prec;
- sprintf(cfg->number_fmt, "%%.%dg", prec);
+ json_config_t *cfg = json_arg_init(l, 1);
+
+ return json_integer_option(l, 1, &cfg->decode_max_depth, 1, INT_MAX);
}
/* Configures number precision when converting doubles to text */
static int json_cfg_encode_number_precision(lua_State *l)
{
- json_config_t *cfg;
- int precision;
-
- json_verify_arg_count(l, 1);
- cfg = json_fetch_config(l);
-
- if (lua_gettop(l)) {
- precision = luaL_checkinteger(l, 1);
- luaL_argcheck(l, 1 <= precision && precision <= 14, 1,
- "expected integer between 1 and 14");
- json_set_number_precision(cfg, precision);
- }
+ json_config_t *cfg = json_arg_init(l, 1);
- lua_pushinteger(l, cfg->encode_number_precision);
-
- return 1;
+ return json_integer_option(l, 1, &cfg->encode_number_precision, 1, 14);
}
/* Configures JSON encoding buffer persistence */
static int json_cfg_encode_keep_buffer(lua_State *l)
{
- json_config_t *cfg;
+ json_config_t *cfg = json_arg_init(l, 1);
+ int old_value;
- json_verify_arg_count(l, 1);
- cfg = json_fetch_config(l);
+ old_value = cfg->encode_keep_buffer;
- if (lua_gettop(l)) {
- luaL_checktype(l, 1, LUA_TBOOLEAN);
- cfg->encode_keep_buffer = lua_toboolean(l, 1);
- }
+ json_enum_option(l, 1, &cfg->encode_keep_buffer, NULL, 1);
- lua_pushboolean(l, cfg->encode_keep_buffer);
+ /* Init / free the buffer if the setting has changed */
+ if (old_value ^ cfg->encode_keep_buffer) {
+ if (cfg->encode_keep_buffer)
+ strbuf_init(&cfg->encode_buf, 0);
+ else
+ strbuf_free(&cfg->encode_buf);
+ }
return 1;
}
-/* On argument: decode enum and set config variables
- * **options must point to a NULL terminated array of 4 enums
- * Returns: current enum value */
-static void json_enum_option(lua_State *l, const char **options,
- int *opt1, int *opt2)
+#if defined(DISABLE_INVALID_NUMBERS) && !defined(USE_INTERNAL_FPCONV)
+void json_verify_invalid_number_setting(lua_State *l, int *setting)
{
- int setting;
+ if (*setting == 1) {
+ *setting = 0;
+ luaL_error(l, "Infinity, NaN, and/or hexadecimal numbers are not supported.");
+ }
+}
+#else
+#define json_verify_invalid_number_setting(l, s) do { } while(0)
+#endif
- if (lua_gettop(l)) {
- if (lua_isboolean(l, 1))
- setting = lua_toboolean(l, 1) * 3;
- else
- setting = luaL_checkoption(l, 1, NULL, options);
+static int json_cfg_encode_invalid_numbers(lua_State *l)
+{
+ static const char *options[] = { "off", "on", "null", NULL };
+ json_config_t *cfg = json_arg_init(l, 1);
- *opt1 = setting & 1 ? 1 : 0;
- *opt2 = setting & 2 ? 1 : 0;
- } else {
- setting = *opt1 | (*opt2 << 1);
- }
+ json_enum_option(l, 1, &cfg->encode_invalid_numbers, options, 1);
- if (setting)
- lua_pushstring(l, options[setting]);
- else
- lua_pushboolean(l, 0);
-}
+ json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);
+ return 1;
+}
-/* When enabled, rejects: NaN, Infinity, hexidecimal numbers */
-static int json_cfg_refuse_invalid_numbers(lua_State *l)
+static int json_cfg_decode_invalid_numbers(lua_State *l)
{
- static const char *options_enc_dec[] = { "none", "encode", "decode",
- "both", NULL };
- json_config_t *cfg;
+ json_config_t *cfg = json_arg_init(l, 1);
- json_verify_arg_count(l, 1);
- cfg = json_fetch_config(l);
+ json_enum_option(l, 1, &cfg->decode_invalid_numbers, NULL, 1);
- json_enum_option(l, options_enc_dec,
- &cfg->encode_refuse_badnum,
- &cfg->decode_refuse_badnum);
+ json_verify_invalid_number_setting(l, &cfg->encode_invalid_numbers);
return 1;
}
@@ -364,16 +383,19 @@ static void json_create_config(lua_State *l)
lua_setfield(l, -2, "__gc");
lua_setmetatable(l, -2);
- strbuf_init(&cfg->encode_buf, 0);
-
cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;
cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;
cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;
- cfg->encode_max_depth = DEFAULT_MAX_DEPTH;
- cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM;
- cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM;
+ cfg->encode_max_depth = DEFAULT_ENCODE_MAX_DEPTH;
+ cfg->decode_max_depth = DEFAULT_DECODE_MAX_DEPTH;
+ cfg->encode_invalid_numbers = DEFAULT_ENCODE_INVALID_NUMBERS;
+ cfg->decode_invalid_numbers = DEFAULT_DECODE_INVALID_NUMBERS;
cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;
- json_set_number_precision(cfg, 14);
+ cfg->encode_number_precision = DEFAULT_ENCODE_NUMBER_PRECISION;
+
+#if DEFAULT_ENCODE_KEEP_BUFFER > 0
+ strbuf_init(&cfg->encode_buf, 0);
+#endif
/* Decoding init */
@@ -419,41 +441,15 @@ static void json_create_config(lua_State *l)
cfg->escape2char['f'] = '\f';
cfg->escape2char['r'] = '\r';
cfg->escape2char['u'] = 'u'; /* Unicode parsing required */
-
-
-#if 0
- /* Initialise separate storage for pre-generated escape codes.
- * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to
- * save memory. */
- for (i = 0 ; i < 32; i++)
- sprintf(cfg->escapes[i], "\\u%04x", i);
- strcpy(cfg->escapes[8], "\b"); /* Override simpler escapes */
- strcpy(cfg->escapes[9], "\t");
- strcpy(cfg->escapes[10], "\n");
- strcpy(cfg->escapes[12], "\f");
- strcpy(cfg->escapes[13], "\r");
- strcpy(cfg->escapes[32], "\\\""); /* chr(34) */
- strcpy(cfg->escapes[33], "\\\\"); /* chr(92) */
- sprintf(cfg->escapes[34], "\\u%04x", 127); /* char(127) */
-
- /* Initialise encoding escape lookup table */
- for (i = 0; i < 32; i++)
- cfg->char2escape[i] = cfg->escapes[i];
- for (i = 32; i < 256; i++)
- cfg->char2escape[i] = NULL;
- cfg->char2escape[34] = cfg->escapes[32];
- cfg->char2escape[92] = cfg->escapes[33];
- cfg->char2escape[127] = cfg->escapes[34];
-#endif
}
/* ===== ENCODING ===== */
-static void json_encode_exception(lua_State *l, json_config_t *cfg, int lindex,
+static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *json, int lindex,
const char *reason)
{
if (!cfg->encode_keep_buffer)
- strbuf_free(&cfg->encode_buf);
+ strbuf_free(json);
luaL_error(l, "Cannot serialise %s: %s",
lua_typename(l, lua_type(l, lindex)), reason);
}
@@ -494,7 +490,7 @@ static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
* -1 object (not a pure array)
* >=0 elements in array
*/
-static int lua_array_length(lua_State *l, json_config_t *cfg)
+static int lua_array_length(lua_State *l, json_config_t *cfg, strbuf_t *json)
{
double k;
int max;
@@ -529,7 +525,7 @@ static int lua_array_length(lua_State *l, json_config_t *cfg)
max > items * cfg->encode_sparse_ratio &&
max > cfg->encode_sparse_safe) {
if (!cfg->encode_sparse_convert)
- json_encode_exception(l, cfg, -1, "excessively sparse array");
+ json_encode_exception(l, cfg, json, -1, "excessively sparse array");
return -1;
}
@@ -537,31 +533,41 @@ static int lua_array_length(lua_State *l, json_config_t *cfg)
return max;
}
-static void json_encode_descend(lua_State *l, json_config_t *cfg)
+static void json_check_encode_depth(lua_State *l, json_config_t *cfg,
+ int current_depth, strbuf_t *json)
{
- cfg->current_depth++;
+ /* Ensure there are enough slots free to traverse a table (key,
+ * value) and push a string for a potential error message.
+ *
+ * Unlike "decode", the key and value are still on the stack when
+ * lua_checkstack() is called. Hence an extra slot for luaL_error()
+ * below is required just in case the next check to lua_checkstack()
+ * fails.
+ *
+ * While this won't cause a crash due to the EXTRA_STACK reserve
+ * slots, it would still be an improper use of the API. */
+ if (current_depth <= cfg->encode_max_depth && lua_checkstack(l, 3))
+ return;
- if (cfg->current_depth > cfg->encode_max_depth) {
- if (!cfg->encode_keep_buffer)
- strbuf_free(&cfg->encode_buf);
- luaL_error(l, "Cannot serialise, excessive nesting (%d)",
- cfg->current_depth);
- }
+ if (!cfg->encode_keep_buffer)
+ strbuf_free(json);
+
+ luaL_error(l, "Cannot serialise, excessive nesting (%d)",
+ current_depth);
}
-static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json);
+static void json_append_data(lua_State *l, json_config_t *cfg,
+ int current_depth, strbuf_t *json);
/* json_append_array args:
* - lua_State
* - JSON strbuf
 * - Size of passed Lua array (top of stack) */
-static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json,
- int array_length)
+static void json_append_array(lua_State *l, json_config_t *cfg, int current_depth,
+ strbuf_t *json, int array_length)
{
int comma, i;
- json_encode_descend(l, cfg);
-
strbuf_append_char(json, '[');
comma = 0;
@@ -572,38 +578,48 @@ static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json,
comma = 1;
lua_rawgeti(l, -1, i);
- json_append_data(l, cfg, json);
+ json_append_data(l, cfg, current_depth, json);
lua_pop(l, 1);
}
strbuf_append_char(json, ']');
-
- cfg->current_depth--;
}
-static void json_append_number(lua_State *l, strbuf_t *json, int index,
- json_config_t *cfg)
+static void json_append_number(lua_State *l, json_config_t *cfg,
+ strbuf_t *json, int lindex)
{
- double num = lua_tonumber(l, index);
+ double num = lua_tonumber(l, lindex);
+ int len;
- if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num)))
- json_encode_exception(l, cfg, index, "must not be NaN or Inf");
+ if (cfg->encode_invalid_numbers == 0) {
+ /* Prevent encoding invalid numbers */
+ if (isinf(num) || isnan(num))
+ json_encode_exception(l, cfg, json, lindex, "must not be NaN or Inf");
+ } else if (cfg->encode_invalid_numbers == 1) {
+ /* Encode invalid numbers, but handle "nan" separately
+ * since some platforms may encode as "-nan". */
+ if (isnan(num)) {
+ strbuf_append_mem(json, "nan", 3);
+ return;
+ }
+ } else {
+ /* Encode invalid numbers as "null" */
+ if (isinf(num) || isnan(num)) {
+ strbuf_append_mem(json, "null", 4);
+ return;
+ }
+ }
- /* Lowest double printed with %.14g is 21 characters long:
- * -1.7976931348623e+308
- *
- * Use 32 to include the \0, and a few extra just in case..
- */
- strbuf_append_fmt(json, 32, cfg->number_fmt, num);
+ strbuf_ensure_empty_length(json, FPCONV_G_FMT_BUFSIZE);
+ len = fpconv_g_fmt(strbuf_empty_ptr(json), num, cfg->encode_number_precision);
+ strbuf_extend_length(json, len);
}
static void json_append_object(lua_State *l, json_config_t *cfg,
- strbuf_t *json)
+ int current_depth, strbuf_t *json)
{
int comma, keytype;
- json_encode_descend(l, cfg);
-
/* Object */
strbuf_append_char(json, '{');
@@ -620,30 +636,29 @@ static void json_append_object(lua_State *l, json_config_t *cfg,
keytype = lua_type(l, -2);
if (keytype == LUA_TNUMBER) {
strbuf_append_char(json, '"');
- json_append_number(l, json, -2, cfg);
+ json_append_number(l, cfg, json, -2);
strbuf_append_mem(json, "\":", 2);
} else if (keytype == LUA_TSTRING) {
json_append_string(l, json, -2);
strbuf_append_char(json, ':');
} else {
- json_encode_exception(l, cfg, -2,
+ json_encode_exception(l, cfg, json, -2,
"table key must be a number or string");
/* never returns */
}
/* table, key, value */
- json_append_data(l, cfg, json);
+ json_append_data(l, cfg, current_depth, json);
lua_pop(l, 1);
/* table, key */
}
strbuf_append_char(json, '}');
-
- cfg->current_depth--;
}
/* Serialise Lua data into JSON string. */
-static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
+static void json_append_data(lua_State *l, json_config_t *cfg,
+ int current_depth, strbuf_t *json)
{
int len;
@@ -652,7 +667,7 @@ static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
json_append_string(l, json, -1);
break;
case LUA_TNUMBER:
- json_append_number(l, json, -1, cfg);
+ json_append_number(l, cfg, json, -1);
break;
case LUA_TBOOLEAN:
if (lua_toboolean(l, -1))
@@ -661,11 +676,13 @@ static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
strbuf_append_mem(json, "false", 5);
break;
case LUA_TTABLE:
- len = lua_array_length(l, cfg);
+ current_depth++;
+ json_check_encode_depth(l, cfg, current_depth, json);
+ len = lua_array_length(l, cfg, json);
if (len > 0)
- json_append_array(l, cfg, json, len);
+ json_append_array(l, cfg, current_depth, json, len);
else
- json_append_object(l, cfg, json);
+ json_append_object(l, cfg, current_depth, json);
break;
case LUA_TNIL:
strbuf_append_mem(json, "null", 4);
@@ -678,38 +695,38 @@ static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
default:
/* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
* and LUA_TLIGHTUSERDATA) cannot be serialised */
- json_encode_exception(l, cfg, -1, "type not supported");
+ json_encode_exception(l, cfg, json, -1, "type not supported");
/* never returns */
}
}
static int json_encode(lua_State *l)
{
- json_config_t *cfg;
+ json_config_t *cfg = json_fetch_config(l);
+ strbuf_t local_encode_buf;
+ strbuf_t *encode_buf;
char *json;
int len;
- /* Can't use json_verify_arg_count() since we need to ensure
- * there is only 1 argument */
luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
- cfg = json_fetch_config(l);
- cfg->current_depth = 0;
-
- /* Reset the persistent buffer if it exists.
- * Otherwise allocate a new buffer. */
- if (strbuf_allocated(&cfg->encode_buf))
- strbuf_reset(&cfg->encode_buf);
- else
- strbuf_init(&cfg->encode_buf, 0);
+ if (!cfg->encode_keep_buffer) {
+ /* Use private buffer */
+ encode_buf = &local_encode_buf;
+ strbuf_init(encode_buf, 0);
+ } else {
+ /* Reuse existing buffer */
+ encode_buf = &cfg->encode_buf;
+ strbuf_reset(encode_buf);
+ }
- json_append_data(l, cfg, &cfg->encode_buf);
- json = strbuf_string(&cfg->encode_buf, &len);
+ json_append_data(l, cfg, 0, encode_buf);
+ json = strbuf_string(encode_buf, &len);
lua_pushlstring(l, json, len);
if (!cfg->encode_keep_buffer)
- strbuf_free(&cfg->encode_buf);
+ strbuf_free(encode_buf);
return 1;
}
@@ -808,7 +825,7 @@ static int json_append_unicode_escape(json_parse_t *json)
int escape_len = 6;
/* Fetch UTF-16 code unit */
- codepoint = decode_hex4(&json->data[json->index + 2]);
+ codepoint = decode_hex4(json->ptr + 2);
if (codepoint < 0)
return -1;
@@ -824,13 +841,13 @@ static int json_append_unicode_escape(json_parse_t *json)
return -1;
/* Ensure the next code is a unicode escape */
- if (json->data[json->index + escape_len] != '\\' ||
- json->data[json->index + escape_len + 1] != 'u') {
+ if (*(json->ptr + escape_len) != '\\' ||
+ *(json->ptr + escape_len + 1) != 'u') {
return -1;
}
/* Fetch the next codepoint */
- surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
+ surrogate_low = decode_hex4(json->ptr + 2 + escape_len);
if (surrogate_low < 0)
return -1;
@@ -852,7 +869,7 @@ static int json_append_unicode_escape(json_parse_t *json)
/* Append bytes and advance parse index */
strbuf_append_mem_unsafe(json->tmp, utf8, len);
- json->index += escape_len;
+ json->ptr += escape_len;
return 0;
}
@@ -861,7 +878,7 @@ static void json_set_token_error(json_token_t *token, json_parse_t *json,
const char *errtype)
{
token->type = T_ERROR;
- token->index = json->index;
+ token->index = json->ptr - json->data;
token->value.string = errtype;
}
@@ -871,15 +888,18 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
char ch;
/* Caller must ensure a string is next */
- assert(json->data[json->index] == '"');
+ assert(*json->ptr == '"');
/* Skip " */
- json->index++;
+ json->ptr++;
/* json->tmp is the temporary strbuf used to accumulate the
- * decoded string value. */
+ * decoded string value.
+ * json->tmp is sized to handle JSON containing only a string value.
+ */
strbuf_reset(json->tmp);
- while ((ch = json->data[json->index]) != '"') {
+
+ while ((ch = *json->ptr) != '"') {
if (!ch) {
/* Premature end of the string */
json_set_token_error(token, json, "unexpected end of string");
@@ -889,7 +909,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
/* Handle escapes */
if (ch == '\\') {
/* Fetch escape character */
- ch = json->data[json->index + 1];
+ ch = *(json->ptr + 1);
/* Translate escape code and append to tmp string */
ch = escape2char[(unsigned char)ch];
@@ -907,14 +927,14 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
}
/* Skip '\' */
- json->index++;
+ json->ptr++;
}
/* Append normal character or translated single character
* Unicode escapes are handled above */
strbuf_append_char_unsafe(json->tmp, ch);
- json->index++;
+ json->ptr++;
}
- json->index++; /* Eat final quote (") */
+ json->ptr++; /* Eat final quote (") */
strbuf_ensure_null(json->tmp);
@@ -928,7 +948,7 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
* json_next_number_token() uses strtod() which allows other forms:
* - numbers starting with '+'
* - NaN, -NaN, infinity, -infinity
- * - hexidecimal numbers
+ * - hexadecimal numbers
* - numbers with leading zeros
*
* json_is_invalid_number() detects "numbers" which may pass strtod()'s
@@ -939,34 +959,33 @@ static void json_next_string_token(json_parse_t *json, json_token_t *token)
*/
static int json_is_invalid_number(json_parse_t *json)
{
- int i = json->index;
+ const char *p = json->ptr;
/* Reject numbers starting with + */
- if (json->data[i] == '+')
+ if (*p == '+')
return 1;
/* Skip minus sign if it exists */
- if (json->data[i] == '-')
- i++;
+ if (*p == '-')
+ p++;
/* Reject numbers starting with 0x, or leading zeros */
- if (json->data[i] == '0') {
- int ch2 = json->data[i + 1];
+ if (*p == '0') {
+ int ch2 = *(p + 1);
if ((ch2 | 0x20) == 'x' || /* Hex */
('0' <= ch2 && ch2 <= '9')) /* Leading zero */
return 1;
return 0;
- } else if (json->data[i] <= '9') {
+ } else if (*p <= '9') {
return 0; /* Ordinary number */
}
-
/* Reject inf/nan */
- if (!strncasecmp(&json->data[i], "inf", 3))
+ if (!strncasecmp(p, "inf", 3))
return 1;
- if (!strncasecmp(&json->data[i], "nan", 3))
+ if (!strncasecmp(p, "nan", 3))
return 1;
/* Pass all other numbers which may still be invalid, but
@@ -976,35 +995,39 @@ static int json_is_invalid_number(json_parse_t *json)
static void json_next_number_token(json_parse_t *json, json_token_t *token)
{
- const char *startptr;
char *endptr;
token->type = T_NUMBER;
- startptr = &json->data[json->index];
- token->value.number = strtod(&json->data[json->index], &endptr);
- if (startptr == endptr)
+ token->value.number = fpconv_strtod(json->ptr, &endptr);
+ if (json->ptr == endptr)
json_set_token_error(token, json, "invalid number");
else
- json->index += endptr - startptr; /* Skip the processed number */
+ json->ptr = endptr; /* Skip the processed number */
return;
}
/* Fills in the token struct.
* T_STRING will return a pointer to the json_parse_t temporary string
- * T_ERROR will leave the json->index pointer at the error.
+ * T_ERROR will leave the json->ptr pointer at the error.
*/
static void json_next_token(json_parse_t *json, json_token_t *token)
{
- json_token_type_t *ch2token = json->cfg->ch2token;
+ const json_token_type_t *ch2token = json->cfg->ch2token;
int ch;
- /* Eat whitespace. FIXME: UGLY */
- token->type = ch2token[(unsigned char)json->data[json->index]];
- while (token->type == T_WHITESPACE)
- token->type = ch2token[(unsigned char)json->data[++json->index]];
+ /* Eat whitespace. */
+ while (1) {
+ ch = (unsigned char)*(json->ptr);
+ token->type = ch2token[ch];
+ if (token->type != T_WHITESPACE)
+ break;
+ json->ptr++;
+ }
- token->index = json->index;
+ /* Store location of new token. Required when throwing errors
+ * for unexpected tokens (syntax errors). */
+ token->index = json->ptr - json->data;
/* Don't advance the pointer for an error or the end */
if (token->type == T_ERROR) {
@@ -1018,14 +1041,13 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
/* Found a known single character token, advance index and return */
if (token->type != T_UNKNOWN) {
- json->index++;
+ json->ptr++;
return;
}
- /* Process characters which triggered T_UNKNOWN */
- ch = json->data[json->index];
-
- /* Must use strncmp() to match the front of the JSON string.
+ /* Process characters which triggered T_UNKNOWN
+ *
+ * Must use strncmp() to match the front of the JSON string.
* JSON identifier must be lowercase.
 * When strict_numbers is disabled, either case is allowed for
* Infinity/NaN (since we are no longer following the spec..) */
@@ -1033,29 +1055,29 @@ static void json_next_token(json_parse_t *json, json_token_t *token)
json_next_string_token(json, token);
return;
} else if (ch == '-' || ('0' <= ch && ch <= '9')) {
- if (json->cfg->decode_refuse_badnum && json_is_invalid_number(json)) {
+ if (!json->cfg->decode_invalid_numbers && json_is_invalid_number(json)) {
json_set_token_error(token, json, "invalid number");
return;
}
json_next_number_token(json, token);
return;
- } else if (!strncmp(&json->data[json->index], "true", 4)) {
+ } else if (!strncmp(json->ptr, "true", 4)) {
token->type = T_BOOLEAN;
token->value.boolean = 1;
- json->index += 4;
+ json->ptr += 4;
return;
- } else if (!strncmp(&json->data[json->index], "false", 5)) {
+ } else if (!strncmp(json->ptr, "false", 5)) {
token->type = T_BOOLEAN;
token->value.boolean = 0;
- json->index += 5;
+ json->ptr += 5;
return;
- } else if (!strncmp(&json->data[json->index], "null", 4)) {
+ } else if (!strncmp(json->ptr, "null", 4)) {
token->type = T_NULL;
- json->index += 4;
+ json->ptr += 4;
return;
- } else if (!json->cfg->decode_refuse_badnum &&
+ } else if (json->cfg->decode_invalid_numbers &&
json_is_invalid_number(json)) {
- /* When refuse_badnum is disabled, only attempt to process
+ /* When decode_invalid_numbers is enabled, only attempt to process
* numbers we know are invalid JSON (Inf, NaN, hex)
* This is required to generate an appropriate token error,
* otherwise all bad tokens will register as "invalid number"
@@ -1091,13 +1113,23 @@ static void json_throw_parse_error(lua_State *l, json_parse_t *json,
exp, found, token->index + 1);
}
-static void json_decode_checkstack(lua_State *l, json_parse_t *json, int n)
+static inline void json_decode_ascend(json_parse_t *json)
{
- if (lua_checkstack(l, n))
+ json->current_depth--;
+}
+
+static void json_decode_descend(lua_State *l, json_parse_t *json, int slots)
+{
+ json->current_depth++;
+
+ if (json->current_depth <= json->cfg->decode_max_depth &&
+ lua_checkstack(l, slots)) {
return;
+ }
strbuf_free(json->tmp);
- luaL_error(l, "Too many nested data structures");
+ luaL_error(l, "Found too many nested data structures (%d) at character %d",
+ json->current_depth, json->ptr - json->data);
}
static void json_parse_object_context(lua_State *l, json_parse_t *json)
@@ -1106,7 +1138,7 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
/* 3 slots required:
* .., table, key, value */
- json_decode_checkstack(l, json, 3);
+ json_decode_descend(l, json, 3);
lua_newtable(l);
@@ -1114,6 +1146,7 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
/* Handle empty objects */
if (token.type == T_OBJ_END) {
+ json_decode_ascend(json);
return;
}
@@ -1137,8 +1170,10 @@ static void json_parse_object_context(lua_State *l, json_parse_t *json)
json_next_token(json, &token);
- if (token.type == T_OBJ_END)
+ if (token.type == T_OBJ_END) {
+ json_decode_ascend(json);
return;
+ }
if (token.type != T_COMMA)
json_throw_parse_error(l, json, "comma or object end", &token);
@@ -1155,15 +1190,17 @@ static void json_parse_array_context(lua_State *l, json_parse_t *json)
/* 2 slots required:
* .., table, value */
- json_decode_checkstack(l, json, 2);
+ json_decode_descend(l, json, 2);
lua_newtable(l);
json_next_token(json, &token);
/* Handle empty arrays */
- if (token.type == T_ARR_END)
+ if (token.type == T_ARR_END) {
+ json_decode_ascend(json);
return;
+ }
for (i = 1; ; i++) {
json_process_value(l, json, &token);
@@ -1171,8 +1208,10 @@ static void json_parse_array_context(lua_State *l, json_parse_t *json)
json_next_token(json, &token);
- if (token.type == T_ARR_END)
+ if (token.type == T_ARR_END) {
+ json_decode_ascend(json);
return;
+ }
if (token.type != T_COMMA)
json_throw_parse_error(l, json, "comma or array end", &token);
@@ -1211,15 +1250,26 @@ static void json_process_value(lua_State *l, json_parse_t *json,
}
}
-/* json_text must be null terminated string */
-static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
+static int json_decode(lua_State *l)
{
json_parse_t json;
json_token_t token;
+ size_t json_len;
+
+ luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
json.cfg = json_fetch_config(l);
- json.data = json_text;
- json.index = 0;
+ json.data = luaL_checklstring(l, 1, &json_len);
+ json.current_depth = 0;
+ json.ptr = json.data;
+
+ /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
+ *
+ * CJSON can support any simple data type, hence only the first
+ * character is guaranteed to be ASCII (at worst: '"'). This is
+ * still enough to detect whether the wrong encoding is in use. */
+ if (json_len >= 2 && (!json.data[0] || !json.data[1]))
+ luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
/* Ensure the temporary buffer can hold the entire string.
* This means we no longer need to do length checks since the decoded
@@ -1236,64 +1286,142 @@ static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
json_throw_parse_error(l, &json, "the end", &token);
strbuf_free(json.tmp);
+
+ return 1;
}
-static int json_decode(lua_State *l)
+/* ===== INITIALISATION ===== */
+
+#if !defined(LUA_VERSION_NUM) || LUA_VERSION_NUM < 502
+/* Compatibility for Lua 5.1.
+ *
+ * luaL_setfuncs() is used to create a module table where the functions have
+ * json_config_t as their first upvalue. Code borrowed from Lua 5.2 source. */
+static void luaL_setfuncs (lua_State *l, const luaL_Reg *reg, int nup)
{
- const char *json;
- size_t len;
+ int i;
- json_verify_arg_count(l, 1);
+ luaL_checkstack(l, nup, "too many upvalues");
+ for (; reg->name != NULL; reg++) { /* fill the table with given functions */
+ for (i = 0; i < nup; i++) /* copy upvalues to the top */
+ lua_pushvalue(l, -nup);
+ lua_pushcclosure(l, reg->func, nup); /* closure with those upvalues */
+ lua_setfield(l, -(nup + 2), reg->name);
+ }
+ lua_pop(l, nup); /* remove upvalues */
+}
+#endif
- json = luaL_checklstring(l, 1, &len);
+/* Call target function in protected mode with all supplied args.
+ * Assumes target function only returns a single non-nil value.
+ * Convert and return thrown errors as: nil, "error message" */
+static int json_protect_conversion(lua_State *l)
+{
+ int err;
- /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
- *
- * CJSON can support any simple data type, hence only the first
- * character is guaranteed to be ASCII (at worst: '"'). This is
- * still enough to detect whether the wrong encoding is in use. */
- if (len >= 2 && (!json[0] || !json[1]))
- luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
+ /* Deliberately throw an error for invalid arguments */
+ luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
- lua_json_decode(l, json, len);
+ /* pcall() the function stored as upvalue(1) */
+ lua_pushvalue(l, lua_upvalueindex(1));
+ lua_insert(l, 1);
+ err = lua_pcall(l, 1, 1, 0);
+ if (!err)
+ return 1;
- return 1;
-}
+ if (err == LUA_ERRRUN) {
+ lua_pushnil(l);
+ lua_insert(l, -2);
+ return 2;
+ }
-/* ===== INITIALISATION ===== */
+ /* Since we are not using a custom error handler, the only remaining
+ * errors are memory related */
+ return luaL_error(l, "Memory allocation error in CJSON protected call");
+}
-int luaopen_cjson(lua_State *l)
+/* Return cjson module table */
+static int lua_cjson_new(lua_State *l)
{
luaL_Reg reg[] = {
{ "encode", json_encode },
{ "decode", json_decode },
{ "encode_sparse_array", json_cfg_encode_sparse_array },
{ "encode_max_depth", json_cfg_encode_max_depth },
+ { "decode_max_depth", json_cfg_decode_max_depth },
{ "encode_number_precision", json_cfg_encode_number_precision },
{ "encode_keep_buffer", json_cfg_encode_keep_buffer },
- { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers },
+ { "encode_invalid_numbers", json_cfg_encode_invalid_numbers },
+ { "decode_invalid_numbers", json_cfg_decode_invalid_numbers },
+ { "new", lua_cjson_new },
{ NULL, NULL }
};
- /* Use json_fetch_config as a pointer.
- * It's faster than using a config string, and more unique */
- lua_pushlightuserdata(l, &json_config_key);
- json_create_config(l);
- lua_settable(l, LUA_REGISTRYINDEX);
+ /* Initialise number conversions */
+ fpconv_init();
- luaL_register(l, "cjson", reg);
+ /* cjson module table */
+ lua_newtable(l);
+
+ /* Register functions with config data as upvalue */
+ json_create_config(l);
+ luaL_setfuncs(l, reg, 1);
/* Set cjson.null */
lua_pushlightuserdata(l, NULL);
lua_setfield(l, -2, "null");
- /* Set cjson.version */
- lua_pushliteral(l, VERSION);
- lua_setfield(l, -2, "version");
+ /* Set module name / version fields */
+ lua_pushliteral(l, CJSON_MODNAME);
+ lua_setfield(l, -2, "_NAME");
+ lua_pushliteral(l, CJSON_VERSION);
+ lua_setfield(l, -2, "_VERSION");
+
+ return 1;
+}
+
+/* Return cjson.safe module table */
+static int lua_cjson_safe_new(lua_State *l)
+{
+ const char *func[] = { "decode", "encode", NULL };
+ int i;
+
+ lua_cjson_new(l);
+
+ /* Fix new() method */
+ lua_pushcfunction(l, lua_cjson_safe_new);
+ lua_setfield(l, -2, "new");
+
+ for (i = 0; func[i]; i++) {
+ lua_getfield(l, -1, func[i]);
+ lua_pushcclosure(l, json_protect_conversion, 1);
+ lua_setfield(l, -2, func[i]);
+ }
+
+ return 1;
+}
+
+int luaopen_cjson(lua_State *l)
+{
+ lua_cjson_new(l);
+
+#ifdef ENABLE_CJSON_GLOBAL
+ /* Register a global "cjson" table. */
+ lua_pushvalue(l, -1);
+ lua_setglobal(l, CJSON_MODNAME);
+#endif
/* Return cjson table */
return 1;
}
+int luaopen_cjson_safe(lua_State *l)
+{
+ lua_cjson_safe_new(l);
+
+ /* Return cjson.safe table */
+ return 1;
+}
+
/* vi:ai et sw=4 ts=4:
*/
diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c
index 53dc1cf61..90a388f3f 100644
--- a/deps/lua/src/lua_cmsgpack.c
+++ b/deps/lua/src/lua_cmsgpack.c
@@ -7,14 +7,38 @@
#include "lua.h"
#include "lauxlib.h"
-#define LUACMSGPACK_VERSION "lua-cmsgpack 0.3.0"
+#define LUACMSGPACK_NAME "cmsgpack"
+#define LUACMSGPACK_SAFE_NAME "cmsgpack_safe"
+#define LUACMSGPACK_VERSION "lua-cmsgpack 0.4.0"
#define LUACMSGPACK_COPYRIGHT "Copyright (C) 2012, Salvatore Sanfilippo"
#define LUACMSGPACK_DESCRIPTION "MessagePack C implementation for Lua"
-#define LUACMSGPACK_MAX_NESTING 16 /* Max tables nesting. */
+/* Allows a preprocessor directive to override MAX_NESTING */
+#ifndef LUACMSGPACK_MAX_NESTING
+ #define LUACMSGPACK_MAX_NESTING 16 /* Max tables nesting. */
+#endif
-/* ==============================================================================
- * MessagePack implementation and bindings for Lua 5.1.
+/* Check if float or double can be an integer without loss of precision */
+#define IS_INT_TYPE_EQUIVALENT(x, T) (!isinf(x) && (T)(x) == (x))
+
+#define IS_INT64_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int64_t)
+#define IS_INT_EQUIVALENT(x) IS_INT_TYPE_EQUIVALENT(x, int)
+
+/* If size of pointer is equal to a 4 byte integer, we're on 32 bits. */
+#if UINTPTR_MAX == UINT_MAX
+ #define BITS_32 1
+#else
+ #define BITS_32 0
+#endif
+
+#if BITS_32
+ #define lua_pushunsigned(L, n) lua_pushnumber(L, n)
+#else
+ #define lua_pushunsigned(L, n) lua_pushinteger(L, n)
+#endif
+
+/* =============================================================================
+ * MessagePack implementation and bindings for Lua 5.1/5.2.
* Copyright(C) 2012 Salvatore Sanfilippo <antirez@gmail.com>
*
* http://github.com/antirez/lua-cmsgpack
@@ -29,23 +53,27 @@
* 20-Feb-2012 (ver 0.2.0): Tables encoding improved.
* 20-Feb-2012 (ver 0.2.1): Minor bug fixing.
* 20-Feb-2012 (ver 0.3.0): Module renamed lua-cmsgpack (was lua-msgpack).
- * ============================================================================ */
+ * 04-Apr-2014 (ver 0.3.1): Lua 5.2 support and minor bug fix.
+ * 07-Apr-2014 (ver 0.4.0): Multiple pack/unpack, lua allocator, efficiency.
+ * ========================================================================== */
-/* --------------------------- Endian conversion --------------------------------
- * We use it only for floats and doubles, all the other conversions are performed
+/* -------------------------- Endian conversion --------------------------------
+ * We use it only for floats and doubles; all other conversions are performed
* in an endian independent fashion. So the only thing we need is a function
- * that swaps a binary string if the arch is little endian (and left it untouched
+ * that swaps a binary string if arch is little endian (and left it untouched
* otherwise). */
/* Reverse memory bytes if arch is little endian. Given the conceptual
- * simplicity of the Lua build system we prefer to check for endianess at runtime.
+ * simplicity of the Lua build system we prefer to check for endianness at runtime.
* The performance difference should be acceptable. */
-static void memrevifle(void *ptr, size_t len) {
- unsigned char *p = ptr, *e = p+len-1, aux;
+void memrevifle(void *ptr, size_t len) {
+ unsigned char *p = (unsigned char *)ptr,
+ *e = (unsigned char *)p+len-1,
+ aux;
int test = 1;
unsigned char *testp = (unsigned char*) &test;
- if (testp[0] == 0) return; /* Big endian, nothign to do. */
+ if (testp[0] == 0) return; /* Big endian, nothing to do. */
len /= 2;
while(len--) {
aux = *p;
@@ -56,8 +84,8 @@ static void memrevifle(void *ptr, size_t len) {
}
}
-/* ----------------------------- String buffer ----------------------------------
- * This is a simple implementation of string buffers. The only opereation
+/* ---------------------------- String buffer ----------------------------------
+ * This is a simple implementation of string buffers. The only operation
* supported is creating empty buffers and appending bytes to it.
* The string buffer uses 2x preallocation on every realloc for O(N) append
* behavior. */
@@ -67,32 +95,44 @@ typedef struct mp_buf {
size_t len, free;
} mp_buf;
-static mp_buf *mp_buf_new(void) {
- mp_buf *buf = malloc(sizeof(*buf));
-
+void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
+ void *(*local_realloc) (void *, void *, size_t osize, size_t nsize) = NULL;
+ void *ud;
+
+ local_realloc = lua_getallocf(L, &ud);
+
+ return local_realloc(ud, target, osize, nsize);
+}
+
+mp_buf *mp_buf_new(lua_State *L) {
+ mp_buf *buf = NULL;
+
+ /* Old size = 0; new size = sizeof(*buf) */
+ buf = (mp_buf*)mp_realloc(L, NULL, 0, sizeof(*buf));
+
buf->b = NULL;
buf->len = buf->free = 0;
return buf;
}
-void mp_buf_append(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_buf_append(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {
if (buf->free < len) {
- size_t newlen = buf->len+len;
+ size_t newsize = (buf->len+len)*2;
- buf->b = realloc(buf->b,newlen*2);
- buf->free = newlen;
+ buf->b = (unsigned char*)mp_realloc(L, buf->b, buf->len + buf->free, newsize);
+ buf->free = newsize - buf->len;
}
memcpy(buf->b+buf->len,s,len);
buf->len += len;
buf->free -= len;
}
-void mp_buf_free(mp_buf *buf) {
- free(buf->b);
- free(buf);
+void mp_buf_free(lua_State *L, mp_buf *buf) {
+ mp_realloc(L, buf->b, buf->len + buf->free, 0); /* realloc to 0 = free */
+ mp_realloc(L, buf, sizeof(*buf), 0);
}
-/* ------------------------------ String cursor ----------------------------------
+/* ---------------------------- String cursor ----------------------------------
* This simple data structure is used for parsing. Basically you create a cursor
* using a string pointer and a length, then it is possible to access the
* current string position with cursor->p, check the remaining length
@@ -102,7 +142,7 @@ void mp_buf_free(mp_buf *buf) {
* be used to report errors. */
#define MP_CUR_ERROR_NONE 0
-#define MP_CUR_ERROR_EOF 1 /* Not enough data to complete the opereation. */
+#define MP_CUR_ERROR_EOF 1 /* Not enough data to complete operation. */
#define MP_CUR_ERROR_BADFMT 2 /* Bad data format */
typedef struct mp_cur {
@@ -111,22 +151,15 @@ typedef struct mp_cur {
int err;
} mp_cur;
-static mp_cur *mp_cur_new(const unsigned char *s, size_t len) {
- mp_cur *cursor = malloc(sizeof(*cursor));
-
+void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
cursor->p = s;
cursor->left = len;
cursor->err = MP_CUR_ERROR_NONE;
- return cursor;
-}
-
-static void mp_cur_free(mp_cur *cursor) {
- free(cursor);
}
#define mp_cur_consume(_c,_len) do { _c->p += _len; _c->left -= _len; } while(0)
-/* When there is not enough room we set an error in the cursor and return, this
+/* When there is not enough room we set an error in the cursor and return. This
* is very common across the code so we have a macro to make the code look
* a bit simpler. */
#define mp_cur_need(_c,_len) do { \
@@ -136,15 +169,19 @@ static void mp_cur_free(mp_cur *cursor) {
} \
} while(0)
-/* --------------------------- Low level MP encoding -------------------------- */
+/* ------------------------- Low level MP encoding -------------------------- */
-static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_encode_bytes(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {
unsigned char hdr[5];
int hdrlen;
if (len < 32) {
hdr[0] = 0xa0 | (len&0xff); /* fix raw */
hdrlen = 1;
+ } else if (len <= 0xff) {
+ hdr[0] = 0xd9;
+ hdr[1] = len;
+ hdrlen = 2;
} else if (len <= 0xffff) {
hdr[0] = 0xda;
hdr[1] = (len&0xff00)>>8;
@@ -158,12 +195,12 @@ static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
hdr[4] = len&0xff;
hdrlen = 5;
}
- mp_buf_append(buf,hdr,hdrlen);
- mp_buf_append(buf,s,len);
+ mp_buf_append(L,buf,hdr,hdrlen);
+ mp_buf_append(L,buf,s,len);
}
/* we assume IEEE 754 internal format for single and double precision floats. */
-static void mp_encode_double(mp_buf *buf, double d) {
+void mp_encode_double(lua_State *L, mp_buf *buf, double d) {
unsigned char b[9];
float f = d;
@@ -172,16 +209,16 @@ static void mp_encode_double(mp_buf *buf, double d) {
b[0] = 0xca; /* float IEEE 754 */
memcpy(b+1,&f,4);
memrevifle(b+1,4);
- mp_buf_append(buf,b,5);
+ mp_buf_append(L,buf,b,5);
} else if (sizeof(d) == 8) {
b[0] = 0xcb; /* double IEEE 754 */
memcpy(b+1,&d,8);
memrevifle(b+1,8);
- mp_buf_append(buf,b,9);
+ mp_buf_append(L,buf,b,9);
}
}
-static void mp_encode_int(mp_buf *buf, int64_t n) {
+void mp_encode_int(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[9];
int enclen;
@@ -219,7 +256,7 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
}
} else {
if (n >= -32) {
- b[0] = ((char)n); /* negative fixnum */
+ b[0] = ((signed char)n); /* negative fixnum */
enclen = 1;
} else if (n >= -128) {
b[0] = 0xd0; /* int 8 */
@@ -250,10 +287,10 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
enclen = 9;
}
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
-static void mp_encode_array(mp_buf *buf, int64_t n) {
+void mp_encode_array(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -273,10 +310,10 @@ static void mp_encode_array(mp_buf *buf, int64_t n) {
b[4] = n & 0xff;
enclen = 5;
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
-static void mp_encode_map(mp_buf *buf, int64_t n) {
+void mp_encode_map(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -296,41 +333,58 @@ static void mp_encode_map(mp_buf *buf, int64_t n) {
b[4] = n & 0xff;
enclen = 5;
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
-/* ----------------------------- Lua types encoding --------------------------- */
+/* --------------------------- Lua types encoding --------------------------- */
-static void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
size_t len;
const char *s;
s = lua_tolstring(L,-1,&len);
- mp_encode_bytes(buf,(const unsigned char*)s,len);
+ mp_encode_bytes(L,buf,(const unsigned char*)s,len);
}
-static void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
unsigned char b = lua_toboolean(L,-1) ? 0xc3 : 0xc2;
- mp_buf_append(buf,&b,1);
+ mp_buf_append(L,buf,&b,1);
+}
+
+/* Lua 5.3 has a built in 64-bit integer type */
+void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
+#if (LUA_VERSION_NUM < 503) && BITS_32
+ lua_Number i = lua_tonumber(L,-1);
+#else
+ lua_Integer i = lua_tointeger(L,-1);
+#endif
+ mp_encode_int(L, buf, (int64_t)i);
}
-static void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
+/* Lua 5.2 and lower only has 64-bit doubles, so we need to
+ * detect if the double may be representable as an int
+ * for Lua < 5.3 */
+void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
lua_Number n = lua_tonumber(L,-1);
- if (floor(n) != n) {
- mp_encode_double(buf,(double)n);
+ if (IS_INT64_EQUIVALENT(n)) {
+ mp_encode_lua_integer(L, buf);
} else {
- mp_encode_int(buf,(int64_t)n);
+ mp_encode_double(L,buf,(double)n);
}
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
/* Convert a lua table into a message pack list. */
-static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
+#if LUA_VERSION_NUM < 502
size_t len = lua_objlen(L,-1), j;
+#else
+ size_t len = lua_rawlen(L,-1), j;
+#endif
- mp_encode_array(buf,len);
+ mp_encode_array(L,buf,len);
for (j = 1; j <= len; j++) {
lua_pushnumber(L,j);
lua_gettable(L,-2);
@@ -339,13 +393,13 @@ static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
}
/* Convert a lua table into a message pack key-value map. */
-static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
size_t len = 0;
/* First step: count keys into table. No other way to do it with the
* Lua API, we need to iterate a first time. Note that an alternative
* would be to do a single run, and then hack the buffer to insert the
- * map opcodes for message pack. Too hachish for this lib. */
+ * map opcodes for message pack. Too hackish for this lib. */
lua_pushnil(L);
while(lua_next(L,-2)) {
lua_pop(L,1); /* remove value, keep key for next iteration. */
@@ -353,7 +407,7 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
}
/* Step two: actually encoding of the map. */
- mp_encode_map(buf,len);
+ mp_encode_map(L,buf,len);
lua_pushnil(L);
while(lua_next(L,-2)) {
/* Stack: ... key value */
@@ -366,80 +420,130 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
/* Returns true if the Lua table on top of the stack is exclusively composed
* of keys from numerical keys from 1 up to N, with N being the total number
* of elements, without any hole in the middle. */
-static int table_is_an_array(lua_State *L) {
- long count = 0, max = 0, idx = 0;
+int table_is_an_array(lua_State *L) {
+ int count = 0, max = 0;
+#if LUA_VERSION_NUM < 503
lua_Number n;
+#else
+ lua_Integer n;
+#endif
+
+ /* Stack top on function entry */
+ int stacktop;
+
+ stacktop = lua_gettop(L);
lua_pushnil(L);
while(lua_next(L,-2)) {
/* Stack: ... key value */
lua_pop(L,1); /* Stack: ... key */
- if (lua_type(L,-1) != LUA_TNUMBER) goto not_array;
- n = lua_tonumber(L,-1);
- idx = n;
- if (idx != n || idx < 1) goto not_array;
+ /* The <= 0 check is valid here because we're comparing indexes. */
+#if LUA_VERSION_NUM < 503
+ if ((LUA_TNUMBER != lua_type(L,-1)) || (n = lua_tonumber(L, -1)) <= 0 ||
+ !IS_INT_EQUIVALENT(n))
+#else
+ if (!lua_isinteger(L,-1) || (n = lua_tointeger(L, -1)) <= 0)
+#endif
+ {
+ lua_settop(L, stacktop);
+ return 0;
+ }
+ max = (n > max ? n : max);
count++;
- max = idx;
}
/* We have the total number of elements in "count". Also we have
- * the max index encountered in "idx". We can't reach this code
+ * the max index encountered in "max". We can't reach this code
* if there are indexes <= 0. If you also note that there can not be
- * repeated keys into a table, you have that if idx==count you are sure
+ * repeated keys into a table, you have that if max==count you are sure
* that there are all the keys form 1 to count (both included). */
- return idx == count;
-
-not_array:
- lua_pop(L,1);
- return 0;
+ lua_settop(L, stacktop);
+ return max == count;
}
/* If the length operator returns non-zero, that is, there is at least
* an object at key '1', we serialize to message pack list. Otherwise
* we use a map. */
-static void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
if (table_is_an_array(L))
mp_encode_lua_table_as_array(L,buf,level);
else
mp_encode_lua_table_as_map(L,buf,level);
}
-static void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
unsigned char b[1];
b[0] = 0xc0;
- mp_buf_append(buf,b,1);
+ mp_buf_append(L,buf,b,1);
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
int t = lua_type(L,-1);
- /* Limit the encoding of nested tables to a specfiied maximum depth, so that
+ /* Limit the encoding of nested tables to a specified maximum depth, so that
* we survive when called against circular references in tables. */
if (t == LUA_TTABLE && level == LUACMSGPACK_MAX_NESTING) t = LUA_TNIL;
switch(t) {
case LUA_TSTRING: mp_encode_lua_string(L,buf); break;
case LUA_TBOOLEAN: mp_encode_lua_bool(L,buf); break;
- case LUA_TNUMBER: mp_encode_lua_number(L,buf); break;
+ case LUA_TNUMBER:
+ #if LUA_VERSION_NUM < 503
+ mp_encode_lua_number(L,buf); break;
+ #else
+ if (lua_isinteger(L, -1)) {
+ mp_encode_lua_integer(L, buf);
+ } else {
+ mp_encode_lua_number(L, buf);
+ }
+ break;
+ #endif
case LUA_TTABLE: mp_encode_lua_table(L,buf,level); break;
default: mp_encode_lua_null(L,buf); break;
}
lua_pop(L,1);
}
-static int mp_pack(lua_State *L) {
- mp_buf *buf = mp_buf_new();
+/*
+ * Packs all arguments as a stream for multiple unpacking later.
+ * Returns error if no arguments provided.
+ */
+int mp_pack(lua_State *L) {
+ int nargs = lua_gettop(L);
+ int i;
+ mp_buf *buf;
+
+ if (nargs == 0)
+ return luaL_argerror(L, 0, "MessagePack pack needs input.");
+
+ buf = mp_buf_new(L);
+ for(i = 1; i <= nargs; i++) {
+ /* Copy argument i to top of stack for _encode processing;
+ * the encode function pops it from the stack when complete. */
+ lua_pushvalue(L, i);
+
+ mp_encode_lua_type(L,buf,0);
+
+ lua_pushlstring(L,(char*)buf->b,buf->len);
+
+ /* Reuse the buffer for the next operation by
+ * setting its free count to the total buffer size
+ * and the current position to zero. */
+ buf->free += buf->len;
+ buf->len = 0;
+ }
+ mp_buf_free(L, buf);
- mp_encode_lua_type(L,buf,0);
- lua_pushlstring(L,(char*)buf->b,buf->len);
- mp_buf_free(buf);
+ /* Concatenate all nargs buffers together */
+ lua_concat(L, nargs);
return 1;
}
-/* --------------------------------- Decoding --------------------------------- */
+/* ------------------------------- Decoding --------------------------------- */
void mp_decode_to_lua_type(lua_State *L, mp_cur *c);
void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
int index = 1;
lua_newtable(L);
@@ -452,6 +556,7 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
}
void mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {
+ assert(len <= UINT_MAX);
lua_newtable(L);
while(len--) {
mp_decode_to_lua_type(L,c); /* key */
@@ -466,34 +571,44 @@ void mp_decode_to_lua_hash(lua_State *L, mp_cur *c, size_t len) {
* a Lua type, that is left as the only result on the stack. */
void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
mp_cur_need(c,1);
+
+ /* If we return more than 18 elements, we must resize the stack to
+ * fit all our return values. But, there is no way to
+ * determine how many objects a msgpack will unpack to up front, so
+ * we request a +1 larger stack on each iteration (noop if stack is
+ * big enough, and when stack does require resize it doubles in size) */
+ luaL_checkstack(L, 1,
+ "too many return values at once; "
+ "use unpack_one or unpack_limit instead.");
+
switch(c->p[0]) {
case 0xcc: /* uint 8 */
mp_cur_need(c,2);
- lua_pushnumber(L,c->p[1]);
+ lua_pushunsigned(L,c->p[1]);
mp_cur_consume(c,2);
break;
case 0xd0: /* int 8 */
mp_cur_need(c,2);
- lua_pushnumber(L,(char)c->p[1]);
+ lua_pushinteger(L,(signed char)c->p[1]);
mp_cur_consume(c,2);
break;
case 0xcd: /* uint 16 */
mp_cur_need(c,3);
- lua_pushnumber(L,
+ lua_pushunsigned(L,
(c->p[1] << 8) |
c->p[2]);
mp_cur_consume(c,3);
break;
case 0xd1: /* int 16 */
mp_cur_need(c,3);
- lua_pushnumber(L,(int16_t)
+ lua_pushinteger(L,(int16_t)
(c->p[1] << 8) |
c->p[2]);
mp_cur_consume(c,3);
break;
case 0xce: /* uint 32 */
mp_cur_need(c,5);
- lua_pushnumber(L,
+ lua_pushunsigned(L,
((uint32_t)c->p[1] << 24) |
((uint32_t)c->p[2] << 16) |
((uint32_t)c->p[3] << 8) |
@@ -502,7 +617,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xd2: /* int 32 */
mp_cur_need(c,5);
- lua_pushnumber(L,
+ lua_pushinteger(L,
((int32_t)c->p[1] << 24) |
((int32_t)c->p[2] << 16) |
((int32_t)c->p[3] << 8) |
@@ -511,7 +626,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xcf: /* uint 64 */
mp_cur_need(c,9);
- lua_pushnumber(L,
+ lua_pushunsigned(L,
((uint64_t)c->p[1] << 56) |
((uint64_t)c->p[2] << 48) |
((uint64_t)c->p[3] << 40) |
@@ -524,7 +639,11 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
break;
case 0xd3: /* int 64 */
mp_cur_need(c,9);
+#if LUA_VERSION_NUM < 503
lua_pushnumber(L,
+#else
+ lua_pushinteger(L,
+#endif
((int64_t)c->p[1] << 56) |
((int64_t)c->p[2] << 48) |
((int64_t)c->p[3] << 40) |
@@ -569,6 +688,15 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
mp_cur_consume(c,9);
}
break;
+ case 0xd9: /* raw 8 */
+ mp_cur_need(c,2);
+ {
+ size_t l = c->p[1];
+ mp_cur_need(c,2+l);
+ lua_pushlstring(L,(char*)c->p+2,l);
+ mp_cur_consume(c,2+l);
+ }
+ break;
case 0xda: /* raw 16 */
mp_cur_need(c,3);
{
@@ -581,13 +709,14 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdb: /* raw 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
- mp_cur_need(c,5+l);
- lua_pushlstring(L,(char*)c->p+5,l);
- mp_cur_consume(c,5+l);
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
+ mp_cur_consume(c,5);
+ mp_cur_need(c,l);
+ lua_pushlstring(L,(char*)c->p,l);
+ mp_cur_consume(c,l);
}
break;
case 0xdc: /* array 16 */
@@ -601,10 +730,10 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdd: /* array 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_array(L,c,l);
}
@@ -620,20 +749,20 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
case 0xdf: /* map 32 */
mp_cur_need(c,5);
{
- size_t l = (c->p[1] << 24) |
- (c->p[2] << 16) |
- (c->p[3] << 8) |
- c->p[4];
+ size_t l = ((size_t)c->p[1] << 24) |
+ ((size_t)c->p[2] << 16) |
+ ((size_t)c->p[3] << 8) |
+ (size_t)c->p[4];
mp_cur_consume(c,5);
mp_decode_to_lua_hash(L,c,l);
}
break;
default: /* types that can't be idenitified by first byte value. */
if ((c->p[0] & 0x80) == 0) { /* positive fixnum */
- lua_pushnumber(L,c->p[0]);
+ lua_pushunsigned(L,c->p[0]);
mp_cur_consume(c,1);
} else if ((c->p[0] & 0xe0) == 0xe0) { /* negative fixnum */
- lua_pushnumber(L,(signed char)c->p[0]);
+ lua_pushinteger(L,(signed char)c->p[0]);
mp_cur_consume(c,1);
} else if ((c->p[0] & 0xe0) == 0xa0) { /* fix raw */
size_t l = c->p[0] & 0x1f;
@@ -654,54 +783,163 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
}
}
-static int mp_unpack(lua_State *L) {
+int mp_unpack_full(lua_State *L, int limit, int offset) {
size_t len;
- const unsigned char *s;
- mp_cur *c;
+ const char *s;
+ mp_cur c;
+ int cnt; /* Number of objects unpacked */
+ int decode_all = (!limit && !offset);
+
+ s = luaL_checklstring(L,1,&len); /* if no match, exits */
- if (!lua_isstring(L,-1)) {
- lua_pushstring(L,"MessagePack decoding needs a string as input.");
- lua_error(L);
+ if (offset < 0 || limit < 0) /* requesting negative off or lim is invalid */
+ return luaL_error(L,
+ "Invalid request to unpack with offset of %d and limit of %d.",
+ offset, len);
+ else if (offset > len)
+ return luaL_error(L,
+ "Start offset %d greater than input length %d.", offset, len);
+
+ if (decode_all) limit = INT_MAX;
+
+ mp_cur_init(&c,(const unsigned char *)s+offset,len-offset);
+
+ /* We loop over the decode because this could be a stream
+ * of multiple top-level values serialized together */
+ for(cnt = 0; c.left > 0 && cnt < limit; cnt++) {
+ mp_decode_to_lua_type(L,&c);
+
+ if (c.err == MP_CUR_ERROR_EOF) {
+ return luaL_error(L,"Missing bytes in input.");
+ } else if (c.err == MP_CUR_ERROR_BADFMT) {
+ return luaL_error(L,"Bad data format in input.");
+ }
}
- s = (const unsigned char*) lua_tolstring(L,-1,&len);
- c = mp_cur_new(s,len);
- mp_decode_to_lua_type(L,c);
-
- if (c->err == MP_CUR_ERROR_EOF) {
- mp_cur_free(c);
- lua_pushstring(L,"Missing bytes in input.");
- lua_error(L);
- } else if (c->err == MP_CUR_ERROR_BADFMT) {
- mp_cur_free(c);
- lua_pushstring(L,"Bad data format in input.");
- lua_error(L);
- } else if (c->left != 0) {
- mp_cur_free(c);
- lua_pushstring(L,"Extra bytes in input.");
- lua_error(L);
+ if (!decode_all) {
+ /* c->left is the remaining size of the input buffer.
+ * subtract the entire buffer size from the unprocessed size
+ * to get our next start offset */
+ int offset = len - c.left;
+ /* Return offset -1 when we have processed the entire buffer. */
+ lua_pushinteger(L, c.left == 0 ? -1 : offset);
+ /* Results are returned with the arg elements still
+ * in place. Lua takes care of only returning
+ * elements above the args for us.
+ * In this case, we have one arg on the stack
+ * for this function, so we insert our first return
+ * value at position 2. */
+ lua_insert(L, 2);
+ cnt += 1; /* increase return count by one to make room for offset */
}
- mp_cur_free(c);
- return 1;
+
+ return cnt;
}
-/* ---------------------------------------------------------------------------- */
+int mp_unpack(lua_State *L) {
+ return mp_unpack_full(L, 0, 0);
+}
+
+int mp_unpack_one(lua_State *L) {
+ int offset = luaL_optinteger(L, 2, 0);
+ /* Variable pop because offset may not exist */
+ lua_pop(L, lua_gettop(L)-1);
+ return mp_unpack_full(L, 1, offset);
+}
+
+int mp_unpack_limit(lua_State *L) {
+ int limit = luaL_checkinteger(L, 2);
+ int offset = luaL_optinteger(L, 3, 0);
+ /* Variable pop because offset may not exist */
+ lua_pop(L, lua_gettop(L)-1);
+
+ return mp_unpack_full(L, limit, offset);
+}
+
+int mp_safe(lua_State *L) {
+ int argc, err, total_results;
+
+ argc = lua_gettop(L);
+
+ /* This adds our function to the bottom of the stack
+ * (the "call this function" position) */
+ lua_pushvalue(L, lua_upvalueindex(1));
+ lua_insert(L, 1);
+
+ err = lua_pcall(L, argc, LUA_MULTRET, 0);
+ total_results = lua_gettop(L);
+
+ if (!err) {
+ return total_results;
+ } else {
+ lua_pushnil(L);
+ lua_insert(L,-2);
+ return 2;
+ }
+}
-static const struct luaL_reg thislib[] = {
+/* -------------------------------------------------------------------------- */
+const struct luaL_Reg cmds[] = {
{"pack", mp_pack},
{"unpack", mp_unpack},
- {NULL, NULL}
+ {"unpack_one", mp_unpack_one},
+ {"unpack_limit", mp_unpack_limit},
+ {0}
};
-LUALIB_API int luaopen_cmsgpack (lua_State *L) {
- luaL_register(L, "cmsgpack", thislib);
+int luaopen_create(lua_State *L) {
+ int i;
+ /* Manually construct our module table instead of
+ * relying on _register or _newlib */
+ lua_newtable(L);
+
+ for (i = 0; i < (sizeof(cmds)/sizeof(*cmds) - 1); i++) {
+ lua_pushcfunction(L, cmds[i].func);
+ lua_setfield(L, -2, cmds[i].name);
+ }
+ /* Add metadata */
+ lua_pushliteral(L, LUACMSGPACK_NAME);
+ lua_setfield(L, -2, "_NAME");
lua_pushliteral(L, LUACMSGPACK_VERSION);
lua_setfield(L, -2, "_VERSION");
lua_pushliteral(L, LUACMSGPACK_COPYRIGHT);
lua_setfield(L, -2, "_COPYRIGHT");
lua_pushliteral(L, LUACMSGPACK_DESCRIPTION);
- lua_setfield(L, -2, "_DESCRIPTION");
+ lua_setfield(L, -2, "_DESCRIPTION");
+ return 1;
+}
+
+LUALIB_API int luaopen_cmsgpack(lua_State *L) {
+ luaopen_create(L);
+
+#if LUA_VERSION_NUM < 502
+ /* Register name globally for 5.1 */
+ lua_pushvalue(L, -1);
+ lua_setglobal(L, LUACMSGPACK_NAME);
+#endif
+
+ return 1;
+}
+
+LUALIB_API int luaopen_cmsgpack_safe(lua_State *L) {
+ int i;
+
+ luaopen_cmsgpack(L);
+
+ /* Wrap all functions in the safe handler */
+ for (i = 0; i < (sizeof(cmds)/sizeof(*cmds) - 1); i++) {
+ lua_getfield(L, -1, cmds[i].name);
+ lua_pushcclosure(L, mp_safe, 1);
+ lua_setfield(L, -2, cmds[i].name);
+ }
+
+#if LUA_VERSION_NUM < 502
+ /* Register name globally for 5.1 */
+ lua_pushvalue(L, -1);
+ lua_setglobal(L, LUACMSGPACK_SAFE_NAME);
+#endif
+
return 1;
}
diff --git a/deps/lua/src/lua_struct.c b/deps/lua/src/lua_struct.c
index ec78bcbc0..a602bb430 100644
--- a/deps/lua/src/lua_struct.c
+++ b/deps/lua/src/lua_struct.c
@@ -89,12 +89,14 @@ typedef struct Header {
} Header;
-static int getnum (const char **fmt, int df) {
+static int getnum (lua_State *L, const char **fmt, int df) {
if (!isdigit(**fmt)) /* no number? */
return df; /* return default value */
else {
int a = 0;
do {
+ if (a > (INT_MAX / 10) || a * 10 > (INT_MAX - (**fmt - '0')))
+ luaL_error(L, "integral size overflow");
a = a*10 + *((*fmt)++) - '0';
} while (isdigit(**fmt));
return a;
@@ -115,9 +117,9 @@ static size_t optsize (lua_State *L, char opt, const char **fmt) {
case 'f': return sizeof(float);
case 'd': return sizeof(double);
case 'x': return 1;
- case 'c': return getnum(fmt, 1);
+ case 'c': return getnum(L, fmt, 1);
case 'i': case 'I': {
- int sz = getnum(fmt, sizeof(int));
+ int sz = getnum(L, fmt, sizeof(int));
if (sz > MAXINTSIZE)
luaL_error(L, "integral size %d is larger than limit of %d",
sz, MAXINTSIZE);
@@ -150,7 +152,7 @@ static void controloptions (lua_State *L, int opt, const char **fmt,
case '>': h->endian = BIG; return;
case '<': h->endian = LITTLE; return;
case '!': {
- int a = getnum(fmt, MAXALIGN);
+ int a = getnum(L, fmt, MAXALIGN);
if (!isp2(a))
luaL_error(L, "alignment %d is not a power of 2", a);
h->align = a;
diff --git a/deps/lua/src/strbuf.c b/deps/lua/src/strbuf.c
index 976925a88..f0f7f4b9a 100644
--- a/deps/lua/src/strbuf.c
+++ b/deps/lua/src/strbuf.c
@@ -1,6 +1,6 @@
-/* strbuf - string buffer routines
+/* strbuf - String buffer routines
*
- * Copyright (c) 2010-2011 Mark Pulford <mark@kyne.com.au>
+ * Copyright (c) 2010-2012 Mark Pulford <mark@kyne.com.au>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
@@ -29,7 +29,7 @@
#include "strbuf.h"
-void die(const char *fmt, ...)
+static void die(const char *fmt, ...)
{
va_list arg;
diff --git a/deps/lua/src/strbuf.h b/deps/lua/src/strbuf.h
index f856543ad..d861108c1 100644
--- a/deps/lua/src/strbuf.h
+++ b/deps/lua/src/strbuf.h
@@ -1,6 +1,6 @@
/* strbuf - String buffer routines
*
- * Copyright (c) 2010-2011 Mark Pulford <mark@kyne.com.au>
+ * Copyright (c) 2010-2012 Mark Pulford <mark@kyne.com.au>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
@@ -62,7 +62,9 @@ extern void strbuf_resize(strbuf_t *s, int len);
static int strbuf_empty_length(strbuf_t *s);
static int strbuf_length(strbuf_t *s);
static char *strbuf_string(strbuf_t *s, int *len);
-static void strbuf_ensure_empty_length(strbuf_t *s, int len);
+static void strbuf_ensure_empty_length(strbuf_t *s, int len);
+static char *strbuf_empty_ptr(strbuf_t *s);
+static void strbuf_extend_length(strbuf_t *s, int len);
/* Update */
extern void strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...);
@@ -96,6 +98,16 @@ static inline void strbuf_ensure_empty_length(strbuf_t *s, int len)
strbuf_resize(s, s->length + len);
}
+static inline char *strbuf_empty_ptr(strbuf_t *s)
+{
+ return s->buf + s->length;
+}
+
+static inline void strbuf_extend_length(strbuf_t *s, int len)
+{
+ s->length += len;
+}
+
static inline int strbuf_length(strbuf_t *s)
{
return s->length;
diff --git a/redis.conf b/redis.conf
index 33f28a5bb..c54dba392 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1,4 +1,9 @@
-# Redis configuration file example
+# Redis configuration file example.
+#
+# Note that in order to read the configuration file, Redis must be
+# started with the file path as first argument:
+#
+# ./redis-server /path/to/redis.conf
# Note on units: when memory size is needed, it is possible to specify
# it in the usual form of 1k 5GB 4M and so forth:
@@ -15,7 +20,7 @@
################################## INCLUDES ###################################
# Include one or more other config files here. This is useful if you
-# have a standard template that goes to all Redis server but also need
+# have a standard template that goes to all Redis servers but also need
# to customize a few per-server settings. Include files can include
# other files, so use this wisely.
#
@@ -30,17 +35,59 @@
# include /path/to/local.conf
# include /path/to/other.conf
-################################ GENERAL #####################################
+################################## MODULES #####################################
-# By default Redis does not run as a daemon. Use 'yes' if you need it.
-# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
-daemonize no
+# Load modules at startup. If the server is not able to load modules
+# it will abort. It is possible to use multiple loadmodule directives.
+#
+# loadmodule /path/to/my_module.so
+# loadmodule /path/to/other_module.so
-# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
-# default. You can specify a custom pid file location here.
-pidfile /var/run/redis.pid
+################################## NETWORK #####################################
+
+# By default, if no "bind" configuration directive is specified, Redis listens
+# for connections from all the network interfaces available on the server.
+# It is possible to listen to just one or multiple selected interfaces using
+# the "bind" configuration directive, followed by one or more IP addresses.
+#
+# Examples:
+#
+# bind 192.168.1.100 10.0.0.1
+# bind 127.0.0.1 ::1
+#
+# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the
+# internet, binding to all the interfaces is dangerous and will expose the
+# instance to everybody on the internet. So by default we uncomment the
+# following bind directive, that will force Redis to listen only on
+# the IPv4 loopback interface address (this means Redis will be able to
+# accept connections only from clients running on the same computer it
+# is running on).
+#
+# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES
+# JUST COMMENT THE FOLLOWING LINE.
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+bind 127.0.0.1
+
+# Protected mode is a layer of security protection, in order to avoid that
+# Redis instances left open on the internet are accessed and exploited.
+#
+# When protected mode is on and if:
+#
+# 1) The server is not binding explicitly to a set of addresses using the
+# "bind" directive.
+# 2) No password is configured.
+#
+# The server only accepts connections from clients connecting from the
+# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain
+# sockets.
+#
+# By default protected mode is enabled. You should disable it only if
+# you are sure you want clients from other hosts to connect to Redis
+# even if no authentication is configured, nor a specific set of interfaces
+# are explicitly listed using the "bind" directive.
+protected-mode yes
-# Accept connections on the specified port, default is 6379.
+# Accept connections on the specified port, default is 6379 (IANA #815344).
# If port 0 is specified Redis will not listen on a TCP socket.
port 6379
@@ -53,22 +100,14 @@ port 6379
# in order to get the desired effect.
tcp-backlog 511
-# By default Redis listens for connections from all the network interfaces
-# available on the server. It is possible to listen to just one or multiple
-# interfaces using the "bind" configuration directive, followed by one or
-# more IP addresses.
-#
-# Examples:
+# Unix socket.
#
-# bind 192.168.1.100 10.0.0.1
-# bind 127.0.0.1
-
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen
# on a unix socket when not specified.
#
# unixsocket /tmp/redis.sock
-# unixsocketperm 755
+# unixsocketperm 700
# Close the connection after a client is idle for N seconds (0 to disable)
timeout 0
@@ -86,8 +125,37 @@ timeout 0
# Note that to close the connection the double of the time is needed.
# On other kernels the period depends on the kernel configuration.
#
-# A reasonable value for this option is 60 seconds.
-tcp-keepalive 0
+# A reasonable value for this option is 300 seconds, which is the new
+# Redis default starting with Redis 3.2.1.
+tcp-keepalive 300
+
+################################# GENERAL #####################################
+
+# By default Redis does not run as a daemon. Use 'yes' if you need it.
+# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
+daemonize no
+
+# If you run Redis from upstart or systemd, Redis can interact with your
+# supervision tree. Options:
+# supervised no - no supervision interaction
+# supervised upstart - signal upstart by putting Redis into SIGSTOP mode
+# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
+# supervised auto - detect upstart or systemd method based on
+# UPSTART_JOB or NOTIFY_SOCKET environment variables
+# Note: these supervision methods only signal "process is ready."
+# They do not enable continuous liveness pings back to your supervisor.
+supervised no
+
+# If a pid file is specified, Redis writes it where specified at startup
+# and removes it at exit.
+#
+# When the server runs non daemonized, no pid file is created if none is
+# specified in the configuration. When the server is daemonized, the pid file
+# is used even if not specified, defaulting to "/var/run/redis.pid".
+#
+# Creating a pid file is best effort: if Redis is not able to create it
+# nothing bad happens, the server will start and run normally.
+pidfile /var/run/redis_6379.pid
# Specify the server verbosity level.
# This can be one of:
@@ -117,6 +185,14 @@ logfile ""
# dbid is a number between 0 and 'databases'-1
databases 16
+# By default Redis shows an ASCII art logo only when started to log to the
+# standard output and if the standard output is a TTY. Basically this means
+# that normally a logo is displayed only in interactive sessions.
+#
+# However it is possible to force the pre-4.0 behavior and always show a
+# ASCII art logo in startup logs by setting the following option to yes.
+always-show-logo yes
+
################################ SNAPSHOTTING ################################
#
# Save the DB on disk:
@@ -131,7 +207,7 @@ databases 16
# after 300 sec (5 min) if at least 10 keys changed
# after 60 sec if at least 10000 keys changed
#
-# Note: you can disable saving at all commenting all the "save" lines.
+# Note: you can disable saving completely by commenting out all "save" lines.
#
# It is also possible to remove all the previously configured save
# points by adding a save directive with a single empty string argument
@@ -180,9 +256,9 @@ dbfilename dump.rdb
#
# The DB will be written inside this directory, with the filename specified
# above using the 'dbfilename' configuration directive.
-#
+#
# The Append Only File will also be created inside this directory.
-#
+#
# Note that you must specify a directory here, not a file name.
dir ./
@@ -240,6 +316,49 @@ slave-serve-stale-data yes
# administrative / dangerous commands.
slave-read-only yes
+# Replication SYNC strategy: disk or socket.
+#
+# -------------------------------------------------------
+# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY
+# -------------------------------------------------------
+#
+# New slaves and reconnecting slaves that are not able to continue the replication
+# process just receiving differences, need to do what is called a "full
+# synchronization". An RDB file is transmitted from the master to the slaves.
+# The transmission can happen in two different ways:
+#
+# 1) Disk-backed: The Redis master creates a new process that writes the RDB
+# file on disk. Later the file is transferred by the parent
+# process to the slaves incrementally.
+# 2) Diskless: The Redis master creates a new process that directly writes the
+# RDB file to slave sockets, without touching the disk at all.
+#
+# With disk-backed replication, while the RDB file is generated, more slaves
+# can be queued and served with the RDB file as soon as the current child producing
+# the RDB file finishes its work. With diskless replication instead once
+# the transfer starts, new slaves arriving will be queued and a new transfer
+# will start when the current one terminates.
+#
+# When diskless replication is used, the master waits a configurable amount of
+# time (in seconds) before starting the transfer in the hope that multiple slaves
+# will arrive and the transfer can be parallelized.
+#
+# With slow disks and fast (large bandwidth) networks, diskless replication
+# works better.
+repl-diskless-sync no
+
+# When diskless replication is enabled, it is possible to configure the delay
+# the server waits in order to spawn the child that transfers the RDB via socket
+# to the slaves.
+#
+# This is important since once the transfer starts, it is not possible to serve
+# new slaves arriving, that will be queued for the next RDB transfer, so the server
+# waits a delay in order to let more slaves arrive.
+#
+# The delay is specified in seconds, and by default is 5 seconds. To disable
+# it entirely just set it to 0 seconds and the transfer will start ASAP.
+repl-diskless-sync-delay 5
+
# Slaves send PINGs to server in a predefined interval. It's possible to change
# this interval with the repl_ping_slave_period option. The default value is 10
# seconds.
@@ -279,7 +398,7 @@ repl-disable-tcp-nodelay no
# resync is enough, just passing the portion of data the slave missed while
# disconnected.
#
-# The biggest the replication backlog, the longer the time the slave can be
+# The bigger the replication backlog, the longer the time the slave can be
# disconnected and later be able to perform a partial resynchronization.
#
# The backlog is only allocated once there is at least a slave connected.
@@ -291,6 +410,10 @@ repl-disable-tcp-nodelay no
# need to elapse, starting from the time the last slave disconnected, for
# the backlog buffer to be freed.
#
+# Note that slaves never free the backlog for timeout, since they may be
+# promoted to masters later, and should be able to correctly "partially
+# resynchronize" with the slaves: hence they should always accumulate backlog.
+#
# A value of 0 means to never release the backlog.
#
# repl-backlog-ttl 3600
@@ -318,7 +441,7 @@ slave-priority 100
# The lag in seconds, that must be <= the specified value, is calculated from
# the last ping received from the slave, that is usually sent every second.
#
-# This option does not GUARANTEES that N replicas will accept the write, but
+# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes in case not enough slaves
# are available, to the specified number of seconds.
#
@@ -332,6 +455,35 @@ slave-priority 100
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.
+# A Redis master is able to list the address and port of the attached
+# slaves in different ways. For example the "INFO replication" section
+# offers this information, which is used, among other tools, by
+# Redis Sentinel in order to discover slave instances.
+# Another place where this info is available is in the output of the
+# "ROLE" command of a master.
+#
+# The listed IP address and port normally reported by a slave are obtained
+# in the following way:
+#
+# IP: The address is auto detected by checking the peer address
+# of the socket used by the slave to connect with the master.
+#
+# Port: The port is communicated by the slave during the replication
+# handshake, and is normally the port that the slave is using to
+# listen for connections.
+#
+# However when port forwarding or Network Address Translation (NAT) is
+# used, the slave may be actually reachable via different IP and port
+# pairs. The following two options can be used by a slave in order to
+# report to its master a specific set of IP and port, so that both INFO
+# and ROLE will report those values.
+#
+# There is no need to use both the options if you need to override just
+# the port or the IP address.
+#
+# slave-announce-ip 5.5.5.5
+# slave-announce-port 1234
+
################################## SECURITY ###################################
# Require clients to issue AUTH <PASSWORD> before processing any other
@@ -340,7 +492,7 @@ slave-priority 100
#
# This should stay commented out for backward compatibility and because most
# people do not need auth (e.g. they run their own servers).
-#
+#
# Warning: since Redis is pretty fast an outside user can try up to
# 150k passwords per second against a good box. This means that you should
# use a very strong password otherwise it will be very easy to break.
@@ -366,7 +518,7 @@ slave-priority 100
# Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems.
-################################### LIMITS ####################################
+################################### CLIENTS ####################################
# Set the max number of connected clients at the same time. By default
# this limit is set to 10000 clients, however if the Redis server is not
@@ -379,7 +531,9 @@ slave-priority 100
#
# maxclients 10000
-# Don't use more memory than the specified amount of bytes.
+############################## MEMORY MANAGEMENT ################################
+
+# Set a memory usage limit to the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
@@ -388,8 +542,8 @@ slave-priority 100
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
-# This option is usually useful when using Redis as an LRU cache, or to set
-# a hard memory limit for an instance (using the 'noeviction' policy).
+# This option is usually useful when using Redis as an LRU or LFU cache, or to
+# set a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves are subtracted
@@ -406,18 +560,26 @@ slave-priority 100
# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
# is reached. You can select among five behaviors:
-#
-# volatile-lru -> remove the key with an expire set using an LRU algorithm
-# allkeys-lru -> remove any key accordingly to the LRU algorithm
-# volatile-random -> remove a random key with an expire set
-# allkeys-random -> remove a random key, any key
-# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
-# noeviction -> don't expire at all, just return an error on write operations
-#
+#
+# volatile-lru -> Evict using approximated LRU among the keys with an expire set.
+# allkeys-lru -> Evict any key using approximated LRU.
+# volatile-lfu -> Evict using approximated LFU among the keys with an expire set.
+# allkeys-lfu -> Evict any key using approximated LFU.
+# volatile-random -> Remove a random key among the ones with an expire set.
+# allkeys-random -> Remove a random key, any key.
+# volatile-ttl -> Remove the key with the nearest expire time (minor TTL)
+# noeviction -> Don't evict anything, just return an error on write operations.
+#
+# LRU means Least Recently Used
+# LFU means Least Frequently Used
+#
+# Both LRU, LFU and volatile-ttl are implemented using approximated
+# randomized algorithms.
+#
# Note: with any of the above policies, Redis will return an error on write
-# operations, when there are not suitable keys for eviction.
+# operations, when there are no suitable keys for eviction.
#
-# At the date of writing this commands are: set setnx setex append
+# At the date of writing these commands are: set setnx setex append
# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd
# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby
# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby
@@ -427,17 +589,66 @@ slave-priority 100
#
# maxmemory-policy noeviction
-# LRU and minimal TTL algorithms are not precise algorithms but approximated
+# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can tune it for speed or
# accuracy. For default Redis will check five keys and pick the one that was
# used less recently, you can change the sample size using the following
# configuration directive.
#
# The default of 5 produces good enough results. 10 Approximates very closely
-# true LRU but costs a bit more CPU. 3 is very fast but not very accurate.
+# true LRU but costs more CPU. 3 is faster but not very accurate.
#
# maxmemory-samples 5
+############################# LAZY FREEING ####################################
+
+# Redis has two primitives to delete keys. One is called DEL and is a blocking
+# deletion of the object. It means that the server stops processing new commands
+# in order to reclaim all the memory associated with an object in a synchronous
+# way. If the key deleted is associated with a small object, the time needed
+# in order to execute the DEL command is very small and comparable to most other
+# O(1) or O(log_N) commands in Redis. However if the key is associated with an
+# aggregated value containing millions of elements, the server can block for
+# a long time (even seconds) in order to complete the operation.
+#
+# For the above reasons Redis also offers non blocking deletion primitives
+# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and
+# FLUSHDB commands, in order to reclaim memory in background. Those commands
+# are executed in constant time. Another thread will incrementally free the
+# object in the background as fast as possible.
+#
+# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled.
+# It's up to the design of the application to understand when it is a good
+# idea to use one or the other. However the Redis server sometimes has to
+# delete keys or flush the whole database as a side effect of other operations.
+# Specifically Redis deletes objects independently of a user call in the
+# following scenarios:
+#
+# 1) On eviction, because of the maxmemory and maxmemory policy configurations,
+# in order to make room for new data, without going over the specified
+# memory limit.
+# 2) Because of expire: when a key with an associated time to live (see the
+# EXPIRE command) must be deleted from memory.
+# 3) Because of a side effect of a command that stores data on a key that may
+# already exist. For example the RENAME command may delete the old key
+# content when it is replaced with another one. Similarly SUNIONSTORE
+# or SORT with STORE option may delete existing keys. The SET command
+# itself removes any old content of the specified key in order to replace
+# it with the specified string.
+# 4) During replication, when a slave performs a full resynchronization with
+# its master, the content of the whole database is removed in order to
+# load the RDB file just transferred.
+#
+# In all the above cases the default is to delete objects in a blocking way,
+# like if DEL was called. However you can configure each case specifically
+# in order to instead release memory in a non-blocking way like if UNLINK
+# was called, using the following configuration directives:
+
+lazyfree-lazy-eviction no
+lazyfree-lazy-expire no
+lazyfree-lazy-server-del no
+slave-lazy-flush no
+
############################## APPEND ONLY MODE ###############################
# By default Redis asynchronously dumps the dataset on disk. This mode is
@@ -465,13 +676,13 @@ appendonly no
appendfilename "appendonly.aof"
# The fsync() call tells the Operating System to actually write data on disk
-# instead to wait for more data in the output buffer. Some OS will really flush
+# instead of waiting for more data in the output buffer. Some OS will really flush
# data on disk, some other OS will just try to do it ASAP.
#
# Redis supports three different modes:
#
# no: don't fsync, just let the OS flush the data when it wants. Faster.
-# always: fsync after every write to the append only log . Slow, Safest.
+# always: fsync after every write to the append only log. Slow, Safest.
# everysec: fsync only one time every second. Compromise.
#
# The default is "everysec", as that's usually the right compromise between
@@ -506,7 +717,7 @@ appendfsync everysec
# the same as "appendfsync none". In practical terms, this means that it is
# possible to lose up to 30 seconds of log in the worst scenario (with the
# default Linux settings).
-#
+#
# If you have latency problems turn this to "yes". Otherwise leave it as
# "no" that is the safest pick from the point of view of durability.
@@ -515,7 +726,7 @@ no-appendfsync-on-rewrite no
# Automatic rewrite of the append only file.
# Redis is able to automatically rewrite the log file implicitly calling
# BGREWRITEAOF when the AOF log size grows by the specified percentage.
-#
+#
# This is how it works: Redis remembers the size of the AOF file after the
# latest rewrite (if no rewrite has happened since the restart, the size of
# the AOF at startup is used).
@@ -532,6 +743,44 @@ no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
+# An AOF file may be found to be truncated at the end during the Redis
+# startup process, when the AOF data gets loaded back into memory.
+# This may happen when the system where Redis is running
+# crashes, especially when an ext4 filesystem is mounted without the
+# data=ordered option (however this can't happen when Redis itself
+# crashes or aborts but the operating system still works correctly).
+#
+# Redis can either exit with an error when this happens, or load as much
+# data as possible (the default now) and start if the AOF file is found
+# to be truncated at the end. The following option controls this behavior.
+#
+# If aof-load-truncated is set to yes, a truncated AOF file is loaded and
+# the Redis server starts emitting a log to inform the user of the event.
+# Otherwise if the option is set to no, the server aborts with an error
+# and refuses to start. When the option is set to no, the user is required
+# to fix the AOF file using the "redis-check-aof" utility before restarting
+# the server.
+#
+# Note that if the AOF file is found to be corrupted in the middle
+# the server will still exit with an error. This option only applies when
+# Redis tries to read more data from the AOF file but not enough bytes
+# are found.
+aof-load-truncated yes
+
+# When rewriting the AOF file, Redis is able to use an RDB preamble in the
+# AOF file for faster rewrites and recoveries. When this option is turned
+# on the rewritten AOF file is composed of two different stanzas:
+#
+# [RDB file][AOF tail]
+#
+# When loading Redis recognizes that the AOF file starts with the "REDIS"
+# string and loads the prefixed RDB file, and continues loading the AOF
+# tail.
+#
+# This is currently turned off by default in order to avoid the surprise
+# of a format change, but will at some point be used as the default.
+aof-use-rdb-preamble no
+
################################ LUA SCRIPTING ###############################
# Max execution time of a Lua script in milliseconds.
@@ -540,11 +789,11 @@ auto-aof-rewrite-min-size 64mb
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
-# When a long running script exceed the maximum execution time only the
+# When a long running script exceeds the maximum execution time only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that did not yet called write commands. The second
-# is the only way to shut down the server in the case a write commands was
-# already issue by the script but the user don't want to wait for the natural
+# is the only way to shut down the server in the case a write command was
+# already issued by the script but the user doesn't want to wait for the natural
# termination of the script.
#
# Set it to 0 or a negative value for unlimited execution without warnings.
@@ -552,6 +801,12 @@ lua-time-limit 5000
################################ REDIS CLUSTER ###############################
#
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however
+# in order to mark it as "mature" we need to wait for a non trivial percentage
+# of users to deploy it in production.
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+#
# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
# started as cluster nodes can. In order to start a Redis instance as a
# cluster node enable the cluster support uncommenting the following:
@@ -561,12 +816,12 @@ lua-time-limit 5000
# Every cluster node has a cluster configuration file. This file is not
# intended to be edited by hand. It is created and updated by Redis nodes.
# Every Redis Cluster node requires a different cluster configuration file.
-# Make sure that instances running in the same system does not have
+# Make sure that instances running in the same system do not have
# overlapping cluster configuration file names.
#
# cluster-config-file nodes-6379.conf
-# Cluster node timeout is the amount of milliseconds a node must be unreachable
+# Cluster node timeout is the amount of milliseconds a node must be unreachable
# for it to be considered in failure state.
# Most other internal time limits are multiple of the node timeout.
#
@@ -575,7 +830,7 @@ lua-time-limit 5000
# A slave of a failing master will avoid to start a failover if its data
# looks too old.
#
-# There is no simple way for a slave to actually have a exact measure of
+# There is no simple way for a slave to actually have an exact measure of
# its "data age", so the following two checks are performed:
#
# 1) If there are multiple slaves able to failover, they exchange messages
@@ -636,9 +891,55 @@ lua-time-limit 5000
#
# cluster-migration-barrier 1
+# By default Redis Cluster nodes stop accepting queries if they detect there
+# is at least a hash slot uncovered (no available node is serving it).
+# This way if the cluster is partially down (for example a range of hash slots
+# are no longer covered) the whole cluster becomes, eventually, unavailable.
+# It automatically returns available as soon as all the slots are covered again.
+#
+# However sometimes you want the subset of the cluster which is working,
+# to continue to accept queries for the part of the key space that is still
+# covered. In order to do so, just set the cluster-require-full-coverage
+# option to no.
+#
+# cluster-require-full-coverage yes
+
# In order to setup your cluster make sure to read the documentation
# available at http://redis.io web site.
+########################## CLUSTER DOCKER/NAT support ########################
+
+# In certain deployments, Redis Cluster nodes address discovery fails, because
+# addresses are NAT-ted or because ports are forwarded (the typical case is
+# Docker and other containers).
+#
+# In order to make Redis Cluster work in such environments, a static
+# configuration where each node knows its public address is needed. The
+# following three options are used for this purpose, and are:
+#
+# * cluster-announce-ip
+# * cluster-announce-port
+# * cluster-announce-bus-port
+#
+# Each instructs the node about its address, client port, and cluster message
+# bus port. The information is then published in the header of the bus packets
+# so that other nodes will be able to correctly map the address of the node
+# publishing the information.
+#
+# If the above options are not used, the normal Redis Cluster auto-detection
+# will be used instead.
+#
+# Note that when remapped, the bus port may not be at the fixed offset of
+# clients port + 10000, so you can specify any port and bus-port depending
+# on how they get remapped. If the bus-port is not set, a fixed offset of
+# 10000 will be used as usual.
+#
+# Example:
+#
+# cluster-announce-ip 10.1.1.5
+# cluster-announce-port 6379
+# cluster-announce-bus-port 6380
+
################################## SLOW LOG ###################################
# The Redis Slow Log is a system to log queries that exceeded a specified
@@ -647,7 +948,7 @@ lua-time-limit 5000
# but just the time needed to actually execute the command (this is the only
# stage of command execution where the thread is blocked and can not serve
# other requests in the meantime).
-#
+#
# You can configure the slow log with two parameters: one tells Redis
# what is the execution time, in microseconds, to exceed in order for the
# command to get logged, and the other parameter is the length of the
@@ -680,15 +981,15 @@ slowlog-max-len 128
# By default latency monitoring is disabled since it is mostly not needed
# if you don't have latency issues, and collecting data has a performance
# impact, that while very small, can be measured under big load. Latency
-# monitoring can easily be enalbed at runtime using the command
+# monitoring can easily be enabled at runtime using the command
# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
latency-monitor-threshold 0
-############################# Event notification ##############################
+############################# EVENT NOTIFICATION ##############################
# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/notifications
-#
+#
# For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in the Database 0, two
# messages will be published via Pub/Sub:
@@ -712,8 +1013,8 @@ latency-monitor-threshold 0
# A Alias for g$lshzxe, so that the "AKE" string means all the events.
#
# The "notify-keyspace-events" takes as argument a string that is composed
-# by zero or multiple characters. The empty string means that notifications
-# are disabled at all.
+# of zero or multiple characters. The empty string means that notifications
+# are disabled.
#
# Example: to enable list and generic events, from the point of view of the
# event name, use:
@@ -738,14 +1039,39 @@ notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
-# Similarly to hashes, small lists are also encoded in a special way in order
-# to save a lot of space. The special representation is only used when
-# you are under the following limits:
-list-max-ziplist-entries 512
-list-max-ziplist-value 64
+# Lists are also encoded in a special way to save a lot of space.
+# The number of entries allowed per internal list node can be specified
+# as a fixed maximum size or a maximum number of elements.
+# For a fixed maximum size, use -5 through -1, meaning:
+# -5: max size: 64 Kb <-- not recommended for normal workloads
+# -4: max size: 32 Kb <-- not recommended
+# -3: max size: 16 Kb <-- probably not recommended
+# -2: max size: 8 Kb <-- good
+# -1: max size: 4 Kb <-- good
+# Positive numbers mean store up to _exactly_ that number of elements
+# per list node.
+# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size),
+# but if your use case is unique, adjust the settings as necessary.
+list-max-ziplist-size -2
+
+# Lists may also be compressed.
+# Compress depth is the number of quicklist ziplist nodes from *each* side of
+# the list to *exclude* from compression. The head and tail of the list
+# are always uncompressed for fast push/pop operations. Settings are:
+# 0: disable all list compression
+# 1: depth 1 means "don't start compressing until after 1 node into the list,
+# going from either the head or tail"
+# So: [head]->node->node->...->node->[tail]
+# [head], [tail] will always be uncompressed; inner nodes will compress.
+# 2: [head]->[next]->node->node->...->node->[prev]->[tail]
+# 2 here means: don't compress head or head->next or tail->prev or tail,
+# but compress all nodes between them.
+# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail]
+# etc.
+list-compress-depth 0
# Sets have a special encoding in just one case: when a set is composed
-# of just strings that happens to be integers in radix 10 in the range
+# of just strings that happen to be integers in radix 10 in the range
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
@@ -763,7 +1089,7 @@ zset-max-ziplist-value 64
#
# A value greater than 16000 is totally useless, since at that point the
# dense representation is more memory efficient.
-#
+#
# The suggested value is ~ 3000 in order to have the benefits of
# the space efficient encoding without slowing down too much PFADD,
# which is O(N) with the sparse encoding. The value can be raised to
@@ -778,13 +1104,13 @@ hll-sparse-max-bytes 3000
# that is rehashing, the more rehashing "steps" are performed, so if the
# server is idle the rehashing is never complete and some more memory is used
# by the hash table.
-#
+#
# The default is to use this millisecond 10 times every second in order to
-# active rehashing the main dictionaries, freeing memory when possible.
+# actively rehash the main dictionaries, freeing memory when possible.
#
# If unsure:
# use "activerehashing no" if you have hard latency requirements and it is
-# not a good thing in your environment that Redis can reply form time to time
+# not a good thing in your environment that Redis can reply from time to time
# to queries with 2 milliseconds delay.
#
# use "activerehashing yes" if you don't have such hard requirements but
@@ -833,7 +1159,7 @@ client-output-buffer-limit pubsub 32mb 8mb 60
# never requested, and so forth.
#
# Not all tasks are performed with the same frequency, but Redis checks for
-# tasks to perform accordingly to the specified "hz" value.
+# tasks to perform according to the specified "hz" value.
#
# By default "hz" is set to 10. Raising the value will use more CPU when
# Redis is idle, but at the same time will make Redis more responsive when
@@ -851,3 +1177,117 @@ hz 10
# big latency spikes.
aof-rewrite-incremental-fsync yes
+# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good
+# idea to start with the default settings and only change them after investigating
+# how to improve the performance and how the LFU of keys changes over time,
+# which can be inspected via the OBJECT FREQ command.
+#
+# There are two tunable parameters in the Redis LFU implementation: the
+# counter logarithm factor and the counter decay time. It is important to
+# understand what the two parameters mean before changing them.
+#
+# The LFU counter is just 8 bits per key, its maximum value is 255, so Redis
+# uses a probabilistic increment with logarithmic behavior. Given the value
+# of the old counter, when a key is accessed, the counter is incremented in
+# this way:
+#
+# 1. A random number R between 0 and 1 is extracted.
+# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1).
+# 3. The counter is incremented only if R < P.
+#
+# The default lfu-log-factor is 10. This is a table of how the frequency
+# counter changes with a different number of accesses with different
+# logarithmic factors:
+#
+# +--------+------------+------------+------------+------------+------------+
+# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits |
+# +--------+------------+------------+------------+------------+------------+
+# | 0 | 104 | 255 | 255 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 1 | 18 | 49 | 255 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 10 | 10 | 18 | 142 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 100 | 8 | 11 | 49 | 143 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+#
+# NOTE: The above table was obtained by running the following commands:
+#
+# redis-benchmark -n 1000000 incr foo
+# redis-cli object freq foo
+#
+# NOTE 2: The counter initial value is 5 in order to give new objects a chance
+# to accumulate hits.
+#
+# The counter decay time is the time, in minutes, that must elapse in order
+# for the key counter to be divided by two (or decremented if it has a value
+# less than or equal to 10).
+#
+# The default value for the lfu-decay-time is 1. A special value of 0 means to
+# decay the counter every time it happens to be scanned.
+#
+# lfu-log-factor 10
+# lfu-decay-time 1
+
+########################### ACTIVE DEFRAGMENTATION #######################
+#
+# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested
+# even in production and manually tested by multiple engineers for some
+# time.
+#
+# What is active defragmentation?
+# -------------------------------
+#
+# Active (online) defragmentation allows a Redis server to compact the
+# spaces left between small allocations and deallocations of data in memory,
+# thus allowing to reclaim back memory.
+#
+# Fragmentation is a natural process that happens with every allocator (but
+# less so with Jemalloc, fortunately) and certain workloads. Normally a server
+# restart is needed in order to lower the fragmentation, or at least to flush
+# away all the data and create it again. However thanks to this feature
+# implemented by Oran Agra for Redis 4.0 this process can happen at runtime
+# in a "hot" way, while the server is running.
+#
+# Basically when the fragmentation is over a certain level (see the
+# configuration options below) Redis will start to create new copies of the
+# values in contiguous memory regions by exploiting certain specific Jemalloc
+# features (in order to understand if an allocation is causing fragmentation
+# and to allocate it in a better place), and at the same time, will release the
+# old copies of the data. This process, repeated incrementally for all the keys
+# will cause the fragmentation to drop back to normal values.
+#
+# Important things to understand:
+#
+# 1. This feature is disabled by default, and only works if you compiled Redis
+# to use the copy of Jemalloc we ship with the source code of Redis.
+# This is the default with Linux builds.
+#
+# 2. You never need to enable this feature if you don't have fragmentation
+# issues.
+#
+# 3. Once you experience fragmentation, you can enable this feature when
+# needed with the command "CONFIG SET activedefrag yes".
+#
+# The configuration parameters are able to fine tune the behavior of the
+# defragmentation process. If you are not sure about what they mean it is
+# a good idea to leave the defaults untouched.
+
+# Enable active defragmentation
+# activedefrag yes
+
+# Minimum amount of fragmentation waste to start active defrag
+# active-defrag-ignore-bytes 100mb
+
+# Minimum percentage of fragmentation to start active defrag
+# active-defrag-threshold-lower 10
+
+# Maximum percentage of fragmentation at which we use maximum effort
+# active-defrag-threshold-upper 100
+
+# Minimal effort for defrag in CPU percentage
+# active-defrag-cycle-min 25
+
+# Maximal effort for defrag in CPU percentage
+# active-defrag-cycle-max 75
+
diff --git a/sentinel.conf b/sentinel.conf
index 2384e9bc7..0e1b266ed 100644
--- a/sentinel.conf
+++ b/sentinel.conf
@@ -1,13 +1,51 @@
# Example sentinel.conf
+# *** IMPORTANT ***
+#
+# By default Sentinel will not be reachable from interfaces different than
+# localhost, either use the 'bind' directive to bind to a list of network
+# interfaces, or disable protected mode with "protected-mode no" by
+# adding it to this configuration file.
+#
+# Before doing that MAKE SURE the instance is protected from the outside
+# world via firewalling or other means.
+#
+# For example you may use one of the following:
+#
+# bind 127.0.0.1 192.168.1.1
+#
+# protected-mode no
+
# port <sentinel-port>
# The port that this sentinel instance will run on
port 26379
+# sentinel announce-ip <ip>
+# sentinel announce-port <port>
+#
+# The above two configuration directives are useful in environments where,
+# because of NAT, Sentinel is reachable from outside via a non-local address.
+#
+# When announce-ip is provided, the Sentinel will claim the specified IP address
+# in HELLO messages used to gossip its presence, instead of auto-detecting the
+# local address as it usually does.
+#
+# Similarly when announce-port is provided and is valid and non-zero, Sentinel
+# will announce the specified TCP port.
+#
+# The two options don't need to be used together, if only announce-ip is
+# provided, the Sentinel will announce the specified IP and the server port
+# as specified by the "port" option. If only announce-port is provided, the
+# Sentinel will announce the auto-detected local IP and the specified port.
+#
+# Example:
+#
+# sentinel announce-ip 1.2.3.4
+
# dir <working-directory>
# Every long running process should have a well-defined working directory.
# For Redis Sentinel to chdir to /tmp at startup is the simplest thing
-# for the process to don't interferer with administrative tasks such as
+# for the process to don't interfere with administrative tasks such as
# unmounting filesystems.
dir /tmp
@@ -156,4 +194,3 @@ sentinel failover-timeout mymaster 180000
#
# sentinel client-reconfig-script mymaster /var/redis/reconfig.sh
-
diff --git a/src/Makefile b/src/Makefile
index 4ccc6d367..86e0b3fe0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,23 +14,33 @@
release_hdr := $(shell sh -c './mkreleasehdr.sh')
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
OPTIMIZATION?=-O2
DEPENDENCY_TARGETS=hiredis linenoise lua
+NODEPS:=clean distclean
# Default settings
-STD=-std=c99 -pedantic
-WARN=-Wall
+STD=-std=c99 -pedantic -DREDIS_STATIC=''
+WARN=-Wall -W -Wno-missing-field-initializers
OPT=$(OPTIMIZATION)
PREFIX?=/usr/local
INSTALL_BIN=$(PREFIX)/bin
INSTALL=install
-# Default allocator
+# Default allocator defaults to Jemalloc if it's not an ARM
+MALLOC=libc
+ifneq ($(uname_M),armv6l)
+ifneq ($(uname_M),armv7l)
ifeq ($(uname_S),Linux)
MALLOC=jemalloc
-else
- MALLOC=libc
+endif
+endif
+endif
+
+# To get ARM stack traces if Redis crashes we need a special C flag.
+ifneq (,$(findstring armv,$(uname_M)))
+ CFLAGS+=-funwind-tables
endif
# Backwards compatibility for selecting an allocator
@@ -46,6 +56,10 @@ ifeq ($(USE_JEMALLOC),yes)
MALLOC=jemalloc
endif
+ifeq ($(USE_JEMALLOC),no)
+ MALLOC=libc
+endif
+
# Override default settings if possible
-include .make-settings
@@ -56,19 +70,42 @@ DEBUG=-g -ggdb
ifeq ($(uname_S),SunOS)
# SunOS
+ ifneq ($(@@),32bit)
+ CFLAGS+= -m64
+ LDFLAGS+= -m64
+ endif
+ DEBUG=-g
+ DEBUG_FLAGS=-g
+ export CFLAGS LDFLAGS DEBUG DEBUG_FLAGS
INSTALL=cp -pf
FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6
- FINAL_LIBS+= -ldl -lnsl -lsocket -lpthread
+ FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt
else
ifeq ($(uname_S),Darwin)
- # Darwin (nothing to do)
+ # Darwin
+ FINAL_LIBS+= -ldl
+else
+ifeq ($(uname_S),AIX)
+ # AIX
+ FINAL_LDFLAGS+= -Wl,-bexpall
+ FINAL_LIBS+=-ldl -pthread -lcrypt -lbsd
+else
+ifeq ($(uname_S),OpenBSD)
+ # OpenBSD
+ FINAL_LIBS+= -lpthread
+else
+ifeq ($(uname_S),FreeBSD)
+ # FreeBSD
+ FINAL_LIBS+= -lpthread
else
# All the other OSes (notably Linux)
FINAL_LDFLAGS+= -rdynamic
- FINAL_LIBS+= -pthread
+ FINAL_LIBS+=-ldl -pthread
+endif
+endif
+endif
endif
endif
-
# Include paths to dependencies
FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src
@@ -85,7 +122,7 @@ endif
ifeq ($(MALLOC),jemalloc)
DEPENDENCY_TARGETS+= jemalloc
FINAL_CFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include
- FINAL_LIBS+= ../deps/jemalloc/lib/libjemalloc.a -ldl
+ FINAL_LIBS+= ../deps/jemalloc/lib/libjemalloc.a
endif
REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS)
@@ -107,30 +144,27 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o
REDIS_CLI_NAME=redis-cli
-REDIS_CLI_OBJ=anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
+REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
REDIS_BENCHMARK_NAME=redis-benchmark
-REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o redis-benchmark.o
-REDIS_CHECK_DUMP_NAME=redis-check-dump
-REDIS_CHECK_DUMP_OBJ=redis-check-dump.o lzf_c.o lzf_d.o crc64.o
+REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o
+REDIS_CHECK_RDB_NAME=redis-check-rdb
REDIS_CHECK_AOF_NAME=redis-check-aof
-REDIS_CHECK_AOF_OBJ=redis-check-aof.o
-all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME)
+all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME)
@echo ""
- @echo "Hint: To run 'make test' is a good idea ;)"
+ @echo "Hint: It's a good idea to run 'make test' ;)"
@echo ""
-.PHONY: all
-
-# Deps (use make dep to generate this)
-include Makefile.dep
+Makefile.dep:
+ -$(REDIS_CC) -MM *.c > Makefile.dep 2> /dev/null || true
-dep:
- $(REDIS_CC) -MM *.c > Makefile.dep
+ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS))))
+-include Makefile.dep
+endif
-.PHONY: dep
+.PHONY: all
persist-settings: distclean
echo STD=$(STD) >> .make-settings
@@ -168,6 +202,14 @@ $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)
+# redis-check-rdb
+$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME)
+
+# redis-check-aof
+$(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME)
+
# redis-cli
$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS)
@@ -176,13 +218,8 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a $(FINAL_LIBS)
-# redis-check-dump
-$(REDIS_CHECK_DUMP_NAME): $(REDIS_CHECK_DUMP_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
-
-# redis-check-aof
-$(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
+dict-benchmark: dict.c zmalloc.c sds.c siphash.c
+ $(REDIS_CC) $(FINAL_CFLAGS) $^ -D DICT_BENCHMARK_MAIN -o $@ $(FINAL_LIBS)
# Because the jemalloc.h header is generated as a part of the jemalloc build,
# building it should complete before building any other object. Instead of
@@ -191,7 +228,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_CC) -c $<
clean:
- rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
+ rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep dict-benchmark
.PHONY: clean
@@ -215,6 +252,10 @@ lcov:
@geninfo -o redis.info .
@genhtml --legend -o lcov-html redis.info
+test-sds: sds.c sds.h
+ $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN $(FINAL_LIBS) -o /tmp/sds_test
+ /tmp/sds_test
+
.PHONY: lcov
bench: $(REDIS_BENCHMARK_NAME)
@@ -235,6 +276,9 @@ noopt:
valgrind:
$(MAKE) OPTIMIZATION="-O0" MALLOC="libc"
+helgrind:
+ $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" CFLAGS="-D__ATOMIC_VAR_FORCE_SYNC_MACROS"
+
src/help.h:
@../utils/generate-command-help.rb > help.h
@@ -243,5 +287,6 @@ install: all
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_BENCHMARK_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CLI_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_DUMP_NAME) $(INSTALL_BIN)
+ $(REDIS_INSTALL) $(REDIS_CHECK_RDB_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CHECK_AOF_NAME) $(INSTALL_BIN)
+ @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
diff --git a/src/Makefile.dep b/src/Makefile.dep
deleted file mode 100644
index 33e89137d..000000000
--- a/src/Makefile.dep
+++ /dev/null
@@ -1,142 +0,0 @@
-adlist.o: adlist.c adlist.h zmalloc.h
-ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c ae_epoll.c ae_select.c ae_evport.c
-ae_epoll.o: ae_epoll.c
-ae_evport.o: ae_evport.c
-ae_kqueue.o: ae_kqueue.c
-ae_select.o: ae_select.c
-anet.o: anet.c fmacros.h anet.h
-aof.o: aof.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- bio.h
-bio.o: bio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- bio.h
-bitops.o: bitops.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-blocked.o: blocked.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-cluster.o: cluster.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h endianconv.h
-config.o: config.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h
-crc16.o: crc16.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-crc64.o: crc64.c
-db.o: db.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h
-debug.o: debug.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- sha1.h crc64.h bio.h
-dict.o: dict.c fmacros.h dict.h zmalloc.h redisassert.h
-endianconv.o: endianconv.c
-hyperloglog.o: hyperloglog.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-intset.o: intset.c intset.h zmalloc.h endianconv.h config.h
-latency.o: latency.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-lzf_c.o: lzf_c.c lzfP.h
-lzf_d.o: lzf_d.c lzfP.h
-memtest.o: memtest.c config.h
-multi.o: multi.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-networking.o: networking.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-notify.o: notify.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-object.o: object.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-pqsort.o: pqsort.c
-pubsub.o: pubsub.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-rand.o: rand.c
-rdb.o: rdb.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- lzf.h zipmap.h endianconv.h
-redis-benchmark.o: redis-benchmark.c fmacros.h ae.h \
- ../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h
-redis-check-aof.o: redis-check-aof.c fmacros.h config.h
-redis-check-dump.o: redis-check-dump.c lzf.h crc64.h
-redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \
- sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h anet.h ae.h
-redis.o: redis.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h slowlog.h bio.h asciilogo.h
-release.o: release.c release.h version.h crc64.h
-replication.o: replication.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-rio.o: rio.c fmacros.h rio.h sds.h util.h crc64.h config.h redis.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h dict.h adlist.h \
- zmalloc.h anet.h ziplist.h intset.h version.h latency.h sparkline.h \
- rdb.h
-scripting.o: scripting.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- sha1.h rand.h ../deps/lua/src/lauxlib.h ../deps/lua/src/lua.h \
- ../deps/lua/src/lualib.h
-sds.o: sds.c sds.h zmalloc.h
-sentinel.o: sentinel.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- ../deps/hiredis/hiredis.h ../deps/hiredis/async.h \
- ../deps/hiredis/hiredis.h
-setproctitle.o: setproctitle.c
-sha1.o: sha1.c sha1.h config.h
-slowlog.o: slowlog.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- slowlog.h
-sort.o: sort.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- pqsort.h
-sparkline.o: sparkline.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-syncio.o: syncio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_hash.o: t_hash.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_list.o: t_list.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_set.o: t_set.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_string.o: t_string.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_zset.o: t_zset.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-util.o: util.c fmacros.h util.h sds.h
-ziplist.o: ziplist.c zmalloc.h util.h sds.h ziplist.h endianconv.h \
- config.h redisassert.h
-zipmap.o: zipmap.c zmalloc.h endianconv.h config.h
-zmalloc.o: zmalloc.c config.h zmalloc.h
diff --git a/src/adlist.c b/src/adlist.c
index b4dba420f..e87d25cee 100644
--- a/src/adlist.c
+++ b/src/adlist.c
@@ -52,10 +52,8 @@ list *listCreate(void)
return list;
}
-/* Free the whole list.
- *
- * This function can't fail. */
-void listRelease(list *list)
+/* Remove all the elements from the list without destroying the list itself. */
+void listEmpty(list *list)
{
unsigned long len;
listNode *current, *next;
@@ -68,10 +66,20 @@ void listRelease(list *list)
zfree(current);
current = next;
}
+ list->head = list->tail = NULL;
+ list->len = 0;
+}
+
+/* Free the whole list.
+ *
+ * This function can't fail. */
+void listRelease(list *list)
+{
+ listEmpty(list);
zfree(list);
}
-/* Add a new node to the list, to head, contaning the specified 'value'
+/* Add a new node to the list, to head, containing the specified 'value'
* pointer as value.
*
* On error, NULL is returned and no operation is performed (i.e. the
@@ -242,7 +250,7 @@ listNode *listNext(listIter *iter)
list *listDup(list *orig)
{
list *copy;
- listIter *iter;
+ listIter iter;
listNode *node;
if ((copy = listCreate()) == NULL)
@@ -250,26 +258,23 @@ list *listDup(list *orig)
copy->dup = orig->dup;
copy->free = orig->free;
copy->match = orig->match;
- iter = listGetIterator(orig, AL_START_HEAD);
- while((node = listNext(iter)) != NULL) {
+ listRewind(orig, &iter);
+ while((node = listNext(&iter)) != NULL) {
void *value;
if (copy->dup) {
value = copy->dup(node->value);
if (value == NULL) {
listRelease(copy);
- listReleaseIterator(iter);
return NULL;
}
} else
value = node->value;
if (listAddNodeTail(copy, value) == NULL) {
listRelease(copy);
- listReleaseIterator(iter);
return NULL;
}
}
- listReleaseIterator(iter);
return copy;
}
@@ -284,24 +289,21 @@ list *listDup(list *orig)
* NULL is returned. */
listNode *listSearchKey(list *list, void *key)
{
- listIter *iter;
+ listIter iter;
listNode *node;
- iter = listGetIterator(list, AL_START_HEAD);
- while((node = listNext(iter)) != NULL) {
+ listRewind(list, &iter);
+ while((node = listNext(&iter)) != NULL) {
if (list->match) {
if (list->match(node->value, key)) {
- listReleaseIterator(iter);
return node;
}
} else {
if (key == node->value) {
- listReleaseIterator(iter);
return node;
}
}
}
- listReleaseIterator(iter);
return NULL;
}
@@ -339,3 +341,22 @@ void listRotate(list *list) {
tail->next = list->head;
list->head = tail;
}
+
+/* Add all the elements of the list 'o' at the end of the
+ * list 'l'. The list 'other' remains empty but otherwise valid. */
+void listJoin(list *l, list *o) {
+ if (o->head)
+ o->head->prev = l->tail;
+
+ if (l->tail)
+ l->tail->next = o->head;
+ else
+ l->head = o->head;
+
+ l->tail = o->tail;
+ l->len += o->len;
+
+ /* Setup other as an empty list. */
+ o->head = o->tail = NULL;
+ o->len = 0;
+}
diff --git a/src/adlist.h b/src/adlist.h
index be322552f..c954fac87 100644
--- a/src/adlist.h
+++ b/src/adlist.h
@@ -72,6 +72,7 @@ typedef struct list {
/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
+void listEmpty(list *list);
list *listAddNodeHead(list *list, void *value);
list *listAddNodeTail(list *list, void *value);
list *listInsertNode(list *list, listNode *old_node, void *value, int after);
@@ -85,6 +86,7 @@ listNode *listIndex(list *list, long index);
void listRewind(list *list, listIter *li);
void listRewindTail(list *list, listIter *li);
void listRotate(list *list);
+void listJoin(list *l, list *o);
/* Directions for iterators */
#define AL_START_HEAD 0
diff --git a/src/ae.c b/src/ae.c
index 164f8fdeb..742388d85 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -75,6 +75,7 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
eventLoop->stop = 0;
eventLoop->maxfd = -1;
eventLoop->beforesleep = NULL;
+ eventLoop->aftersleep = NULL;
if (aeApiCreate(eventLoop) == -1) goto err;
/* Events with mask == AE_NONE are not set. So let's initialize the
* vector with it. */
@@ -156,8 +157,9 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
{
if (fd >= eventLoop->setsize) return;
aeFileEvent *fe = &eventLoop->events[fd];
-
if (fe->mask == AE_NONE) return;
+
+ aeApiDelEvent(eventLoop, fd, mask);
fe->mask = fe->mask & (~mask);
if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
/* Update the max fd */
@@ -167,7 +169,6 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
if (eventLoop->events[j].mask != AE_NONE) break;
eventLoop->maxfd = j;
}
- aeApiDelEvent(eventLoop, fd, mask);
}
int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
@@ -221,21 +222,12 @@ long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
{
- aeTimeEvent *te, *prev = NULL;
-
- te = eventLoop->timeEventHead;
+ aeTimeEvent *te = eventLoop->timeEventHead;
while(te) {
if (te->id == id) {
- if (prev == NULL)
- eventLoop->timeEventHead = te->next;
- else
- prev->next = te->next;
- if (te->finalizerProc)
- te->finalizerProc(eventLoop, te->clientData);
- zfree(te);
+ te->id = AE_DELETED_EVENT_ID;
return AE_OK;
}
- prev = te;
te = te->next;
}
return AE_ERR; /* NO event with the specified ID found */
@@ -270,7 +262,7 @@ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
/* Process time events */
static int processTimeEvents(aeEventLoop *eventLoop) {
int processed = 0;
- aeTimeEvent *te;
+ aeTimeEvent *te, *prev;
long long maxId;
time_t now = time(NULL);
@@ -291,12 +283,32 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
}
eventLoop->lastTime = now;
+ prev = NULL;
te = eventLoop->timeEventHead;
maxId = eventLoop->timeEventNextId-1;
while(te) {
long now_sec, now_ms;
long long id;
+ /* Remove events scheduled for deletion. */
+ if (te->id == AE_DELETED_EVENT_ID) {
+ aeTimeEvent *next = te->next;
+ if (prev == NULL)
+ eventLoop->timeEventHead = te->next;
+ else
+ prev->next = te->next;
+ if (te->finalizerProc)
+ te->finalizerProc(eventLoop, te->clientData);
+ zfree(te);
+ te = next;
+ continue;
+ }
+
+ /* Make sure we don't process time events created by time events in
+ * this iteration. Note that this check is currently useless: we always
+ * add new timers on the head, however if we change the implementation
+ * detail, this check may be useful again: we keep it here for future
+ * defense. */
if (te->id > maxId) {
te = te->next;
continue;
@@ -310,28 +322,14 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
id = te->id;
retval = te->timeProc(eventLoop, id, te->clientData);
processed++;
- /* After an event is processed our time event list may
- * no longer be the same, so we restart from head.
- * Still we make sure to don't process events registered
- * by event handlers itself in order to don't loop forever.
- * To do so we saved the max ID we want to handle.
- *
- * FUTURE OPTIMIZATIONS:
- * Note that this is NOT great algorithmically. Redis uses
- * a single time event so it's not a problem but the right
- * way to do this is to add the new elements on head, and
- * to flag deleted elements in a special way for later
- * deletion (putting references to the nodes to delete into
- * another linked list). */
if (retval != AE_NOMORE) {
aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
} else {
- aeDeleteTimeEvent(eventLoop, id);
+ te->id = AE_DELETED_EVENT_ID;
}
- te = eventLoop->timeEventHead;
- } else {
- te = te->next;
}
+ prev = te;
+ te = te->next;
}
return processed;
}
@@ -346,6 +344,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
* if flags has AE_FILE_EVENTS set, file events are processed.
* if flags has AE_TIME_EVENTS set, time events are processed.
* if flags has AE_DONT_WAIT set the function returns ASAP until all
+ * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
* the events that's possible to process without to wait are processed.
*
* The function returns the number of events processed. */
@@ -371,19 +370,22 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
if (shortest) {
long now_sec, now_ms;
- /* Calculate the time missing for the nearest
- * timer to fire. */
aeGetTime(&now_sec, &now_ms);
tvp = &tv;
- tvp->tv_sec = shortest->when_sec - now_sec;
- if (shortest->when_ms < now_ms) {
- tvp->tv_usec = ((shortest->when_ms+1000) - now_ms)*1000;
- tvp->tv_sec --;
+
+ /* How many milliseconds we need to wait for the next
+ * time event to fire? */
+ long long ms =
+ (shortest->when_sec - now_sec)*1000 +
+ shortest->when_ms - now_ms;
+
+ if (ms > 0) {
+ tvp->tv_sec = ms/1000;
+ tvp->tv_usec = (ms % 1000)*1000;
} else {
- tvp->tv_usec = (shortest->when_ms - now_ms)*1000;
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
}
- if (tvp->tv_sec < 0) tvp->tv_sec = 0;
- if (tvp->tv_usec < 0) tvp->tv_usec = 0;
} else {
/* If we have to check for events but need to return
* ASAP because of AE_DONT_WAIT we need to set the timeout
@@ -397,7 +399,14 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
}
}
+ /* Call the multiplexing API, will return only on timeout or when
+ * some event fires. */
numevents = aeApiPoll(eventLoop, tvp);
+
+ /* After sleep callback. */
+ if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
+ eventLoop->aftersleep(eventLoop);
+
for (j = 0; j < numevents; j++) {
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
int mask = eventLoop->fired[j].mask;
@@ -452,7 +461,7 @@ void aeMain(aeEventLoop *eventLoop) {
while (!eventLoop->stop) {
if (eventLoop->beforesleep != NULL)
eventLoop->beforesleep(eventLoop);
- aeProcessEvents(eventLoop, AE_ALL_EVENTS);
+ aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
}
}
@@ -463,3 +472,7 @@ char *aeGetApiName(void) {
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
eventLoop->beforesleep = beforesleep;
}
+
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
+ eventLoop->aftersleep = aftersleep;
+}
diff --git a/src/ae.h b/src/ae.h
index 15ca1b5e7..c49bfe233 100644
--- a/src/ae.h
+++ b/src/ae.h
@@ -33,6 +33,8 @@
#ifndef __AE_H__
#define __AE_H__
+#include <time.h>
+
#define AE_OK 0
#define AE_ERR -1
@@ -44,8 +46,10 @@
#define AE_TIME_EVENTS 2
#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS)
#define AE_DONT_WAIT 4
+#define AE_CALL_AFTER_SLEEP 8
#define AE_NOMORE -1
+#define AE_DELETED_EVENT_ID -1
/* Macros */
#define AE_NOTUSED(V) ((void) V)
@@ -95,6 +99,7 @@ typedef struct aeEventLoop {
int stop;
void *apidata; /* This is used for polling API specific data */
aeBeforeSleepProc *beforesleep;
+ aeBeforeSleepProc *aftersleep;
} aeEventLoop;
/* Prototypes */
@@ -114,6 +119,7 @@ int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
index da9c7b906..410aac70d 100644
--- a/src/ae_epoll.c
+++ b/src/ae_epoll.c
@@ -72,7 +72,7 @@ static void aeApiFree(aeEventLoop *eventLoop) {
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
aeApiState *state = eventLoop->apidata;
- struct epoll_event ee;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
/* If the fd was already monitored for some event, we need a MOD
* operation. Otherwise we need an ADD operation. */
int op = eventLoop->events[fd].mask == AE_NONE ?
@@ -82,7 +82,6 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
mask |= eventLoop->events[fd].mask; /* Merge old events */
if (mask & AE_READABLE) ee.events |= EPOLLIN;
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
- ee.data.u64 = 0; /* avoid valgrind warning */
ee.data.fd = fd;
if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
return 0;
@@ -90,13 +89,12 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
aeApiState *state = eventLoop->apidata;
- struct epoll_event ee;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
int mask = eventLoop->events[fd].mask & (~delmask);
ee.events = 0;
if (mask & AE_READABLE) ee.events |= EPOLLIN;
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
- ee.data.u64 = 0; /* avoid valgrind warning */
ee.data.fd = fd;
if (mask != AE_NONE) {
epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
diff --git a/src/ae_select.c b/src/ae_select.c
index e2b7a9e8a..c039a8ea3 100644
--- a/src/ae_select.c
+++ b/src/ae_select.c
@@ -29,6 +29,7 @@
*/
+#include <sys/select.h>
#include <string.h>
typedef struct aeApiState {
diff --git a/src/anet.c b/src/anet.c
index 87cc3ea25..53a56b0d2 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -34,6 +34,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
+#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
@@ -57,24 +58,37 @@ static void anetSetError(char *err, const char *fmt, ...)
va_end(ap);
}
-int anetNonBlock(char *err, int fd)
-{
+int anetSetBlock(char *err, int fd, int non_block) {
int flags;
- /* Set the socket non-blocking.
+ /* Set the socket blocking (if non_block is zero) or non-blocking.
* Note that fcntl(2) for F_GETFL and F_SETFL can't be
* interrupted by a signal. */
if ((flags = fcntl(fd, F_GETFL)) == -1) {
anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno));
return ANET_ERR;
}
- if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+
+ if (non_block)
+ flags |= O_NONBLOCK;
+ else
+ flags &= ~O_NONBLOCK;
+
+ if (fcntl(fd, F_SETFL, flags) == -1) {
anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno));
return ANET_ERR;
}
return ANET_OK;
}
+int anetNonBlock(char *err, int fd) {
+ return anetSetBlock(err,fd,1);
+}
+
+int anetBlock(char *err, int fd) {
+ return anetSetBlock(err,fd,0);
+}
+
/* Set TCP keep alive option to detect dead peers. The interval option
* is only used for Linux as we are using Linux-specific APIs to set
* the probe send time, interval, and count. */
@@ -117,6 +131,8 @@ int anetKeepAlive(char *err, int fd, int interval)
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
return ANET_ERR;
}
+#else
+ ((void) interval); /* Avoid unused var warning for non Linux systems. */
#endif
return ANET_OK;
@@ -163,6 +179,20 @@ int anetTcpKeepAlive(char *err, int fd)
return ANET_OK;
}
+/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified
+ * number of milliseconds, or disable it if the 'ms' argument is zero. */
+int anetSendTimeout(char *err, int fd, long long ms) {
+ struct timeval tv;
+
+ tv.tv_sec = ms/1000;
+ tv.tv_usec = (ms%1000)*1000;
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
+ anetSetError(err, "setsockopt SO_SNDTIMEO: %s", strerror(errno));
+ return ANET_ERR;
+ }
+ return ANET_OK;
+}
+
/* anetGenericResolve() is called by anetResolve() and anetResolveIP() to
* do the actual work. It resolves the hostname "host" and set the string
* representation of the IP address into the buffer pointed by "ipbuf".
@@ -234,6 +264,7 @@ static int anetCreateSocket(char *err, int domain) {
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
+#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
static int anetTcpGenericConnect(char *err, char *addr, int port,
char *source_addr, int flags)
{
@@ -262,9 +293,10 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
if (source_addr) {
int bound = 0;
/* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
- if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0) {
+ if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
+ {
anetSetError(err, "%s", gai_strerror(rv));
- goto end;
+ goto error;
}
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
@@ -272,9 +304,10 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
break;
}
}
+ freeaddrinfo(bservinfo);
if (!bound) {
anetSetError(err, "bind: %s", strerror(errno));
- goto end;
+ goto error;
}
}
if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
@@ -299,9 +332,17 @@ error:
close(s);
s = ANET_ERR;
}
+
end:
freeaddrinfo(servinfo);
- return s;
+
+ /* Handle best effort binding: if a binding address was used, but it is
+ * not possible to create a socket, try again without a binding address. */
+ if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
+ return anetTcpGenericConnect(err,addr,port,NULL,flags);
+ } else {
+ return s;
+ }
}
int anetTcpConnect(char *err, char *addr, int port)
@@ -314,9 +355,18 @@ int anetTcpNonBlockConnect(char *err, char *addr, int port)
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}
-int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr)
+int anetTcpNonBlockBindConnect(char *err, char *addr, int port,
+ char *source_addr)
+{
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK);
+}
+
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port,
+ char *source_addr)
{
- return anetTcpGenericConnect(err,addr,port,source_addr,ANET_CONNECT_NONBLOCK);
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}
int anetUnixGenericConnect(char *err, char *path, int flags)
@@ -330,8 +380,10 @@ int anetUnixGenericConnect(char *err, char *path, int flags)
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (flags & ANET_CONNECT_NONBLOCK) {
- if (anetNonBlock(err,s) != ANET_OK)
+ if (anetNonBlock(err,s) != ANET_OK) {
+ close(s);
return ANET_ERR;
+ }
}
if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) {
if (errno == EINPROGRESS &&
@@ -359,7 +411,7 @@ int anetUnixNonBlockConnect(char *err, char *path)
* (unless error or EOF condition is encountered) */
int anetRead(int fd, char *buf, int count)
{
- int nread, totlen = 0;
+ ssize_t nread, totlen = 0;
while(totlen != count) {
nread = read(fd,buf,count-totlen);
if (nread == 0) return totlen;
@@ -370,11 +422,11 @@ int anetRead(int fd, char *buf, int count)
return totlen;
}
-/* Like write(2) but make sure 'count' is read before to return
+/* Like write(2) but make sure 'count' is written before to return
* (unless error is encountered) */
int anetWrite(int fd, char *buf, int count)
{
- int nwritten, totlen = 0;
+ ssize_t nwritten, totlen = 0;
while(totlen != count) {
nwritten = write(fd,buf,count-totlen);
if (nwritten == 0) return totlen;
@@ -412,7 +464,7 @@ static int anetV6Only(char *err, int s) {
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
{
- int s, rv;
+ int s = -1, rv;
char _port[6]; /* strlen("65535") */
struct addrinfo hints, *servinfo, *p;
@@ -436,11 +488,12 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl
goto end;
}
if (p == NULL) {
- anetSetError(err, "unable to bind socket");
+ anetSetError(err, "unable to bind socket, errno: %d", errno);
goto error;
}
error:
+ if (s != -1) close(s);
s = ANET_ERR;
end:
freeaddrinfo(servinfo);
@@ -525,22 +578,53 @@ int anetPeerToString(int fd, char *ip, size_t ip_len, int *port) {
struct sockaddr_storage sa;
socklen_t salen = sizeof(sa);
- if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) {
- if (port) *port = 0;
- ip[0] = '?';
- ip[1] = '\0';
- return -1;
- }
+ if (getpeername(fd,(struct sockaddr*)&sa,&salen) == -1) goto error;
+ if (ip_len == 0) goto error;
+
if (sa.ss_family == AF_INET) {
struct sockaddr_in *s = (struct sockaddr_in *)&sa;
if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len);
if (port) *port = ntohs(s->sin_port);
- } else {
+ } else if (sa.ss_family == AF_INET6) {
struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa;
if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len);
if (port) *port = ntohs(s->sin6_port);
+ } else if (sa.ss_family == AF_UNIX) {
+ if (ip) strncpy(ip,"/unixsocket",ip_len);
+ if (port) *port = 0;
+ } else {
+ goto error;
}
return 0;
+
+error:
+ if (ip) {
+ if (ip_len >= 2) {
+ ip[0] = '?';
+ ip[1] = '\0';
+ } else if (ip_len == 1) {
+ ip[0] = '\0';
+ }
+ }
+ if (port) *port = 0;
+ return -1;
+}
+
+/* Format an IP,port pair into something easy to parse. If IP is IPv6
+ * (matches for ":"), the ip is surrounded by []. IP and port are just
+ * separated by colons. This the standard to display addresses within Redis. */
+int anetFormatAddr(char *buf, size_t buf_len, char *ip, int port) {
+ return snprintf(buf,buf_len, strchr(ip,':') ?
+ "[%s]:%d" : "%s:%d", ip, port);
+}
+
+/* Like anetFormatAddr() but extract ip and port from the socket's peer. */
+int anetFormatPeer(int fd, char *buf, size_t buf_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetPeerToString(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(buf, buf_len, ip, port);
}
int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
@@ -564,3 +648,11 @@ int anetSockName(int fd, char *ip, size_t ip_len, int *port) {
}
return 0;
}
+
+int anetFormatSock(int fd, char *fmt, size_t fmt_len) {
+ char ip[INET6_ADDRSTRLEN];
+ int port;
+
+ anetSockName(fd,ip,sizeof(ip),&port);
+ return anetFormatAddr(fmt, fmt_len, ip, port);
+}
diff --git a/src/anet.h b/src/anet.h
index c4659cd35..7142f78d2 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -31,6 +31,8 @@
#ifndef ANET_H
#define ANET_H
+#include <sys/types.h>
+
#define ANET_OK 0
#define ANET_ERR -1
#define ANET_ERR_LEN 256
@@ -39,13 +41,18 @@
#define ANET_NONE 0
#define ANET_IP_ONLY (1<<0)
-#if defined(__sun)
+#if defined(__sun) || defined(_AIX)
#define AF_LOCAL AF_UNIX
#endif
+#ifdef _AIX
+#undef ip_len
+#endif
+
int anetTcpConnect(char *err, char *addr, int port);
int anetTcpNonBlockConnect(char *err, char *addr, int port);
int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr);
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port, char *source_addr);
int anetUnixConnect(char *err, char *path);
int anetUnixNonBlockConnect(char *err, char *path);
int anetRead(int fd, char *buf, int count);
@@ -58,11 +65,16 @@ int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port)
int anetUnixAccept(char *err, int serversock);
int anetWrite(int fd, char *buf, int count);
int anetNonBlock(char *err, int fd);
+int anetBlock(char *err, int fd);
int anetEnableTcpNoDelay(char *err, int fd);
int anetDisableTcpNoDelay(char *err, int fd);
int anetTcpKeepAlive(char *err, int fd);
+int anetSendTimeout(char *err, int fd, long long ms);
int anetPeerToString(int fd, char *ip, size_t ip_len, int *port);
int anetKeepAlive(char *err, int fd, int interval);
int anetSockName(int fd, char *ip, size_t ip_len, int *port);
+int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port);
+int anetFormatPeer(int fd, char *fmt, size_t fmt_len);
+int anetFormatSock(int fd, char *fmt, size_t fmt_len);
#endif
diff --git a/src/aof.c b/src/aof.c
index a2ef2df93..0593b2707 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "bio.h"
#include "rio.h"
@@ -38,6 +38,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
+#include <sys/param.h>
void aofUpdateCurrentSize(void);
void aofClosePipes(void);
@@ -74,7 +75,7 @@ void aofRewriteBufferReset(void) {
listSetFreeMethod(server.aof_rewrite_buf_blocks,zfree);
}
-/* Return the current size of the AOF rerwite buffer. */
+/* Return the current size of the AOF rewrite buffer. */
unsigned long aofRewriteBufferSize(void) {
listNode *ln;
listIter li;
@@ -95,6 +96,10 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
listNode *ln;
aofrwblock *block;
ssize_t nwritten;
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(privdata);
+ UNUSED(mask);
while(1) {
ln = listFirst(server.aof_rewrite_buf_blocks);
@@ -110,6 +115,7 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
if (nwritten <= 0) return;
memmove(block->buf,block->buf+nwritten,block->used-nwritten);
block->used -= nwritten;
+ block->free += nwritten;
}
if (block->used == 0) listDelNode(server.aof_rewrite_buf_blocks,ln);
}
@@ -146,9 +152,9 @@ void aofRewriteBufferAppend(unsigned char *s, unsigned long len) {
* as a notice or warning. */
numblocks = listLength(server.aof_rewrite_buf_blocks);
if (((numblocks+1) % 10) == 0) {
- int level = ((numblocks+1) % 100) == 0 ? REDIS_WARNING :
- REDIS_NOTICE;
- redisLog(level,"Background AOF buffer size: %lu MB",
+ int level = ((numblocks+1) % 100) == 0 ? LL_WARNING :
+ LL_NOTICE;
+ serverLog(level,"Background AOF buffer size: %lu MB",
aofRewriteBufferSize()/(1024*1024));
}
}
@@ -177,7 +183,7 @@ ssize_t aofRewriteBufferWrite(int fd) {
if (block->used) {
nwritten = write(fd,block->buf,block->used);
- if (nwritten != block->used) {
+ if (nwritten != (ssize_t)block->used) {
if (nwritten == 0) errno = EIO;
return -1;
}
@@ -194,28 +200,29 @@ ssize_t aofRewriteBufferWrite(int fd) {
/* Starts a background task that performs fsync() against the specified
* file descriptor (the one of the AOF file) in another thread. */
void aof_background_fsync(int fd) {
- bioCreateBackgroundJob(REDIS_BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);
+ bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);
}
/* Called when the user switches from "appendonly yes" to "appendonly no"
* at runtime using the CONFIG command. */
void stopAppendOnly(void) {
- redisAssert(server.aof_state != REDIS_AOF_OFF);
+ serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
aof_fsync(server.aof_fd);
close(server.aof_fd);
server.aof_fd = -1;
server.aof_selected_db = -1;
- server.aof_state = REDIS_AOF_OFF;
+ server.aof_state = AOF_OFF;
/* rewrite operation in progress? kill it, wait child exit */
if (server.aof_child_pid != -1) {
int statloc;
- redisLog(REDIS_NOTICE,"Killing running AOF rewrite child: %ld",
+ serverLog(LL_NOTICE,"Killing running AOF rewrite child: %ld",
(long) server.aof_child_pid);
- if (kill(server.aof_child_pid,SIGUSR1) != -1)
- wait3(&statloc,0,NULL);
+ if (kill(server.aof_child_pid,SIGUSR1) != -1) {
+ while(wait3(&statloc,0,NULL) != server.aof_child_pid);
+ }
/* reset the buffer accumulating changes while the child saves */
aofRewriteBufferReset();
aofRemoveTempFile(server.aof_child_pid);
@@ -229,22 +236,34 @@ void stopAppendOnly(void) {
/* Called when the user switches from "appendonly no" to "appendonly yes"
* at runtime using the CONFIG command. */
int startAppendOnly(void) {
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+
server.aof_last_fsync = server.unixtime;
server.aof_fd = open(server.aof_filename,O_WRONLY|O_APPEND|O_CREAT,0644);
- redisAssert(server.aof_state == REDIS_AOF_OFF);
+ serverAssert(server.aof_state == AOF_OFF);
if (server.aof_fd == -1) {
- redisLog(REDIS_WARNING,"Redis needs to enable the AOF but can't open the append only file: %s",strerror(errno));
- return REDIS_ERR;
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+
+ serverLog(LL_WARNING,
+ "Redis needs to enable the AOF but can't open the "
+ "append only file %s (in server root dir %s): %s",
+ server.aof_filename,
+ cwdp ? cwdp : "unknown",
+ strerror(errno));
+ return C_ERR;
}
- if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
+ if (server.rdb_child_pid != -1) {
+ server.aof_rewrite_scheduled = 1;
+ serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible.");
+ } else if (rewriteAppendOnlyFileBackground() == C_ERR) {
close(server.aof_fd);
- redisLog(REDIS_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
- return REDIS_ERR;
+ serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
+ return C_ERR;
}
- /* We correctly switched on AOF, now wait for the rerwite to be complete
+ /* We correctly switched on AOF, now wait for the rewrite to be complete
* in order to append data on disk. */
- server.aof_state = REDIS_AOF_WAIT_REWRITE;
- return REDIS_OK;
+ server.aof_state = AOF_WAIT_REWRITE;
+ return C_OK;
}
/* Write the append only file buffer on disk.
@@ -274,7 +293,7 @@ void flushAppendOnlyFile(int force) {
if (sdslen(server.aof_buf) == 0) return;
if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
- sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0;
+ sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;
if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
/* With this append fsync policy we do background fsyncing.
@@ -282,7 +301,7 @@ void flushAppendOnlyFile(int force) {
* the write for a couple of seconds. */
if (sync_in_progress) {
if (server.aof_flush_postponed_start == 0) {
- /* No previous write postponinig, remember that we are
+ /* No previous write postponing, remember that we are
* postponing the flush and return. */
server.aof_flush_postponed_start = server.unixtime;
return;
@@ -294,7 +313,7 @@ void flushAppendOnlyFile(int force) {
/* Otherwise fall trough, and go write since we can't wait
* over two seconds. */
server.aof_delayed_fsync++;
- redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
+ serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
}
}
/* We want to perform a single write. This should be guaranteed atomic
@@ -333,16 +352,16 @@ void flushAppendOnlyFile(int force) {
last_write_error_log = server.unixtime;
}
- /* Lof the AOF write error and record the error code. */
+ /* Log the AOF write error and record the error code. */
if (nwritten == -1) {
if (can_log) {
- redisLog(REDIS_WARNING,"Error writing to the AOF file: %s",
+ serverLog(LL_WARNING,"Error writing to the AOF file: %s",
strerror(errno));
server.aof_last_write_errno = errno;
}
} else {
if (can_log) {
- redisLog(REDIS_WARNING,"Short write while writing to "
+ serverLog(LL_WARNING,"Short write while writing to "
"the AOF file: (nwritten=%lld, "
"expected=%lld)",
(long long)nwritten,
@@ -351,13 +370,13 @@ void flushAppendOnlyFile(int force) {
if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {
if (can_log) {
- redisLog(REDIS_WARNING, "Could not remove short write "
+ serverLog(LL_WARNING, "Could not remove short write "
"from the append-only file. Redis may refuse "
"to load the AOF the next time it starts. "
"ftruncate: %s", strerror(errno));
}
} else {
- /* If the ftrunacate() succeeded we can set nwritten to
+ /* If the ftruncate() succeeded we can set nwritten to
* -1 since there is no longer partial data into the AOF. */
nwritten = -1;
}
@@ -369,14 +388,14 @@ void flushAppendOnlyFile(int force) {
/* We can't recover when the fsync policy is ALWAYS since the
* reply for the client is already in the output buffers, and we
* have the contract with the user that on acknowledged write data
- * is synched on disk. */
- redisLog(REDIS_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
+ * is synced on disk. */
+ serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
exit(1);
} else {
/* Recover from failed write leaving data into the buffer. However
* set an error to stop accepting writes as long as the error
* condition is not cleared. */
- server.aof_last_write_status = REDIS_ERR;
+ server.aof_last_write_status = C_ERR;
/* Trim the sds buffer if there was a partial write, and there
* was no way to undo it with ftruncate(2). */
@@ -389,10 +408,10 @@ void flushAppendOnlyFile(int force) {
} else {
/* Successful write(2). If AOF was in error state, restore the
* OK state and log the event. */
- if (server.aof_last_write_status == REDIS_ERR) {
- redisLog(REDIS_WARNING,
+ if (server.aof_last_write_status == C_ERR) {
+ serverLog(LL_WARNING,
"AOF write error looks solved, Redis can write again.");
- server.aof_last_write_status = REDIS_OK;
+ server.aof_last_write_status = C_OK;
}
}
server.aof_current_size += nwritten;
@@ -464,7 +483,7 @@ sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, r
long long when;
robj *argv[3];
- /* Make sure we can use strtol */
+ /* Make sure we can use strtoll */
seconds = getDecodedObject(seconds);
when = strtoll(seconds->ptr,NULL,10);
/* Convert argument into milliseconds for EXPIRE, SETEX, EXPIREAT */
@@ -495,7 +514,7 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
robj *tmpargv[3];
/* The DB this command was targeting is not the same as the last command
- * we appendend. To issue a SELECT command is needed. */
+ * we appended. To issue a SELECT command is needed. */
if (dictid != server.aof_selected_db) {
char seldb[64];
@@ -517,6 +536,22 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
decrRefCount(tmpargv[0]);
buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
+ } else if (cmd->proc == setCommand && argc > 3) {
+ int i;
+ robj *exarg = NULL, *pxarg = NULL;
+ /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */
+ buf = catAppendOnlyGenericCommand(buf,3,argv);
+ for (i = 3; i < argc; i ++) {
+ if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1];
+ if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1];
+ }
+ serverAssert(!(exarg && pxarg));
+ if (exarg)
+ buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],
+ exarg);
+ if (pxarg)
+ buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],
+ pxarg);
} else {
/* All the other commands don't need translation or need the
* same translation already operated in the command vector
@@ -527,7 +562,7 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
/* Append to the AOF buffer. This will be flushed on disk just before
* of re-entering the event loop, so before the client will get a
* positive reply about the operation performed. */
- if (server.aof_state == REDIS_AOF_ON)
+ if (server.aof_state == AOF_ON)
server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));
/* If a background append only file rewriting is in progress we want to
@@ -546,8 +581,8 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
/* In Redis commands are always executed in the context of a client, so in
* order to load the append only file we need to create a fake client. */
-struct redisClient *createFakeClient(void) {
- struct redisClient *c = zmalloc(sizeof(*c));
+struct client *createFakeClient(void) {
+ struct client *c = zmalloc(sizeof(*c));
selectDb(c,0);
c->fd = -1;
@@ -558,10 +593,10 @@ struct redisClient *createFakeClient(void) {
c->argv = NULL;
c->bufpos = 0;
c->flags = 0;
- c->btype = REDIS_BLOCKED_NONE;
+ c->btype = BLOCKED_NONE;
/* We set the fake client as a slave waiting for the synchronization
* so that Redis will not try to send replies to this client. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
c->reply = listCreate();
c->reply_bytes = 0;
c->obuf_soft_limit_reached_time = 0;
@@ -573,7 +608,15 @@ struct redisClient *createFakeClient(void) {
return c;
}
-void freeFakeClient(struct redisClient *c) {
+void freeFakeClientArgv(struct client *c) {
+ int j;
+
+ for (j = 0; j < c->argc; j++)
+ decrRefCount(c->argv[j]);
+ zfree(c->argv);
+}
+
+void freeFakeClient(struct client *c) {
sdsfree(c->querybuf);
listRelease(c->reply);
listRelease(c->watched_keys);
@@ -581,34 +624,61 @@ void freeFakeClient(struct redisClient *c) {
zfree(c);
}
-/* Replay the append log file. On error REDIS_OK is returned. On non fatal
- * error (the append only file is zero-length) REDIS_ERR is returned. On
+/* Replay the append log file. On success C_OK is returned. On non fatal
+ * error (the append only file is zero-length) C_ERR is returned. On
* fatal error an error message is logged and the program exists. */
int loadAppendOnlyFile(char *filename) {
- struct redisClient *fakeClient;
+ struct client *fakeClient;
FILE *fp = fopen(filename,"r");
struct redis_stat sb;
int old_aof_state = server.aof_state;
long loops = 0;
+ off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */
+
+ if (fp == NULL) {
+ serverLog(LL_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
+ exit(1);
+ }
+ /* Handle a zero-length AOF file as a special case. An emtpy AOF file
+ * is a valid AOF because an empty server with AOF enabled will create
+ * a zero length file at startup, that will remain like that if no write
+ * operation is received. */
if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
server.aof_current_size = 0;
fclose(fp);
- return REDIS_ERR;
- }
-
- if (fp == NULL) {
- redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
- exit(1);
+ return C_ERR;
}
/* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
* to the same file we're about to read. */
- server.aof_state = REDIS_AOF_OFF;
+ server.aof_state = AOF_OFF;
fakeClient = createFakeClient();
startLoading(fp);
+ /* Check if this AOF file has an RDB preamble. In that case we need to
+ * load the RDB file and later continue loading the AOF tail. */
+ char sig[5]; /* "REDIS" */
+ if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
+ /* No RDB preamble, seek back at 0 offset. */
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ } else {
+ /* RDB preamble. Pass loading the RDB functions. */
+ rio rdb;
+
+ serverLog(LL_NOTICE,"Reading RDB preamble from AOF file...");
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ rioInitWithFile(&rdb,fp);
+ if (rdbLoadRio(&rdb,NULL) != C_OK) {
+ serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted");
+ goto readerr;
+ } else {
+ serverLog(LL_NOTICE,"Reading the remaining AOF tail...");
+ }
+ }
+
+ /* Read the actual AOF file, in REPL format, command by command. */
while(1) {
int argc, j;
unsigned long len;
@@ -630,64 +700,112 @@ int loadAppendOnlyFile(char *filename) {
goto readerr;
}
if (buf[0] != '*') goto fmterr;
+ if (buf[1] == '\0') goto readerr;
argc = atoi(buf+1);
if (argc < 1) goto fmterr;
argv = zmalloc(sizeof(robj*)*argc);
+ fakeClient->argc = argc;
+ fakeClient->argv = argv;
+
for (j = 0; j < argc; j++) {
- if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ fakeClient->argc = j; /* Free up to j-1. */
+ freeFakeClientArgv(fakeClient);
+ goto readerr;
+ }
if (buf[0] != '$') goto fmterr;
len = strtol(buf+1,NULL,10);
argsds = sdsnewlen(NULL,len);
- if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
- argv[j] = createObject(REDIS_STRING,argsds);
- if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
+ if (len && fread(argsds,len,1,fp) == 0) {
+ sdsfree(argsds);
+ fakeClient->argc = j; /* Free up to j-1. */
+ freeFakeClientArgv(fakeClient);
+ goto readerr;
+ }
+ argv[j] = createObject(OBJ_STRING,argsds);
+ if (fread(buf,2,1,fp) == 0) {
+ fakeClient->argc = j+1; /* Free up to j. */
+ freeFakeClientArgv(fakeClient);
+ goto readerr; /* discard CRLF */
+ }
}
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
if (!cmd) {
- redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", (char*)argv[0]->ptr);
+ serverLog(LL_WARNING,"Unknown command '%s' reading the append only file", (char*)argv[0]->ptr);
exit(1);
}
+
/* Run the command in the context of a fake client */
- fakeClient->argc = argc;
- fakeClient->argv = argv;
+ fakeClient->cmd = cmd;
cmd->proc(fakeClient);
/* The fake client should not have a reply */
- redisAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
+ serverAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
/* The fake client should never get blocked */
- redisAssert((fakeClient->flags & REDIS_BLOCKED) == 0);
+ serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0);
/* Clean up. Command code may have changed argv/argc so we use the
* argv/argc of the client instead of the local variables. */
- for (j = 0; j < fakeClient->argc; j++)
- decrRefCount(fakeClient->argv[j]);
- zfree(fakeClient->argv);
+ freeFakeClientArgv(fakeClient);
+ fakeClient->cmd = NULL;
+ if (server.aof_load_truncated) valid_up_to = ftello(fp);
}
/* This point can only be reached when EOF is reached without errors.
* If the client is in the middle of a MULTI/EXEC, log error and quit. */
- if (fakeClient->flags & REDIS_MULTI) goto readerr;
+ if (fakeClient->flags & CLIENT_MULTI) goto uxeof;
+loaded_ok: /* DB loaded, cleanup and return C_OK to the caller. */
fclose(fp);
freeFakeClient(fakeClient);
server.aof_state = old_aof_state;
stopLoading();
aofUpdateCurrentSize();
server.aof_rewrite_base_size = server.aof_current_size;
- return REDIS_OK;
+ return C_OK;
-readerr:
- if (feof(fp)) {
- redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
- } else {
- redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
+readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */
+ if (!feof(fp)) {
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
+ exit(1);
}
+
+uxeof: /* Unexpected AOF end of file. */
+ if (server.aof_load_truncated) {
+ serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file !!!");
+ serverLog(LL_WARNING,"!!! Truncating the AOF at offset %llu !!!",
+ (unsigned long long) valid_up_to);
+ if (valid_up_to == -1 || truncate(filename,valid_up_to) == -1) {
+ if (valid_up_to == -1) {
+ serverLog(LL_WARNING,"Last valid command offset is invalid");
+ } else {
+ serverLog(LL_WARNING,"Error truncating the AOF file: %s",
+ strerror(errno));
+ }
+ } else {
+ /* Make sure the AOF file descriptor points to the end of the
+ * file after the truncate call. */
+ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) {
+ serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s",
+ strerror(errno));
+ } else {
+ serverLog(LL_WARNING,
+ "AOF loaded anyway because aof-load-truncated is enabled");
+ goto loaded_ok;
+ }
+ }
+ }
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Unexpected end of file reading the append only file. You can: 1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename>. 2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.");
exit(1);
-fmterr:
- redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
+
+fmterr: /* Format error. */
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
exit(1);
}
@@ -696,16 +814,16 @@ fmterr:
* ------------------------------------------------------------------------- */
/* Delegate writing an object to writing a bulk string or bulk long long.
- * This is not placed in rio.c since that adds the redis.h dependency. */
+ * This is not placed in rio.c since that adds the server.h dependency. */
int rioWriteBulkObject(rio *r, robj *obj) {
/* Avoid using getDecodedObject to help copy-on-write (we are often
* in a child process when this function is called). */
- if (obj->encoding == REDIS_ENCODING_INT) {
+ if (obj->encoding == OBJ_ENCODING_INT) {
return rioWriteBulkLongLong(r,(long)obj->ptr);
} else if (sdsEncodedObject(obj)) {
return rioWriteBulkString(r,obj->ptr,sdslen(obj->ptr));
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
@@ -714,54 +832,31 @@ int rioWriteBulkObject(rio *r, robj *obj) {
int rewriteListObject(rio *r, robj *key, robj *o) {
long long count = 0, items = listTypeLength(o);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = o->ptr;
- unsigned char *p = ziplistIndex(zl,0);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *list = o->ptr;
+ quicklistIter *li = quicklistGetIterator(list, AL_START_HEAD);
+ quicklistEntry entry;
- while(ziplistGet(p,&vstr,&vlen,&vlong)) {
+ while (quicklistNext(li,&entry)) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
-
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (vstr) {
- if (rioWriteBulkString(r,(char*)vstr,vlen) == 0) return 0;
- } else {
- if (rioWriteBulkLongLong(r,vlong) == 0) return 0;
- }
- p = ziplistNext(zl,p);
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listNode *ln;
- listIter li;
-
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
-
- if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
- if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
- if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
- if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (entry.value) {
+ if (rioWriteBulkString(r,(char*)entry.value,entry.sz) == 0) return 0;
+ } else {
+ if (rioWriteBulkLongLong(r,entry.longval) == 0) return 0;
}
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
+ quicklistReleaseIterator(li);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return 1;
}
@@ -771,44 +866,44 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
int rewriteSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = setTypeSize(o);
- if (o->encoding == REDIS_ENCODING_INTSET) {
+ if (o->encoding == OBJ_ENCODING_INTSET) {
int ii = 0;
int64_t llval;
while(intsetGet(o->ptr,ii++,&llval)) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"SADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
if (rioWriteBulkLongLong(r,llval) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
- } else if (o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
dictIterator *di = dictGetIterator(o->ptr);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds ele = dictGetKey(de);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"SADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteBulkString(r,ele,sdslen(ele)) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 1;
}
@@ -818,7 +913,7 @@ int rewriteSetObject(rio *r, robj *key, robj *o) {
int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = zsetLength(o);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = o->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -827,17 +922,17 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
double score;
eptr = ziplistIndex(zl,0);
- redisAssert(eptr != NULL);
+ serverAssert(eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
while (eptr != NULL) {
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
score = zzlGetScore(sptr);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"ZADD",4) == 0) return 0;
@@ -850,34 +945,34 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
if (rioWriteBulkLongLong(r,vll) == 0) return 0;
}
zzlNext(zl,&eptr,&sptr);
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
dictIterator *di = dictGetIterator(zs->dict);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds ele = dictGetKey(de);
double *score = dictGetVal(de);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"ZADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
if (rioWriteBulkDouble(r,*score) == 0) return 0;
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteBulkString(r,ele,sdslen(ele)) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted zset encoding");
+ serverPanic("Unknown sorted zset encoding");
}
return 1;
}
@@ -885,30 +980,26 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
/* Write either the key or the value of the currently selected item of a hash.
* The 'hi' argument passes a valid Redis hash iterator.
* The 'what' filed specifies if to write a key or a value and can be
- * either REDIS_HASH_KEY or REDIS_HASH_VALUE.
+ * either OBJ_HASH_KEY or OBJ_HASH_VALUE.
*
* The function returns 0 on error, non-zero on success. */
static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
+ if (vstr)
return rioWriteBulkString(r, (char*)vstr, vlen);
- } else {
+ else
return rioWriteBulkLongLong(r, vll);
- }
-
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- hashTypeCurrentFromHashTable(hi, what, &value);
- return rioWriteBulkObject(r, value);
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeCurrentFromHashTable(hi, what);
+ return rioWriteBulkString(r, value, sdslen(value));
}
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
return 0;
}
@@ -919,19 +1010,19 @@ int rewriteHashObject(rio *r, robj *key, robj *o) {
long long count = 0, items = hashTypeLength(o);
hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
+ while (hashTypeNext(hi) != C_ERR) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"HMSET",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_KEY) == 0) return 0;
- if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_VALUE) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY) == 0) return 0;
+ if (rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
@@ -940,11 +1031,27 @@ int rewriteHashObject(rio *r, robj *key, robj *o) {
return 1;
}
+/* Call the module type callback in order to rewrite a data type
+ * that is exported by a module and is not handled by Redis itself.
+ * The function returns 0 on error, 1 on success. */
+int rewriteModuleObject(rio *r, robj *key, robj *o) {
+ RedisModuleIO io;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitIOContext(io,mt,r);
+ mt->aof_rewrite(&io,key,mv->value);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return io.error ? 0 : 1;
+}
+
/* This function is called by the child rewriting the AOF file to read
* the difference accumulated from the parent into a buffer, that is
* concatenated at the end of the rewrite. */
ssize_t aofReadDiffFromParent(void) {
- char buf[65536]; /* Default pipe buffer size on most Linux sytems. */
+ char buf[65536]; /* Default pipe buffer size on most Linux systems. */
ssize_t nread, total = 0;
while ((nread =
@@ -955,51 +1062,23 @@ ssize_t aofReadDiffFromParent(void) {
return total;
}
-/* Write a sequence of commands able to fully rebuild the dataset into
- * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
- *
- * In order to minimize the number of commands needed in the rewritten
- * log Redis uses variadic commands when possible, such as RPUSH, SADD
- * and ZADD. However at max REDIS_AOF_REWRITE_ITEMS_PER_CMD items per time
- * are inserted using a single command. */
-int rewriteAppendOnlyFile(char *filename) {
+int rewriteAppendOnlyFileRio(rio *aof) {
dictIterator *di = NULL;
dictEntry *de;
- rio aof;
- FILE *fp;
- char tmpfile[256];
- int j;
- long long now = mstime();
- char byte;
size_t processed = 0;
+ long long now = mstime();
+ int j;
- /* Note that we have to use a different temp name here compared to the
- * one used by rewriteAppendOnlyFileBackground() function. */
- snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
- fp = fopen(tmpfile,"w");
- if (!fp) {
- redisLog(REDIS_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno));
- return REDIS_ERR;
- }
-
- server.aof_child_diff = sdsempty();
- rioInitWithFile(&aof,fp);
- if (server.aof_rewrite_incremental_fsync)
- rioSetAutoSync(&aof,REDIS_AOF_AUTOSYNC_BYTES);
for (j = 0; j < server.dbnum; j++) {
char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
redisDb *db = server.db+j;
dict *d = db->dict;
if (dictSize(d) == 0) continue;
di = dictGetSafeIterator(d);
- if (!di) {
- fclose(fp);
- return REDIS_ERR;
- }
/* SELECT the new DB */
- if (rioWrite(&aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;
- if (rioWriteBulkLongLong(&aof,j) == 0) goto werr;
+ if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;
+ if (rioWriteBulkLongLong(aof,j) == 0) goto werr;
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
@@ -1017,38 +1096,85 @@ int rewriteAppendOnlyFile(char *filename) {
if (expiretime != -1 && expiretime < now) continue;
/* Save the key and associated value */
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
/* Emit a SET command */
char cmd[]="*3\r\n$3\r\nSET\r\n";
- if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr;
+ if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
/* Key and value */
- if (rioWriteBulkObject(&aof,&key) == 0) goto werr;
- if (rioWriteBulkObject(&aof,o) == 0) goto werr;
- } else if (o->type == REDIS_LIST) {
- if (rewriteListObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_SET) {
- if (rewriteSetObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_ZSET) {
- if (rewriteSortedSetObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_HASH) {
- if (rewriteHashObject(&aof,&key,o) == 0) goto werr;
+ if (rioWriteBulkObject(aof,&key) == 0) goto werr;
+ if (rioWriteBulkObject(aof,o) == 0) goto werr;
+ } else if (o->type == OBJ_LIST) {
+ if (rewriteListObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_SET) {
+ if (rewriteSetObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_ZSET) {
+ if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_HASH) {
+ if (rewriteHashObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_MODULE) {
+ if (rewriteModuleObject(aof,&key,o) == 0) goto werr;
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
/* Save the expire time */
if (expiretime != -1) {
char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n";
- if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr;
- if (rioWriteBulkObject(&aof,&key) == 0) goto werr;
- if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr;
+ if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
+ if (rioWriteBulkObject(aof,&key) == 0) goto werr;
+ if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr;
}
/* Read some diff from the parent process from time to time. */
- if (aof.processed_bytes > processed+1024*10) {
- processed = aof.processed_bytes;
+ if (aof->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) {
+ processed = aof->processed_bytes;
aofReadDiffFromParent();
}
}
dictReleaseIterator(di);
+ di = NULL;
+ }
+ return C_OK;
+
+werr:
+ if (di) dictReleaseIterator(di);
+ return C_ERR;
+}
+
+/* Write a sequence of commands able to fully rebuild the dataset into
+ * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
+ *
+ * In order to minimize the number of commands needed in the rewritten
+ * log Redis uses variadic commands when possible, such as RPUSH, SADD
+ * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time
+ * are inserted using a single command. */
+int rewriteAppendOnlyFile(char *filename) {
+ rio aof;
+ FILE *fp;
+ char tmpfile[256];
+ char byte;
+
+ /* Note that we have to use a different temp name here compared to the
+ * one used by rewriteAppendOnlyFileBackground() function. */
+ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno));
+ return C_ERR;
+ }
+
+ server.aof_child_diff = sdsempty();
+ rioInitWithFile(&aof,fp);
+
+ if (server.aof_rewrite_incremental_fsync)
+ rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES);
+
+ if (server.aof_use_rdb_preamble) {
+ int error;
+ if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE,NULL) == C_ERR) {
+ errno = error;
+ goto werr;
+ }
+ } else {
+ if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr;
}
/* Do an initial slow fsync here while the parent is still sending
@@ -1058,7 +1184,7 @@ int rewriteAppendOnlyFile(char *filename) {
/* Read again a few times to get more data from the parent.
* We can't read forever (the server may receive data from clients
- * fater than it is able to send data to the child), so we try to read
+ * faster than it is able to send data to the child), so we try to read
* some more data in a loop as soon as there is a good chance more data
* will come. If it looks like we are wasting time, we abort (this
* happens after 20 ms without new data). */
@@ -1084,13 +1210,13 @@ int rewriteAppendOnlyFile(char *filename) {
* the child will eventually get terminated. */
if (syncRead(server.aof_pipe_read_ack_from_parent,&byte,1,5000) != 1 ||
byte != '!') goto werr;
- redisLog(REDIS_NOTICE,"Parent agreed to stop sending diffs. Finalizing AOF...");
+ serverLog(LL_NOTICE,"Parent agreed to stop sending diffs. Finalizing AOF...");
/* Read the final diff if any. */
aofReadDiffFromParent();
/* Write the received diff to the file. */
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Concatenating %.2f MB of AOF diff received from parent.",
(double) sdslen(server.aof_child_diff) / (1024*1024));
if (rioWrite(&aof,server.aof_child_diff,sdslen(server.aof_child_diff)) == 0)
@@ -1104,19 +1230,18 @@ int rewriteAppendOnlyFile(char *filename) {
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
unlink(tmpfile);
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
- return REDIS_OK;
+ serverLog(LL_NOTICE,"SYNC append only file rewrite performed");
+ return C_OK;
werr:
+ serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
- if (di) dictReleaseIterator(di);
- return REDIS_ERR;
+ return C_ERR;
}
/* ----------------------------------------------------------------------------
@@ -1128,16 +1253,19 @@ werr:
* parent sends a '!' as well to acknowledge. */
void aofChildPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
char byte;
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
if (read(fd,&byte,1) == 1 && byte == '!') {
- redisLog(REDIS_NOTICE,"AOF rewrite child asks to stop sending diffs.");
+ serverLog(LL_NOTICE,"AOF rewrite child asks to stop sending diffs.");
server.aof_stop_sending_diff = 1;
if (write(server.aof_pipe_write_ack_to_child,"!",1) != 1) {
/* If we can't send the ack, inform the user, but don't try again
* since in the other side the children will use a timeout if the
* kernel can't buffer our write, or, the children was
* terminated. */
- redisLog(REDIS_WARNING,"Can't send ACK to AOF child: %s",
+ serverLog(LL_WARNING,"Can't send ACK to AOF child: %s",
strerror(errno));
}
}
@@ -1170,13 +1298,13 @@ int aofCreatePipes(void) {
server.aof_pipe_write_ack_to_child = fds[5];
server.aof_pipe_read_ack_from_parent = fds[4];
server.aof_stop_sending_diff = 0;
- return REDIS_OK;
+ return C_OK;
error:
- redisLog(REDIS_WARNING,"Error opening /setting AOF rewrite IPC pipes: %s",
+ serverLog(LL_WARNING,"Error opening /setting AOF rewrite IPC pipes: %s",
strerror(errno));
for (j = 0; j < 6; j++) if(fds[j] != -1) close(fds[j]);
- return REDIS_ERR;
+ return C_ERR;
}
void aofClosePipes(void) {
@@ -1191,7 +1319,7 @@ void aofClosePipes(void) {
}
/* ----------------------------------------------------------------------------
- * AOF backgorund rewrite
+ * AOF background rewrite
* ------------------------------------------------------------------------- */
/* This is how rewriting of the append only file in background works:
@@ -1210,8 +1338,9 @@ int rewriteAppendOnlyFileBackground(void) {
pid_t childpid;
long long start;
- if (server.aof_child_pid != -1) return REDIS_ERR;
- if (aofCreatePipes() != REDIS_OK) return REDIS_ERR;
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
+ if (aofCreatePipes() != C_OK) return C_ERR;
+ openChildInfoPipe();
start = ustime();
if ((childpid = fork()) == 0) {
char tmpfile[256];
@@ -1220,14 +1349,17 @@ int rewriteAppendOnlyFileBackground(void) {
closeListeningSockets(0);
redisSetProcTitle("redis-aof-rewrite");
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
- if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
- size_t private_dirty = zmalloc_get_private_dirty();
+ if (rewriteAppendOnlyFile(tmpfile) == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"AOF rewrite: %zu MB of memory used by copy-on-write",
private_dirty/(1024*1024));
}
+
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_AOF);
exitFromChild(0);
} else {
exitFromChild(1);
@@ -1238,12 +1370,14 @@ int rewriteAppendOnlyFileBackground(void) {
server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
- redisLog(REDIS_WARNING,
+ closeChildInfoPipe();
+ serverLog(LL_WARNING,
"Can't rewrite append only file in background: fork: %s",
strerror(errno));
- return REDIS_ERR;
+ aofClosePipes();
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background append only file rewriting started by pid %d",childpid);
server.aof_rewrite_scheduled = 0;
server.aof_rewrite_time_start = time(NULL);
@@ -1255,18 +1389,18 @@ int rewriteAppendOnlyFileBackground(void) {
* with a SELECT statement and it will be safe to merge. */
server.aof_selected_db = -1;
replicationScriptCacheFlush();
- return REDIS_OK;
+ return C_OK;
}
- return REDIS_OK; /* unreached */
+ return C_OK; /* unreached */
}
-void bgrewriteaofCommand(redisClient *c) {
+void bgrewriteaofCommand(client *c) {
if (server.aof_child_pid != -1) {
addReplyError(c,"Background append only file rewriting already in progress");
} else if (server.rdb_child_pid != -1) {
server.aof_rewrite_scheduled = 1;
addReplyStatus(c,"Background append only file rewriting scheduled");
- } else if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
+ } else if (rewriteAppendOnlyFileBackground() == C_OK) {
addReplyStatus(c,"Background append only file rewriting started");
} else {
addReply(c,shared.err);
@@ -1280,7 +1414,7 @@ void aofRemoveTempFile(pid_t childpid) {
unlink(tmpfile);
}
-/* Update the server.aof_current_size filed explicitly using stat(2)
+/* Update the server.aof_current_size field explicitly using stat(2)
* to check the size of the file. This is useful after a rewrite or after
* a restart, normally the size is updated just adding the write length
* to the current length, that is much faster. */
@@ -1290,7 +1424,7 @@ void aofUpdateCurrentSize(void) {
latencyStartMonitor(latency);
if (redis_fstat(server.aof_fd,&sb) == -1) {
- redisLog(REDIS_WARNING,"Unable to obtain the AOF file length. stat: %s",
+ serverLog(LL_WARNING,"Unable to obtain the AOF file length. stat: %s",
strerror(errno));
} else {
server.aof_current_size = sb.st_size;
@@ -1308,7 +1442,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
long long now = ustime();
mstime_t latency;
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background AOF rewrite terminated with success");
/* Flush the differences accumulated by the parent to the
@@ -1318,13 +1452,13 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
(int)server.aof_child_pid);
newfd = open(tmpfile,O_WRONLY|O_APPEND);
if (newfd == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unable to open the temporary AOF produced by the child: %s", strerror(errno));
goto cleanup;
}
if (aofRewriteBufferWrite(newfd) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Error trying to flush the parent diff to the rewritten AOF: %s", strerror(errno));
close(newfd);
goto cleanup;
@@ -1332,8 +1466,8 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency);
- redisLog(REDIS_NOTICE,
- "Redidual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024));
+ serverLog(LL_NOTICE,
+ "Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024));
/* The only remaining thing to do is to rename the temporary file to
* the configured file and switch the file descriptor used to do AOF
@@ -1378,8 +1512,11 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
* it exists, because we reference it with "oldfd". */
latencyStartMonitor(latency);
if (rename(tmpfile,server.aof_filename) == -1) {
- redisLog(REDIS_WARNING,
- "Error trying to rename the temporary AOF file: %s", strerror(errno));
+ serverLog(LL_WARNING,
+ "Error trying to rename the temporary AOF file %s into %s: %s",
+ tmpfile,
+ server.aof_filename,
+ strerror(errno));
close(newfd);
if (oldfd != -1) close(oldfd);
goto cleanup;
@@ -1409,27 +1546,29 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
server.aof_buf = sdsempty();
}
- server.aof_lastbgrewrite_status = REDIS_OK;
+ server.aof_lastbgrewrite_status = C_OK;
- redisLog(REDIS_NOTICE, "Background AOF rewrite finished successfully");
+ serverLog(LL_NOTICE, "Background AOF rewrite finished successfully");
/* Change state from WAIT_REWRITE to ON if needed */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE)
- server.aof_state = REDIS_AOF_ON;
+ if (server.aof_state == AOF_WAIT_REWRITE)
+ server.aof_state = AOF_ON;
/* Asynchronously close the overwritten AOF. */
- if (oldfd != -1) bioCreateBackgroundJob(REDIS_BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL);
+ if (oldfd != -1) bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL);
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Background AOF rewrite signal handler took %lldus", ustime()-now);
} else if (!bysignal && exitcode != 0) {
- server.aof_lastbgrewrite_status = REDIS_ERR;
-
- redisLog(REDIS_WARNING,
+ /* SIGUSR1 is whitelisted, so we have a way to kill a child without
+ * tirggering an error conditon. */
+ if (bysignal != SIGUSR1)
+ server.aof_lastbgrewrite_status = C_ERR;
+ serverLog(LL_WARNING,
"Background AOF rewrite terminated with error");
} else {
- server.aof_lastbgrewrite_status = REDIS_ERR;
+ server.aof_lastbgrewrite_status = C_ERR;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Background AOF rewrite terminated by signal %d", bysignal);
}
@@ -1441,6 +1580,6 @@ cleanup:
server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start;
server.aof_rewrite_time_start = -1;
/* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE)
+ if (server.aof_state == AOF_WAIT_REWRITE)
server.aof_rewrite_scheduled = 1;
}
diff --git a/src/atomicvar.h b/src/atomicvar.h
new file mode 100644
index 000000000..84a5bbc5c
--- /dev/null
+++ b/src/atomicvar.h
@@ -0,0 +1,133 @@
+/* This file implements atomic counters using __atomic or __sync macros if
+ * available, otherwise synchronizing different threads using a mutex.
+ *
+ * The exported interaface is composed of three macros:
+ *
+ * atomicIncr(var,count) -- Increment the atomic counter
+ * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter
+ * atomicDecr(var,count) -- Decrement the atomic counter
+ * atomicGet(var,dstvar) -- Fetch the atomic counter value
+ * atomicSet(var,value) -- Set the atomic counter value
+ *
+ * The variable 'var' should also have a declared mutex with the same
+ * name and the "_mutex" postfix, for instance:
+ *
+ * long myvar;
+ * pthread_mutex_t myvar_mutex;
+ * atomicSet(myvar,12345);
+ *
+ * If atomic primitives are availble (tested in config.h) the mutex
+ * is not used.
+ *
+ * Never use return value from the macros, instead use the AtomicGetIncr()
+ * if you need to get the current value and increment it atomically, like
+ * in the followign example:
+ *
+ * long oldvalue;
+ * atomicGetIncr(myvar,oldvalue,1);
+ * doSomethingWith(oldvalue);
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <pthread.h>
+
+#ifndef __ATOMIC_VAR_H
+#define __ATOMIC_VAR_H
+
+/* To test Redis with Helgrind (a Valgrind tool) it is useful to define
+ * the following macro, so that __sync macros are used: those can be detected
+ * by Helgrind (even if they are less efficient) so that no false positive
+ * is reported. */
+// #define __ATOMIC_VAR_FORCE_SYNC_MACROS
+
+#if !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057)
+/* Implementation using __atomic macros. */
+
+#define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \
+} while(0)
+#define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGet(var,dstvar) do { \
+ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \
+} while(0)
+#define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED)
+#define REDIS_ATOMIC_API "atomic-builtin"
+
+#elif defined(HAVE_ATOMIC)
+/* Implementation using __sync macros. */
+
+#define atomicIncr(var,count) __sync_add_and_fetch(&var,(count))
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __sync_fetch_and_add(&var,(count)); \
+} while(0)
+#define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count))
+#define atomicGet(var,dstvar) do { \
+ dstvar = __sync_sub_and_fetch(&var,0); \
+} while(0)
+#define atomicSet(var,value) do { \
+ while(!__sync_bool_compare_and_swap(&var,var,value)); \
+} while(0)
+#define REDIS_ATOMIC_API "sync-builtin"
+
+#else
+/* Implementation using pthread mutex. */
+
+#define atomicIncr(var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var += (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ oldvalue_var = var; \
+ var += (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicDecr(var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var -= (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicGet(var,dstvar) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ dstvar = var; \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicSet(var,value) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var = value; \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define REDIS_ATOMIC_API "pthread-mutex"
+
+#endif
+#endif /* __ATOMIC_VAR_H */
diff --git a/src/bio.c b/src/bio.c
index 4bd5a17c6..da11f7b86 100644
--- a/src/bio.c
+++ b/src/bio.c
@@ -58,20 +58,21 @@
*/
-#include "redis.h"
+#include "server.h"
#include "bio.h"
-static pthread_t bio_threads[REDIS_BIO_NUM_OPS];
-static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS];
-static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS];
-static list *bio_jobs[REDIS_BIO_NUM_OPS];
+static pthread_t bio_threads[BIO_NUM_OPS];
+static pthread_mutex_t bio_mutex[BIO_NUM_OPS];
+static pthread_cond_t bio_newjob_cond[BIO_NUM_OPS];
+static pthread_cond_t bio_step_cond[BIO_NUM_OPS];
+static list *bio_jobs[BIO_NUM_OPS];
/* The following array is used to hold the number of pending jobs for every
* OP type. This allows us to export the bioPendingJobsOfType() API that is
* useful when the main thread wants to perform some operation that may involve
* objects shared with the background thread. The main thread will just wait
* that there are no longer jobs of this type to be executed before performing
* the sensible operation. This data is also useful for reporting. */
-static unsigned long long bio_pending[REDIS_BIO_NUM_OPS];
+static unsigned long long bio_pending[BIO_NUM_OPS];
/* This structure represents a background Job. It is only used locally to this
* file as the API does not expose the internals at all. */
@@ -83,6 +84,9 @@ struct bio_job {
};
void *bioProcessBackgroundJobs(void *arg);
+void lazyfreeFreeObjectFromBioThread(robj *o);
+void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2);
+void lazyfreeFreeSlotsMapFromBioThread(zskiplist *sl);
/* Make sure we have enough stack to perform all the things we do in the
* main thread. */
@@ -96,9 +100,10 @@ void bioInit(void) {
int j;
/* Initialization of state vars and objects */
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
pthread_mutex_init(&bio_mutex[j],NULL);
- pthread_cond_init(&bio_condvar[j],NULL);
+ pthread_cond_init(&bio_newjob_cond[j],NULL);
+ pthread_cond_init(&bio_step_cond[j],NULL);
bio_jobs[j] = listCreate();
bio_pending[j] = 0;
}
@@ -113,10 +118,10 @@ void bioInit(void) {
/* Ready to spawn our threads. We use the single argument the thread
* function accepts in order to pass the job ID the thread is
* responsible of. */
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
void *arg = (void*)(unsigned long) j;
if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
- redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs.");
+ serverLog(LL_WARNING,"Fatal: Can't initialize Background Jobs.");
exit(1);
}
bio_threads[j] = thread;
@@ -133,7 +138,7 @@ void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
pthread_mutex_lock(&bio_mutex[type]);
listAddNodeTail(bio_jobs[type],job);
bio_pending[type]++;
- pthread_cond_signal(&bio_condvar[type]);
+ pthread_cond_signal(&bio_newjob_cond[type]);
pthread_mutex_unlock(&bio_mutex[type]);
}
@@ -142,6 +147,13 @@ void *bioProcessBackgroundJobs(void *arg) {
unsigned long type = (unsigned long) arg;
sigset_t sigset;
+ /* Check that the type is within the right interval. */
+ if (type >= BIO_NUM_OPS) {
+ serverLog(LL_WARNING,
+ "Warning: bio thread started with wrong type %lu",type);
+ return NULL;
+ }
+
/* Make the thread killable at any time, so that bioKillThreads()
* can work reliably. */
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
@@ -153,7 +165,7 @@ void *bioProcessBackgroundJobs(void *arg) {
sigemptyset(&sigset);
sigaddset(&sigset, SIGALRM);
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
while(1) {
@@ -161,7 +173,7 @@ void *bioProcessBackgroundJobs(void *arg) {
/* The loop always starts with the lock hold. */
if (listLength(bio_jobs[type]) == 0) {
- pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]);
+ pthread_cond_wait(&bio_newjob_cond[type],&bio_mutex[type]);
continue;
}
/* Pop the job from the queue. */
@@ -172,15 +184,29 @@ void *bioProcessBackgroundJobs(void *arg) {
pthread_mutex_unlock(&bio_mutex[type]);
/* Process the job accordingly to its type. */
- if (type == REDIS_BIO_CLOSE_FILE) {
+ if (type == BIO_CLOSE_FILE) {
close((long)job->arg1);
- } else if (type == REDIS_BIO_AOF_FSYNC) {
+ } else if (type == BIO_AOF_FSYNC) {
aof_fsync((long)job->arg1);
+ } else if (type == BIO_LAZY_FREE) {
+ /* What we free changes depending on what arguments are set:
+ * arg1 -> free the object at pointer.
+ * arg2 & arg3 -> free two dictionaries (a Redis DB).
+ * only arg3 -> free the skiplist. */
+ if (job->arg1)
+ lazyfreeFreeObjectFromBioThread(job->arg1);
+ else if (job->arg2 && job->arg3)
+ lazyfreeFreeDatabaseFromBioThread(job->arg2,job->arg3);
+ else if (job->arg3)
+ lazyfreeFreeSlotsMapFromBioThread(job->arg3);
} else {
- redisPanic("Wrong job type in bioProcessBackgroundJobs().");
+ serverPanic("Wrong job type in bioProcessBackgroundJobs().");
}
zfree(job);
+ /* Unblock threads blocked on bioWaitStepOfType() if any. */
+ pthread_cond_broadcast(&bio_step_cond[type]);
+
/* Lock again before reiterating the loop, if there are no longer
* jobs to process we'll block again in pthread_cond_wait(). */
pthread_mutex_lock(&bio_mutex[type]);
@@ -198,6 +224,28 @@ unsigned long long bioPendingJobsOfType(int type) {
return val;
}
+/* If there are pending jobs for the specified type, the function blocks
+ * and waits that the next job was processed. Otherwise the function
+ * does not block and returns ASAP.
+ *
+ * The function returns the number of jobs still to process of the
+ * requested type.
+ *
+ * This function is useful when, from another thread, we want to wait
+ * for a bio.c thread to do more work in a blocking way.
+ */
+unsigned long long bioWaitStepOfType(int type) {
+ unsigned long long val;
+ pthread_mutex_lock(&bio_mutex[type]);
+ val = bio_pending[type];
+ if (val != 0) {
+ pthread_cond_wait(&bio_step_cond[type],&bio_mutex[type]);
+ val = bio_pending[type];
+ }
+ pthread_mutex_unlock(&bio_mutex[type]);
+ return val;
+}
+
/* Kill the running bio threads in an unclean way. This function should be
* used only when it's critical to stop the threads for some reason.
* Currently Redis does this only on crash (for instance on SIGSEGV) in order
@@ -205,14 +253,14 @@ unsigned long long bioPendingJobsOfType(int type) {
void bioKillThreads(void) {
int err, j;
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
if (pthread_cancel(bio_threads[j]) == 0) {
if ((err = pthread_join(bio_threads[j],NULL)) != 0) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bio thread for job type #%d can be joined: %s",
j, strerror(err));
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bio thread for job type #%d terminated",j);
}
}
diff --git a/src/bio.h b/src/bio.h
index 85f03ad1a..4b15d1c4d 100644
--- a/src/bio.h
+++ b/src/bio.h
@@ -31,11 +31,12 @@
void bioInit(void);
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3);
unsigned long long bioPendingJobsOfType(int type);
-void bioWaitPendingJobsLE(int type, unsigned long long num);
+unsigned long long bioWaitStepOfType(int type);
time_t bioOlderJobOfType(int type);
void bioKillThreads(void);
/* Background job opcodes */
-#define REDIS_BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
-#define REDIS_BIO_AOF_FSYNC 1 /* Deferred AOF fsync. */
-#define REDIS_BIO_NUM_OPS 2
+#define BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
+#define BIO_AOF_FSYNC 1 /* Deferred AOF fsync. */
+#define BIO_LAZY_FREE 2 /* Deferred objects freeing. */
+#define BIO_NUM_OPS 3
diff --git a/src/bitops.c b/src/bitops.c
index 28f772430..43450fca3 100644
--- a/src/bitops.c
+++ b/src/bitops.c
@@ -28,33 +28,12 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* -----------------------------------------------------------------------------
* Helpers and low level bit functions.
* -------------------------------------------------------------------------- */
-/* This helper function used by GETBIT / SETBIT parses the bit offset argument
- * making sure an error is returned if it is negative or if it overflows
- * Redis 512 MB limit for the string value. */
-static int getBitOffsetFromArgument(redisClient *c, robj *o, size_t *offset) {
- long long loffset;
- char *err = "bit offset is not an integer or out of range";
-
- if (getLongLongFromObjectOrReply(c,o,&loffset,err) != REDIS_OK)
- return REDIS_ERR;
-
- /* Limit offset to 512MB in bytes */
- if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
- {
- addReplyError(c,err);
- return REDIS_ERR;
- }
-
- *offset = (size_t)loffset;
- return REDIS_OK;
-}
-
/* Count number of bits set in the binary array pointed by 's' and long
* 'count' bytes. The implementation of this function is required to
* work with a input string length up to 512 MB. */
@@ -70,16 +49,19 @@ size_t redisPopcount(void *s, long count) {
count--;
}
- /* Count bits 16 bytes at a time */
+ /* Count bits 28 bytes at a time */
p4 = (uint32_t*)p;
- while(count>=16) {
- uint32_t aux1, aux2, aux3, aux4;
+ while(count>=28) {
+ uint32_t aux1, aux2, aux3, aux4, aux5, aux6, aux7;
aux1 = *p4++;
aux2 = *p4++;
aux3 = *p4++;
aux4 = *p4++;
- count -= 16;
+ aux5 = *p4++;
+ aux6 = *p4++;
+ aux7 = *p4++;
+ count -= 28;
aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
@@ -89,10 +71,19 @@ size_t redisPopcount(void *s, long count) {
aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
- bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
- ((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24);
+ aux5 = aux5 - ((aux5 >> 1) & 0x55555555);
+ aux5 = (aux5 & 0x33333333) + ((aux5 >> 2) & 0x33333333);
+ aux6 = aux6 - ((aux6 >> 1) & 0x55555555);
+ aux6 = (aux6 & 0x33333333) + ((aux6 >> 2) & 0x33333333);
+ aux7 = aux7 - ((aux7 >> 1) & 0x55555555);
+ aux7 = (aux7 & 0x33333333) + ((aux7 >> 2) & 0x33333333);
+ bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) +
+ ((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) +
+ ((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) +
+ ((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) +
+ ((aux5 + (aux5 >> 4)) & 0x0F0F0F0F) +
+ ((aux6 + (aux6 >> 4)) & 0x0F0F0F0F) +
+ ((aux7 + (aux7 >> 4)) & 0x0F0F0F0F))* 0x01010101) >> 24;
}
/* Count the remaining bytes. */
p = (unsigned char*)p4;
@@ -107,12 +98,13 @@ size_t redisPopcount(void *s, long count) {
* no zero bit is found, it returns count*8 assuming the string is zero
* padded on the right. However if 'bit' is 1 it is possible that there is
* not a single set bit in the bitmap. In this special case -1 is returned. */
-long redisBitpos(void *s, long count, int bit) {
+long redisBitpos(void *s, unsigned long count, int bit) {
unsigned long *l;
unsigned char *c;
unsigned long skipval, word = 0, one;
long pos = 0; /* Position of bit, to return to the caller. */
- int j;
+ unsigned long j;
+ int found;
/* Process whole words first, seeking for first word that is not
* all ones or all zeros respectively if we are lookig for zeros
@@ -126,21 +118,27 @@ long redisBitpos(void *s, long count, int bit) {
/* Skip initial bits not aligned to sizeof(unsigned long) byte by byte. */
skipval = bit ? 0 : UCHAR_MAX;
c = (unsigned char*) s;
+ found = 0;
while((unsigned long)c & (sizeof(*l)-1) && count) {
- if (*c != skipval) break;
+ if (*c != skipval) {
+ found = 1;
+ break;
+ }
c++;
count--;
pos += 8;
}
/* Skip bits with full word step. */
- skipval = bit ? 0 : ULONG_MAX;
l = (unsigned long*) c;
- while (count >= sizeof(*l)) {
- if (*l != skipval) break;
- l++;
- count -= sizeof(*l);
- pos += sizeof(*l)*8;
+ if (!found) {
+ skipval = bit ? 0 : ULONG_MAX;
+ while (count >= sizeof(*l)) {
+ if (*l != skipval) break;
+ l++;
+ count -= sizeof(*l);
+ pos += sizeof(*l)*8;
+ }
}
/* Load bytes into "word" considering the first byte as the most significant
@@ -183,10 +181,213 @@ long redisBitpos(void *s, long count, int bit) {
/* If we reached this point, there is a bug in the algorithm, since
* the case of no match is handled as a special case before. */
- redisPanic("End of redisBitpos() reached.");
+ serverPanic("End of redisBitpos() reached.");
return 0; /* Just to avoid warnings. */
}
+/* The following set.*Bitfield and get.*Bitfield functions implement setting
+ * and getting arbitrary size (up to 64 bits) signed and unsigned integers
+ * at arbitrary positions into a bitmap.
+ *
+ * The representation considers the bitmap as having the bit number 0 to be
+ * the most significant bit of the first byte, and so forth, so for example
+ * setting a 5 bits unsigned integer to value 23 at offset 7 into a bitmap
+ * previously set to all zeroes, will produce the following representation:
+ *
+ * +--------+--------+
+ * |00000001|01110000|
+ * +--------+--------+
+ *
+ * When offsets and integer sizes are aligned to bytes boundaries, this is the
+ * same as big endian, however when such alignment does not exist, it's important
+ * to also understand how the bits inside a byte are ordered.
+ *
+ * Note that this format follows the same convention as SETBIT and related
+ * commands.
+ */
+
+void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value) {
+ uint64_t byte, bit, byteval, bitval, j;
+
+ for (j = 0; j < bits; j++) {
+ bitval = (value & ((uint64_t)1<<(bits-1-j))) != 0;
+ byte = offset >> 3;
+ bit = 7 - (offset & 0x7);
+ byteval = p[byte];
+ byteval &= ~(1 << bit);
+ byteval |= bitval << bit;
+ p[byte] = byteval & 0xff;
+ offset++;
+ }
+}
+
+void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value) {
+ uint64_t uv = value; /* Casting will add UINT64_MAX + 1 if v is negative. */
+ setUnsignedBitfield(p,offset,bits,uv);
+}
+
+uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
+ uint64_t byte, bit, byteval, bitval, j, value = 0;
+
+ for (j = 0; j < bits; j++) {
+ byte = offset >> 3;
+ bit = 7 - (offset & 0x7);
+ byteval = p[byte];
+ bitval = (byteval >> bit) & 1;
+ value = (value<<1) | bitval;
+ offset++;
+ }
+ return value;
+}
+
+int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
+ int64_t value;
+ union {uint64_t u; int64_t i;} conv;
+
+ /* Converting from unsigned to signed is undefined when the value does
+ * not fit, however here we assume two's complement and the original value
+ * was obtained from signed -> unsigned conversion, so we'll find the
+ * most significant bit set if the original value was negative.
+ *
+ * Note that two's complement is mandatory for exact-width types
+ * according to the C99 standard. */
+ conv.u = getUnsignedBitfield(p,offset,bits);
+ value = conv.i;
+
+ /* If the top significant bit is 1, propagate it to all the
+ * higher bits for two's complement representation of signed
+ * integers. */
+ if (value & ((uint64_t)1 << (bits-1)))
+ value |= ((uint64_t)-1) << bits;
+ return value;
+}
+
+/* The following two functions detect overflow of a value in the context
+ * of storing it as an unsigned or signed integer with the specified
+ * number of bits. The functions both take the value and a possible increment.
+ * If no overflow could happen and the value+increment fit inside the limits,
+ * then zero is returned, otherwise in case of overflow, 1 is returned,
+ * otherwise in case of underflow, -1 is returned.
+ *
+ * When non-zero is returned (overflow or underflow), if not NULL, *limit is
+ * set to the value the operation should result when an overflow happens,
+ * depending on the specified overflow semantics:
+ *
+ * For BFOVERFLOW_SAT if 1 is returned, *limit is set to the maximum value that
+ * you can store in that integer. When -1 is returned, *limit is set to the
+ * minimum value that an integer of that size can represent.
+ *
+ * For BFOVERFLOW_WRAP *limit is set by performing the operation in order to
+ * "wrap" around towards zero for unsigned integers, or towards the most
+ * negative number that is possible to represent for signed integers. */
+
+#define BFOVERFLOW_WRAP 0
+#define BFOVERFLOW_SAT 1
+#define BFOVERFLOW_FAIL 2 /* Used by the BITFIELD command implementation. */
+
+int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit) {
+ uint64_t max = (bits == 64) ? UINT64_MAX : (((uint64_t)1<<bits)-1);
+ int64_t maxincr = max-value;
+ int64_t minincr = -value;
+
+ if (value > max || (incr > 0 && incr > maxincr)) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = max;
+ }
+ }
+ return 1;
+ } else if (incr < 0 && incr < minincr) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = 0;
+ }
+ }
+ return -1;
+ }
+ return 0;
+
+handle_wrap:
+ {
+ uint64_t mask = ((uint64_t)-1) << bits;
+ uint64_t res = value+incr;
+
+ res &= ~mask;
+ *limit = res;
+ }
+ return 1;
+}
+
+int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int owtype, int64_t *limit) {
+ int64_t max = (bits == 64) ? INT64_MAX : (((int64_t)1<<(bits-1))-1);
+ int64_t min = (-max)-1;
+
+ /* Note that maxincr and minincr could overflow, but we use the values
+ * only after checking 'value' range, so when we use it no overflow
+ * happens. */
+ int64_t maxincr = max-value;
+ int64_t minincr = min-value;
+
+ if (value > max || (bits != 64 && incr > maxincr) || (value >= 0 && incr > 0 && incr > maxincr))
+ {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = max;
+ }
+ }
+ return 1;
+ } else if (value < min || (bits != 64 && incr < minincr) || (value < 0 && incr < 0 && incr < minincr)) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = min;
+ }
+ }
+ return -1;
+ }
+ return 0;
+
+handle_wrap:
+ {
+ uint64_t mask = ((uint64_t)-1) << bits;
+ uint64_t msb = (uint64_t)1 << (bits-1);
+ uint64_t a = value, b = incr, c;
+ c = a+b; /* Perform addition as unsigned so that's defined. */
+
+ /* If the sign bit is set, propagate to all the higher order
+ * bits, to cap the negative value. If it's clear, mask to
+ * the positive integer limit. */
+ if (c & msb) {
+ c |= mask;
+ } else {
+ c &= ~mask;
+ }
+ *limit = c;
+ }
+ return 1;
+}
+
+/* Debugging function. Just show bits in the specified bitmap. Not used
+ * but here for not having to rewrite it when debugging is needed. */
+void printBits(unsigned char *p, unsigned long count) {
+ unsigned long j, i, byte;
+
+ for (j = 0; j < count; j++) {
+ byte = p[j];
+ for (i = 0x80; i > 0; i /= 2)
+ printf("%c", (byte & i) ? '1' : '0');
+ printf("|");
+ }
+ printf("\n");
+}
+
/* -----------------------------------------------------------------------------
* Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP.
* -------------------------------------------------------------------------- */
@@ -196,19 +397,143 @@ long redisBitpos(void *s, long count, int bit) {
#define BITOP_XOR 2
#define BITOP_NOT 3
+#define BITFIELDOP_GET 0
+#define BITFIELDOP_SET 1
+#define BITFIELDOP_INCRBY 2
+
+/* This helper function used by GETBIT / SETBIT parses the bit offset argument
+ * making sure an error is returned if it is negative or if it overflows
+ * Redis 512 MB limit for the string value.
+ *
+ * If the 'hash' argument is true, and 'bits' is positive, then the command
+ * will also parse bit offsets prefixed by "#". In such a case the offset
+ * is multiplied by 'bits'. This is useful for the BITFIELD command. */
+int getBitOffsetFromArgument(client *c, robj *o, size_t *offset, int hash, int bits) {
+ long long loffset;
+ char *err = "bit offset is not an integer or out of range";
+ char *p = o->ptr;
+ size_t plen = sdslen(p);
+ int usehash = 0;
+
+ /* Handle #<offset> form. */
+ if (p[0] == '#' && hash && bits > 0) usehash = 1;
+
+ if (string2ll(p+usehash,plen-usehash,&loffset) == 0) {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ /* Adjust the offset by 'bits' for #<offset> form. */
+ if (usehash) loffset *= bits;
+
+ /* Limit offset to 512MB in bytes */
+ if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ *offset = (size_t)loffset;
+ return C_OK;
+}
+
+/* This helper function for BITFIELD parses a bitfield type in the form
+ * <sign><bits> where sign is 'u' or 'i' for unsigned and signed, and
+ * the bits is a value between 1 and 64. However 64 bits unsigned integers
+ * are reported as an error because of current limitations of Redis protocol
+ * to return unsigned integer values greater than INT64_MAX.
+ *
+ * On error C_ERR is returned and an error is sent to the client. */
+int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
+ char *p = o->ptr;
+ char *err = "Invalid bitfield type. Use something like i16 u8. Note that u64 is not supported but i64 is.";
+ long long llbits;
+
+ if (p[0] == 'i') {
+ *sign = 1;
+ } else if (p[0] == 'u') {
+ *sign = 0;
+ } else {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ if ((string2ll(p+1,strlen(p+1),&llbits)) == 0 ||
+ llbits < 1 ||
+ (*sign == 1 && llbits > 64) ||
+ (*sign == 0 && llbits > 63))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+ *bits = llbits;
+ return C_OK;
+}
+
+/* This is an helper function for commands implementations that need to write
+ * bits to a string object. The command creates or pad with zeroes the string
+ * so that the 'maxbit' bit can be addressed. The object is finally
+ * returned. Otherwise if the key holds a wrong type NULL is returned and
+ * an error is sent to the client. */
+robj *lookupStringForBitCommand(client *c, size_t maxbit) {
+ size_t byte = maxbit >> 3;
+ robj *o = lookupKeyWrite(c->db,c->argv[1]);
+
+ if (o == NULL) {
+ o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
+ dbAdd(c->db,c->argv[1],o);
+ } else {
+ if (checkType(c,o,OBJ_STRING)) return NULL;
+ o = dbUnshareStringValue(c->db,c->argv[1],o);
+ o->ptr = sdsgrowzero(o->ptr,byte+1);
+ }
+ return o;
+}
+
+/* Return a pointer to the string object content, and stores its length
+ * in 'len'. The user is required to pass (likely stack allocated) buffer
+ * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in the case
+ * the object is integer encoded in order to provide the representation
+ * without using heap allocation.
+ *
+ * The function returns the pointer to the object array of bytes representing
+ * the string it contains, that may be a pointer to 'llbuf' or to the
+ * internal object representation. As a side effect 'len' is filled with
+ * the length of such buffer.
+ *
+ * If the source object is NULL the function is guaranteed to return NULL
+ * and set 'len' to 0. */
+unsigned char *getObjectReadOnlyString(robj *o, long *len, char *llbuf) {
+ serverAssert(o->type == OBJ_STRING);
+ unsigned char *p = NULL;
+
+ /* Set the 'p' pointer to the string, that can be just a stack allocated
+ * array if our string was integer encoded. */
+ if (o && o->encoding == OBJ_ENCODING_INT) {
+ p = (unsigned char*) llbuf;
+ if (len) *len = ll2string(llbuf,LONG_STR_SIZE,(long)o->ptr);
+ } else if (o) {
+ p = (unsigned char*) o->ptr;
+ if (len) *len = sdslen(o->ptr);
+ } else {
+ if (len) *len = 0;
+ }
+ return p;
+}
+
/* SETBIT key offset bitvalue */
-void setbitCommand(redisClient *c) {
+void setbitCommand(client *c) {
robj *o;
char *err = "bit is not an integer or out of range";
size_t bitoffset;
- int byte, bit;
+ ssize_t byte, bit;
int byteval, bitval;
long on;
- if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
+ if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
return;
- if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != C_OK)
return;
/* Bits can only be set or cleared... */
@@ -217,20 +542,10 @@ void setbitCommand(redisClient *c) {
return;
}
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (o == NULL) {
- o = createObject(REDIS_STRING,sdsempty());
- dbAdd(c->db,c->argv[1],o);
- } else {
- if (checkType(c,o,REDIS_STRING)) return;
- o = dbUnshareStringValue(c->db,c->argv[1],o);
- }
-
- /* Grow sds value to the right length if necessary */
- byte = bitoffset >> 3;
- o->ptr = sdsgrowzero(o->ptr,byte+1);
+ if ((o = lookupStringForBitCommand(c,bitoffset)) == NULL) return;
/* Get current values */
+ byte = bitoffset >> 3;
byteval = ((uint8_t*)o->ptr)[byte];
bit = 7 - (bitoffset & 0x7);
bitval = byteval & (1 << bit);
@@ -240,24 +555,24 @@ void setbitCommand(redisClient *c) {
byteval |= ((on & 0x1) << bit);
((uint8_t*)o->ptr)[byte] = byteval;
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty++;
addReply(c, bitval ? shared.cone : shared.czero);
}
/* GETBIT key offset */
-void getbitCommand(redisClient *c) {
+void getbitCommand(client *c) {
robj *o;
char llbuf[32];
size_t bitoffset;
size_t byte, bit;
size_t bitval = 0;
- if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
+ if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
@@ -273,14 +588,15 @@ void getbitCommand(redisClient *c) {
}
/* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN */
-void bitopCommand(redisClient *c) {
+void bitopCommand(client *c) {
char *opname = c->argv[1]->ptr;
robj *o, *targetkey = c->argv[2];
- long op, j, numkeys;
+ unsigned long op, j, numkeys;
robj **objects; /* Array of source objects. */
unsigned char **src; /* Array of source strings pointers. */
- long *len, maxlen = 0; /* Array of length of src strings, and max len. */
- long minlen = 0; /* Min len among the input keys. */
+ unsigned long *len, maxlen = 0; /* Array of length of src strings,
+ and max len. */
+ unsigned long minlen = 0; /* Min len among the input keys. */
unsigned char *res = NULL; /* Resulting string. */
/* Parse the operation name. */
@@ -319,10 +635,11 @@ void bitopCommand(redisClient *c) {
continue;
}
/* Return an error if one of the keys is not a string. */
- if (checkType(c,o,REDIS_STRING)) {
- for (j = j-1; j >= 0; j--) {
- if (objects[j])
- decrRefCount(objects[j]);
+ if (checkType(c,o,OBJ_STRING)) {
+ unsigned long i;
+ for (i = 0; i < j; i++) {
+ if (objects[i])
+ decrRefCount(objects[i]);
}
zfree(src);
zfree(len);
@@ -340,13 +657,16 @@ void bitopCommand(redisClient *c) {
if (maxlen) {
res = (unsigned char*) sdsnewlen(NULL,maxlen);
unsigned char output, byte;
- long i;
+ unsigned long i;
/* Fast path: as far as we have data for all the input bitmaps we
* can take a fast path that performs much better than the
- * vanilla algorithm. */
+ * vanilla algorithm. On ARM we skip the fast path since it will
+ * result in GCC compiling the code using multiple-words load/store
+ * operations that are not supported even in ARM >= v6. */
j = 0;
- if (minlen && numkeys <= 16) {
+ #ifndef USE_ALIGNED_ACCESS
+ if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
unsigned long *lp[16];
unsigned long *lres = (unsigned long*) res;
@@ -406,6 +726,7 @@ void bitopCommand(redisClient *c) {
}
}
}
+ #endif
/* j is set to the next byte to process by the previous loop. */
for (; j < maxlen; j++) {
@@ -432,46 +753,41 @@ void bitopCommand(redisClient *c) {
/* Store the computed value into the target key */
if (maxlen) {
- o = createObject(REDIS_STRING,res);
+ o = createObject(OBJ_STRING,res);
setKey(c->db,targetkey,o);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",targetkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id);
decrRefCount(o);
} else if (dbDelete(c->db,targetkey)) {
signalModifiedKey(c->db,targetkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",targetkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id);
}
server.dirty++;
addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
}
/* BITCOUNT key [start end] */
-void bitcountCommand(redisClient *c) {
+void bitcountCommand(client *c) {
robj *o;
long start, end, strlen;
unsigned char *p;
- char llbuf[32];
+ char llbuf[LONG_STR_SIZE];
/* Lookup, check for type, and return 0 for non existing keys. */
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
-
- /* Set the 'p' pointer to the string, that can be just a stack allocated
- * array if our string was integer encoded. */
- if (o->encoding == REDIS_ENCODING_INT) {
- p = (unsigned char*) llbuf;
- strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
- } else {
- p = (unsigned char*) o->ptr;
- strlen = sdslen(o->ptr);
- }
+ checkType(c,o,OBJ_STRING)) return;
+ p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4) {
- if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
- if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
/* Convert negative indexes */
+ if (start < 0 && end < 0 && start > end) {
+ addReply(c,shared.czero);
+ return;
+ }
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
@@ -499,16 +815,16 @@ void bitcountCommand(redisClient *c) {
}
/* BITPOS key bit [start [end]] */
-void bitposCommand(redisClient *c) {
+void bitposCommand(client *c) {
robj *o;
long bit, start, end, strlen;
unsigned char *p;
- char llbuf[32];
+ char llbuf[LONG_STR_SIZE];
int end_given = 0;
/* Parse the bit argument to understand what we are looking for, set
* or clear bits. */
- if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != C_OK)
return;
if (bit != 0 && bit != 1) {
addReplyError(c, "The bit argument must be 1 or 0.");
@@ -522,24 +838,15 @@ void bitposCommand(redisClient *c) {
addReplyLongLong(c, bit ? -1 : 0);
return;
}
- if (checkType(c,o,REDIS_STRING)) return;
-
- /* Set the 'p' pointer to the string, that can be just a stack allocated
- * array if our string was integer encoded. */
- if (o->encoding == REDIS_ENCODING_INT) {
- p = (unsigned char*) llbuf;
- strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
- } else {
- p = (unsigned char*) o->ptr;
- strlen = sdslen(o->ptr);
- }
+ if (checkType(c,o,OBJ_STRING)) return;
+ p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4 || c->argc == 5) {
- if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK)
return;
if (c->argc == 5) {
- if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK)
return;
end_given = 1;
} else {
@@ -584,3 +891,235 @@ void bitposCommand(redisClient *c) {
addReplyLongLong(c,pos);
}
}
+
+/* BITFIELD key subcommand-1 arg ... subcommand-2 arg ... subcommand-N ...
+ *
+ * Supported subcommands:
+ *
+ * GET <type> <offset>
+ * SET <type> <offset> <value>
+ * INCRBY <type> <offset> <increment>
+ * OVERFLOW [WRAP|SAT|FAIL]
+ */
+
+struct bitfieldOp {
+ uint64_t offset; /* Bitfield offset. */
+ int64_t i64; /* Increment amount (INCRBY) or SET value */
+ int opcode; /* Operation id. */
+ int owtype; /* Overflow type to use. */
+ int bits; /* Integer bitfield bits width. */
+ int sign; /* True if signed, otherwise unsigned op. */
+};
+
+void bitfieldCommand(client *c) {
+ robj *o;
+ size_t bitoffset;
+ int j, numops = 0, changes = 0;
+ struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
+ int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
+ int readonly = 1;
+ size_t higest_write_offset = 0;
+
+ for (j = 2; j < c->argc; j++) {
+ int remargs = c->argc-j-1; /* Remaining args other than current. */
+ char *subcmd = c->argv[j]->ptr; /* Current command name. */
+ int opcode; /* Current operation code. */
+ long long i64 = 0; /* Signed SET value. */
+ int sign = 0; /* Signed or unsigned type? */
+ int bits = 0; /* Bitfield width in bits. */
+
+ if (!strcasecmp(subcmd,"get") && remargs >= 2)
+ opcode = BITFIELDOP_GET;
+ else if (!strcasecmp(subcmd,"set") && remargs >= 3)
+ opcode = BITFIELDOP_SET;
+ else if (!strcasecmp(subcmd,"incrby") && remargs >= 3)
+ opcode = BITFIELDOP_INCRBY;
+ else if (!strcasecmp(subcmd,"overflow") && remargs >= 1) {
+ char *owtypename = c->argv[j+1]->ptr;
+ j++;
+ if (!strcasecmp(owtypename,"wrap"))
+ owtype = BFOVERFLOW_WRAP;
+ else if (!strcasecmp(owtypename,"sat"))
+ owtype = BFOVERFLOW_SAT;
+ else if (!strcasecmp(owtypename,"fail"))
+ owtype = BFOVERFLOW_FAIL;
+ else {
+ addReplyError(c,"Invalid OVERFLOW type specified");
+ zfree(ops);
+ return;
+ }
+ continue;
+ } else {
+ addReply(c,shared.syntaxerr);
+ zfree(ops);
+ return;
+ }
+
+ /* Get the type and offset arguments, common to all the ops. */
+ if (getBitfieldTypeFromArgument(c,c->argv[j+1],&sign,&bits) != C_OK) {
+ zfree(ops);
+ return;
+ }
+
+ if (getBitOffsetFromArgument(c,c->argv[j+2],&bitoffset,1,bits) != C_OK){
+ zfree(ops);
+ return;
+ }
+
+ if (opcode != BITFIELDOP_GET) {
+ readonly = 0;
+ if (higest_write_offset < bitoffset + bits - 1)
+ higest_write_offset = bitoffset + bits - 1;
+ /* INCRBY and SET require another argument. */
+ if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){
+ zfree(ops);
+ return;
+ }
+ }
+
+ /* Populate the array of operations we'll process. */
+ ops = zrealloc(ops,sizeof(*ops)*(numops+1));
+ ops[numops].offset = bitoffset;
+ ops[numops].i64 = i64;
+ ops[numops].opcode = opcode;
+ ops[numops].owtype = owtype;
+ ops[numops].bits = bits;
+ ops[numops].sign = sign;
+ numops++;
+
+ j += 3 - (opcode == BITFIELDOP_GET);
+ }
+
+ if (readonly) {
+ /* Lookup for read is ok if key doesn't exist, but errors
+ * if it's not a string. */
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ } else {
+ /* Lookup by making room up to the farthest bit reached by
+ * this operation. */
+ if ((o = lookupStringForBitCommand(c,
+ higest_write_offset)) == NULL) return;
+ }
+
+ addReplyMultiBulkLen(c,numops);
+
+ /* Actually process the operations. */
+ for (j = 0; j < numops; j++) {
+ struct bitfieldOp *thisop = ops+j;
+
+ /* Execute the operation. */
+ if (thisop->opcode == BITFIELDOP_SET ||
+ thisop->opcode == BITFIELDOP_INCRBY)
+ {
+ /* SET and INCRBY: We handle both with the same code path
+ * for simplicity. SET return value is the previous value so
+ * we need fetch & store as well. */
+
+ /* We need two different but very similar code paths for signed
+ * and unsigned operations, since the set of functions to get/set
+ * the integers and the used variables types are different. */
+ if (thisop->sign) {
+ int64_t oldval, newval, wrapped, retval;
+ int overflow;
+
+ oldval = getSignedBitfield(o->ptr,thisop->offset,
+ thisop->bits);
+
+ if (thisop->opcode == BITFIELDOP_INCRBY) {
+ newval = oldval + thisop->i64;
+ overflow = checkSignedBitfieldOverflow(oldval,
+ thisop->i64,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = newval;
+ } else {
+ newval = thisop->i64;
+ overflow = checkSignedBitfieldOverflow(newval,
+ 0,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = oldval;
+ }
+
+ /* If the overflow type is "FAIL", don't write and return
+ * NULL to signal the condition. */
+ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
+ addReplyLongLong(c,retval);
+ setSignedBitfield(o->ptr,thisop->offset,
+ thisop->bits,newval);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ } else {
+ uint64_t oldval, newval, wrapped, retval;
+ int overflow;
+
+ oldval = getUnsignedBitfield(o->ptr,thisop->offset,
+ thisop->bits);
+
+ if (thisop->opcode == BITFIELDOP_INCRBY) {
+ newval = oldval + thisop->i64;
+ overflow = checkUnsignedBitfieldOverflow(oldval,
+ thisop->i64,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = newval;
+ } else {
+ newval = thisop->i64;
+ overflow = checkUnsignedBitfieldOverflow(newval,
+ 0,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = oldval;
+ }
+ /* If the overflow type is "FAIL", don't write and return
+ * NULL to signal the condition. */
+ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
+ addReplyLongLong(c,retval);
+ setUnsignedBitfield(o->ptr,thisop->offset,
+ thisop->bits,newval);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ }
+ changes++;
+ } else {
+ /* GET */
+ unsigned char buf[9];
+ long strlen = 0;
+ unsigned char *src = NULL;
+ char llbuf[LONG_STR_SIZE];
+
+ if (o != NULL)
+ src = getObjectReadOnlyString(o,&strlen,llbuf);
+
+ /* For GET we use a trick: before executing the operation
+ * copy up to 9 bytes to a local buffer, so that we can easily
+ * execute up to 64 bit operations that are at actual string
+ * object boundaries. */
+ memset(buf,0,9);
+ int i;
+ size_t byte = thisop->offset >> 3;
+ for (i = 0; i < 9; i++) {
+ if (src == NULL || i+byte >= (size_t)strlen) break;
+ buf[i] = src[i+byte];
+ }
+
+ /* Now operate on the copied buffer which is guaranteed
+ * to be zero-padded. */
+ if (thisop->sign) {
+ int64_t val = getSignedBitfield(buf,thisop->offset-(byte*8),
+ thisop->bits);
+ addReplyLongLong(c,val);
+ } else {
+ uint64_t val = getUnsignedBitfield(buf,thisop->offset-(byte*8),
+ thisop->bits);
+ addReplyLongLong(c,val);
+ }
+ }
+ }
+
+ if (changes) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
+ server.dirty += changes;
+ }
+ zfree(ops);
+}
diff --git a/src/blocked.c b/src/blocked.c
index 4cd632bd3..54b26b713 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -34,17 +34,17 @@
* getTimeoutFromObjectOrReply() is just an utility function to parse a
* timeout argument since blocking operations usually require a timeout.
*
- * blockClient() set the REDIS_BLOCKED flag in the client, and set the
- * specified block type 'btype' filed to one of REDIS_BLOCKED_* macros.
+ * blockClient() sets the CLIENT_BLOCKED flag in the client, and sets the
+ * specified block type 'btype' field to one of BLOCKED_* macros.
*
* unblockClient() unblocks the client doing the following:
* 1) It calls the btype-specific function to cleanup the state.
- * 2) It unblocks the client by unsetting the REDIS_BLOCKED flag.
+ * 2) It unblocks the client by unsetting the CLIENT_BLOCKED flag.
* 3) It puts the client into a list of just unblocked clients that are
* processed ASAP in the beforeSleep() event loop callback, so that
* if there is some query buffer to process, we do it. This is also
* required because otherwise there is no 'readable' event fired, we
- * already read the pending commands. We also set the REDIS_UNBLOCKED
+ * already read the pending commands. We also set the CLIENT_UNBLOCKED
* flag to remember the client is in the unblocked_clients list.
*
* processUnblockedClients() is called inside the beforeSleep() function
@@ -59,9 +59,11 @@
* When implementing a new type of blocking opeation, the implementation
* should modify unblockClient() and replyToBlockedClientTimedOut() in order
* to handle the btype-specific behavior of this two functions.
+ * If the blocking operation waits for certain keys to change state, the
+ * clusterRedirectBlockedClientIfNeeded() function should also be updated.
*/
-#include "redis.h"
+#include "server.h"
/* Get a timeout value from an object and store it into 'timeout'.
* The final timeout is always stored as milliseconds as a time where the
@@ -71,16 +73,16 @@
* Note that if the timeout is zero (usually from the point of view of
* commands API this means no timeout) the value stored into 'timeout'
* is zero. */
-int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit) {
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit) {
long long tval;
if (getLongLongFromObjectOrReply(c,object,&tval,
- "timeout is not an integer or out of range") != REDIS_OK)
- return REDIS_ERR;
+ "timeout is not an integer or out of range") != C_OK)
+ return C_ERR;
if (tval < 0) {
addReplyError(c,"timeout is negative");
- return REDIS_ERR;
+ return C_ERR;
}
if (tval > 0) {
@@ -89,14 +91,14 @@ int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout,
}
*timeout = tval;
- return REDIS_OK;
+ return C_OK;
}
-/* Block a client for the specific operation type. Once the REDIS_BLOCKED
+/* Block a client for the specific operation type. Once the CLIENT_BLOCKED
* flag is set client query buffer is not longer processed, but accumulated,
* and will be processed when the client is unblocked. */
-void blockClient(redisClient *c, int btype) {
- c->flags |= REDIS_BLOCKED;
+void blockClient(client *c, int btype) {
+ c->flags |= CLIENT_BLOCKED;
c->btype = btype;
server.bpop_blocked_clients++;
}
@@ -106,53 +108,88 @@ void blockClient(redisClient *c, int btype) {
* unblocked after a blocking operation. */
void processUnblockedClients(void) {
listNode *ln;
- redisClient *c;
+ client *c;
while (listLength(server.unblocked_clients)) {
ln = listFirst(server.unblocked_clients);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
c = ln->value;
listDelNode(server.unblocked_clients,ln);
- c->flags &= ~REDIS_UNBLOCKED;
- c->btype = REDIS_BLOCKED_NONE;
+ c->flags &= ~CLIENT_UNBLOCKED;
- /* Process remaining data in the input buffer. */
- if (c->querybuf && sdslen(c->querybuf) > 0) {
- server.current_client = c;
- processInputBuffer(c);
- server.current_client = NULL;
+ /* Process remaining data in the input buffer, unless the client
+ * is blocked again. Actually processInputBuffer() checks that the
+     * client is not blocked before proceeding, but things may change and
+ * the code is conceptually more correct this way. */
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ if (c->querybuf && sdslen(c->querybuf) > 0) {
+ processInputBuffer(c);
+ }
}
}
}
/* Unblock a client calling the right function depending on the kind
* of operation the client is blocking for. */
-void unblockClient(redisClient *c) {
- if (c->btype == REDIS_BLOCKED_LIST) {
+void unblockClient(client *c) {
+ if (c->btype == BLOCKED_LIST) {
unblockClientWaitingData(c);
- } else if (c->btype == REDIS_BLOCKED_WAIT) {
+ } else if (c->btype == BLOCKED_WAIT) {
unblockClientWaitingReplicas(c);
+ } else if (c->btype == BLOCKED_MODULE) {
+ unblockClientFromModule(c);
} else {
- redisPanic("Unknown btype in unblockClient().");
+ serverPanic("Unknown btype in unblockClient().");
}
/* Clear the flags, and put the client in the unblocked list so that
* we'll process new commands in its query buffer ASAP. */
- c->flags &= ~REDIS_BLOCKED;
- c->flags |= REDIS_UNBLOCKED;
- c->btype = REDIS_BLOCKED_NONE;
+ c->flags &= ~CLIENT_BLOCKED;
+ c->btype = BLOCKED_NONE;
server.bpop_blocked_clients--;
- listAddNodeTail(server.unblocked_clients,c);
+    /* The client may already be in the unblocked list because of a previous
+     * blocking operation, don't add it back into the list multiple times. */
+ if (!(c->flags & CLIENT_UNBLOCKED)) {
+ c->flags |= CLIENT_UNBLOCKED;
+ listAddNodeTail(server.unblocked_clients,c);
+ }
}
/* This function gets called when a blocked client timed out in order to
- * send it a reply of some kind. */
-void replyToBlockedClientTimedOut(redisClient *c) {
- if (c->btype == REDIS_BLOCKED_LIST) {
+ * send it a reply of some kind. After this function is called,
+ * unblockClient() will be called with the same client as argument. */
+void replyToBlockedClientTimedOut(client *c) {
+ if (c->btype == BLOCKED_LIST) {
addReply(c,shared.nullmultibulk);
- } else if (c->btype == REDIS_BLOCKED_WAIT) {
+ } else if (c->btype == BLOCKED_WAIT) {
addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset));
+ } else if (c->btype == BLOCKED_MODULE) {
+ moduleBlockedClientTimedOut(c);
} else {
- redisPanic("Unknown btype in replyToBlockedClientTimedOut().");
+ serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
}
}
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+
+ if (c->flags & CLIENT_BLOCKED) {
+ addReplySds(c,sdsnew(
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> slave?)\r\n"));
+ unblockClient(c);
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+ }
+}
diff --git a/src/childinfo.c b/src/childinfo.c
new file mode 100644
index 000000000..719025e8c
--- /dev/null
+++ b/src/childinfo.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <unistd.h>
+
+/* Open a child-parent channel used in order to move information about the
+ * RDB / AOF saving process from the child to the parent (for instance
+ * the amount of copy on write memory used) */
+void openChildInfoPipe(void) {
+ if (pipe(server.child_info_pipe) == -1) {
+ /* On error our two file descriptors should be still set to -1,
+         * but we call closeChildInfoPipe() anyway since it can't hurt. */
+ closeChildInfoPipe();
+ } else if (anetNonBlock(NULL,server.child_info_pipe[0]) != ANET_OK) {
+ closeChildInfoPipe();
+ } else {
+ memset(&server.child_info_data,0,sizeof(server.child_info_data));
+ }
+}
+
+/* Close the pipes opened with openChildInfoPipe(). */
+void closeChildInfoPipe(void) {
+ if (server.child_info_pipe[0] != -1 ||
+ server.child_info_pipe[1] != -1)
+ {
+ close(server.child_info_pipe[0]);
+ close(server.child_info_pipe[1]);
+ server.child_info_pipe[0] = -1;
+ server.child_info_pipe[1] = -1;
+ }
+}
+
+/* Send COW data to parent. The child should call this function after populating
+ * the corresponding fields it wants to send (according to the process type). */
+void sendChildInfo(int ptype) {
+ if (server.child_info_pipe[1] == -1) return;
+ server.child_info_data.magic = CHILD_INFO_MAGIC;
+ server.child_info_data.process_type = ptype;
+ ssize_t wlen = sizeof(server.child_info_data);
+ if (write(server.child_info_pipe[1],&server.child_info_data,wlen) != wlen) {
+ /* Nothing to do on error, this will be detected by the other side. */
+ }
+}
+
+/* Receive COW data from the child: the parent reads the pipe's read end. */
+void receiveChildInfo(void) {
+ if (server.child_info_pipe[0] == -1) return;
+ ssize_t wlen = sizeof(server.child_info_data);
+ if (read(server.child_info_pipe[0],&server.child_info_data,wlen) == wlen &&
+ server.child_info_data.magic == CHILD_INFO_MAGIC)
+ {
+ if (server.child_info_data.process_type == CHILD_INFO_TYPE_RDB) {
+ server.stat_rdb_cow_bytes = server.child_info_data.cow_size;
+ } else if (server.child_info_data.process_type == CHILD_INFO_TYPE_AOF) {
+ server.stat_aof_cow_bytes = server.child_info_data.cow_size;
+ }
+ }
+}
diff --git a/src/cluster.c b/src/cluster.c
index 0d908349c..a9fedce0c 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include "endianconv.h"
@@ -37,9 +37,9 @@
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
-#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
+#include <math.h>
/* A global reference to myself is handy to make code more clear.
* Myself always points to server.cluster->myself, that is, the clusterNode
@@ -72,33 +72,20 @@ void resetManualFailover(void);
void clusterCloseAllSlots(void);
void clusterSetNodeAsMaster(clusterNode *n);
void clusterDelNode(clusterNode *delnode);
+sds representClusterNodeFlags(sds ci, uint16_t flags);
+uint64_t clusterGetMaxEpoch(void);
+int clusterBumpConfigEpochWithoutConsensus(void);
/* -----------------------------------------------------------------------------
* Initialization
* -------------------------------------------------------------------------- */
-/* Return the greatest configEpoch found in the cluster. */
-uint64_t clusterGetMaxEpoch(void) {
- uint64_t max = 0;
- dictIterator *di;
- dictEntry *de;
-
- di = dictGetSafeIterator(server.cluster->nodes);
- while((de = dictNext(di)) != NULL) {
- clusterNode *node = dictGetVal(de);
- if (node->configEpoch > max) max = node->configEpoch;
- }
- dictReleaseIterator(di);
- if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
- return max;
-}
-
/* Load the cluster config from 'filename'.
*
* If the file does not exist or is zero-length (this may happen because
* when we lock the nodes.conf file, we create a zero-length one for the
- * sake of locking if it does not already exist), REDIS_ERR is returned.
- * If the configuration was loaded from the file, REDIS_OK is returned. */
+ * sake of locking if it does not already exist), C_ERR is returned.
+ * If the configuration was loaded from the file, C_OK is returned. */
int clusterLoadConfig(char *filename) {
FILE *fp = fopen(filename,"r");
struct stat sb;
@@ -107,30 +94,30 @@ int clusterLoadConfig(char *filename) {
if (fp == NULL) {
if (errno == ENOENT) {
- return REDIS_ERR;
+ return C_ERR;
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Loading the cluster node config from %s: %s",
filename, strerror(errno));
exit(1);
}
}
- /* Check if the file is zero-length: if so return REDIS_ERR to signal
+ /* Check if the file is zero-length: if so return C_ERR to signal
* we have to write the config. */
if (fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
fclose(fp);
- return REDIS_ERR;
+ return C_ERR;
}
- /* Parse the file. Note that single liens of the cluster config file can
+ /* Parse the file. Note that single lines of the cluster config file can
* be really long as they include all the hash slots of the node.
* This means in the worst possible case, half of the Redis slots will be
* present in a single line, possibly in importing or migrating state, so
* together with the node ID of the sender/receiver.
*
- * To simplify we allocate 1024+REDIS_CLUSTER_SLOTS*128 bytes per line. */
- maxline = 1024+REDIS_CLUSTER_SLOTS*128;
+ * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */
+ maxline = 1024+CLUSTER_SLOTS*128;
line = zmalloc(maxline);
while(fgets(line,maxline,fp) != NULL) {
int argc;
@@ -141,7 +128,7 @@ int clusterLoadConfig(char *filename) {
/* Skip blank lines, they can be created either by users manually
* editing nodes.conf or by the config writing process if stopped
* before the truncate() call. */
- if (line[0] == '\n') continue;
+ if (line[0] == '\n' || line[0] == '\0') continue;
/* Split the line into arguments for processing. */
argv = sdssplitargs(line,&argc);
@@ -158,14 +145,18 @@ int clusterLoadConfig(char *filename) {
server.cluster->lastVoteEpoch =
strtoull(argv[j+1],NULL,10);
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Skipping unknown cluster config variable '%s'",
argv[j]);
}
}
+ sdsfreesplitres(argv,argc);
continue;
}
+ /* Regular config lines have at least eight fields */
+ if (argc < 8) goto fmterr;
+
/* Create this node if it does not exist */
n = clusterLookupNode(argv[0]);
if (!n) {
@@ -173,10 +164,20 @@ int clusterLoadConfig(char *filename) {
clusterAddNode(n);
}
/* Address and port */
- if ((p = strchr(argv[1],':')) == NULL) goto fmterr;
+ if ((p = strrchr(argv[1],':')) == NULL) goto fmterr;
*p = '\0';
memcpy(n->ip,argv[1],strlen(argv[1])+1);
- n->port = atoi(p+1);
+ char *port = p+1;
+ char *busp = strchr(port,'@');
+ if (busp) {
+ *busp = '\0';
+ busp++;
+ }
+ n->port = atoi(port);
+ /* In older versions of nodes.conf the "@busport" part is missing.
+ * In this case we set it to the default offset of 10000 from the
+ * base port. */
+ n->cport = busp ? atoi(busp) : n->port + CLUSTER_PORT_INCR;
/* Parse flags */
p = s = argv[2];
@@ -184,26 +185,26 @@ int clusterLoadConfig(char *filename) {
p = strchr(s,',');
if (p) *p = '\0';
if (!strcasecmp(s,"myself")) {
- redisAssert(server.cluster->myself == NULL);
+ serverAssert(server.cluster->myself == NULL);
myself = server.cluster->myself = n;
- n->flags |= REDIS_NODE_MYSELF;
+ n->flags |= CLUSTER_NODE_MYSELF;
} else if (!strcasecmp(s,"master")) {
- n->flags |= REDIS_NODE_MASTER;
+ n->flags |= CLUSTER_NODE_MASTER;
} else if (!strcasecmp(s,"slave")) {
- n->flags |= REDIS_NODE_SLAVE;
+ n->flags |= CLUSTER_NODE_SLAVE;
} else if (!strcasecmp(s,"fail?")) {
- n->flags |= REDIS_NODE_PFAIL;
+ n->flags |= CLUSTER_NODE_PFAIL;
} else if (!strcasecmp(s,"fail")) {
- n->flags |= REDIS_NODE_FAIL;
+ n->flags |= CLUSTER_NODE_FAIL;
n->fail_time = mstime();
} else if (!strcasecmp(s,"handshake")) {
- n->flags |= REDIS_NODE_HANDSHAKE;
+ n->flags |= CLUSTER_NODE_HANDSHAKE;
} else if (!strcasecmp(s,"noaddr")) {
- n->flags |= REDIS_NODE_NOADDR;
+ n->flags |= CLUSTER_NODE_NOADDR;
} else if (!strcasecmp(s,"noflags")) {
/* nothing to do */
} else {
- redisPanic("Unknown flag in redis cluster config file");
+ serverPanic("Unknown flag in redis cluster config file");
}
if (p) s = p+1;
}
@@ -238,7 +239,7 @@ int clusterLoadConfig(char *filename) {
clusterNode *cn;
p = strchr(argv[j],'-');
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
*p = '\0';
direction = p[1]; /* Either '>' or '<' */
slot = atoi(argv[j]+1);
@@ -266,12 +267,13 @@ int clusterLoadConfig(char *filename) {
sdsfreesplitres(argv,argc);
}
+ /* Config sanity check */
+ if (server.cluster->myself == NULL) goto fmterr;
+
zfree(line);
fclose(fp);
- /* Config sanity check */
- redisAssert(server.cluster->myself != NULL);
- redisLog(REDIS_NOTICE,"Node configuration loaded, I'm %.40s", myself->name);
+ serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name);
/* Something that should never happen: currentEpoch smaller than
* the max epoch found in the nodes configuration. However we handle this
@@ -279,12 +281,13 @@ int clusterLoadConfig(char *filename) {
if (clusterGetMaxEpoch() > server.cluster->currentEpoch) {
server.cluster->currentEpoch = clusterGetMaxEpoch();
}
- return REDIS_OK;
+ return C_OK;
fmterr:
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unrecoverable error: corrupted cluster config file.");
- fclose(fp);
+ zfree(line);
+ if (fp) fclose(fp);
exit(1);
}
@@ -310,7 +313,7 @@ int clusterSaveConfig(int do_fsync) {
/* Get the nodes description and concatenate our "vars" directive to
* save currentEpoch and lastVoteEpoch. */
- ci = clusterGenNodesDescription(REDIS_NODE_HANDSHAKE);
+ ci = clusterGenNodesDescription(CLUSTER_NODE_HANDSHAKE);
ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n",
(unsigned long long) server.cluster->currentEpoch,
(unsigned long long) server.cluster->lastVoteEpoch);
@@ -321,7 +324,7 @@ int clusterSaveConfig(int do_fsync) {
/* Pad the new payload if the existing file length is greater. */
if (fstat(fd,&sb) != -1) {
- if (sb.st_size > content_size) {
+ if (sb.st_size > (off_t)content_size) {
ci = sdsgrowzero(ci,sb.st_size);
memset(ci+content_size,'\n',sb.st_size-content_size);
}
@@ -349,7 +352,7 @@ err:
void clusterSaveConfigOrDie(int do_fsync) {
if (clusterSaveConfig(do_fsync) == -1) {
- redisLog(REDIS_WARNING,"Fatal: can't update cluster config file.");
+ serverLog(LL_WARNING,"Fatal: can't update cluster config file.");
exit(1);
}
}
@@ -361,37 +364,44 @@ void clusterSaveConfigOrDie(int do_fsync) {
* in-place, reopening the file, and writing to it in place (later adjusting
* the length with ftruncate()).
*
- * On success REDIS_OK is returned, otherwise an error is logged and
- * the function returns REDIS_ERR to signal a lock was not acquired. */
+ * On success C_OK is returned, otherwise an error is logged and
+ * the function returns C_ERR to signal a lock was not acquired. */
int clusterLockConfig(char *filename) {
+/* flock() does not exist on Solaris
+ * and a fcntl-based solution won't help, as we constantly re-open that file,
+ * which will release _all_ locks anyway
+ */
+#if !defined(__sun)
/* To lock it, we need to open the file in a way it is created if
* it does not exist, otherwise there is a race condition with other
* processes. */
int fd = open(filename,O_WRONLY|O_CREAT,0644);
if (fd == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Can't open %s in order to acquire a lock: %s",
filename, strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
if (flock(fd,LOCK_EX|LOCK_NB) == -1) {
if (errno == EWOULDBLOCK) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sorry, the cluster configuration file %s is already used "
"by a different Redis Cluster node. Please make sure that "
"different nodes use different cluster configuration "
"files.", filename);
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Impossible to lock %s: %s", filename, strerror(errno));
}
close(fd);
- return REDIS_ERR;
+ return C_ERR;
}
/* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the
* lock to the file as long as the process exists. */
- return REDIS_OK;
+#endif /* __sun */
+
+ return C_OK;
}
void clusterInit(void) {
@@ -400,7 +410,7 @@ void clusterInit(void) {
server.cluster = zmalloc(sizeof(clusterState));
server.cluster->myself = NULL;
server.cluster->currentEpoch = 0;
- server.cluster->state = REDIS_CLUSTER_FAIL;
+ server.cluster->state = CLUSTER_FAIL;
server.cluster->size = 1;
server.cluster->todo_before_sleep = 0;
server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL);
@@ -410,24 +420,28 @@ void clusterInit(void) {
server.cluster->failover_auth_count = 0;
server.cluster->failover_auth_rank = 0;
server.cluster->failover_auth_epoch = 0;
+ server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
server.cluster->lastVoteEpoch = 0;
- server.cluster->stats_bus_messages_sent = 0;
- server.cluster->stats_bus_messages_received = 0;
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ server.cluster->stats_bus_messages_sent[i] = 0;
+ server.cluster->stats_bus_messages_received[i] = 0;
+ }
+ server.cluster->stats_pfail_nodes = 0;
memset(server.cluster->slots,0, sizeof(server.cluster->slots));
clusterCloseAllSlots();
/* Lock the cluster config file to make sure every node uses
* its own nodes.conf. */
- if (clusterLockConfig(server.cluster_configfile) == REDIS_ERR)
+ if (clusterLockConfig(server.cluster_configfile) == C_ERR)
exit(1);
/* Load or create a new nodes configuration. */
- if (clusterLoadConfig(server.cluster_configfile) == REDIS_ERR) {
+ if (clusterLoadConfig(server.cluster_configfile) == C_ERR) {
/* No configuration found. We will just use the random name provided
* by the createClusterNode() function. */
myself = server.cluster->myself =
- createClusterNode(NULL,REDIS_NODE_MYSELF|REDIS_NODE_MASTER);
- redisLog(REDIS_NOTICE,"No cluster configuration found, I'm %.40s",
+ createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER);
+ serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s",
myself->name);
clusterAddNode(myself);
saveconf = 1;
@@ -440,8 +454,8 @@ void clusterInit(void) {
/* Port sanity check II
* The other handshake port check is triggered too late to stop
* us from trying to use a too-high cluster port number. */
- if (server.port > (65535-REDIS_CLUSTER_PORT_INCR)) {
- redisLog(REDIS_WARNING, "Redis port number too high. "
+ if (server.port > (65535-CLUSTER_PORT_INCR)) {
+ serverLog(LL_WARNING, "Redis port number too high. "
"Cluster communication port is 10,000 port "
"numbers higher than your Redis port. "
"Your Redis port number must be "
@@ -449,8 +463,8 @@ void clusterInit(void) {
exit(1);
}
- if (listenToPort(server.port+REDIS_CLUSTER_PORT_INCR,
- server.cfd,&server.cfd_count) == REDIS_ERR)
+ if (listenToPort(server.port+CLUSTER_PORT_INCR,
+ server.cfd,&server.cfd_count) == C_ERR)
{
exit(1);
} else {
@@ -459,18 +473,26 @@ void clusterInit(void) {
for (j = 0; j < server.cfd_count; j++) {
if (aeCreateFileEvent(server.el, server.cfd[j], AE_READABLE,
clusterAcceptHandler, NULL) == AE_ERR)
- redisPanic("Unrecoverable error creating Redis Cluster "
+ serverPanic("Unrecoverable error creating Redis Cluster "
"file event.");
}
}
- /* The slots -> keys map is a sorted set. Init it. */
- server.cluster->slots_to_keys = zslCreate();
+ /* The slots -> keys map is a radix tree. Initialize it here. */
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
- /* Set myself->port to my listening port, we'll just need to discover
- * the IP address via MEET messages. */
+ /* Set myself->port / cport to my listening ports, we'll just need to
+ * discover the IP address via MEET messages. */
myself->port = server.port;
+ myself->cport = server.port+CLUSTER_PORT_INCR;
+ if (server.cluster_announce_port)
+ myself->port = server.cluster_announce_port;
+ if (server.cluster_announce_bus_port)
+ myself->cport = server.cluster_announce_bus_port;
+ server.cluster->mf_end = 0;
resetManualFailover();
}
@@ -492,7 +514,7 @@ void clusterReset(int hard) {
if (nodeIsSlave(myself)) {
clusterSetNodeAsMaster(myself);
replicationUnsetMaster();
- emptyDb(NULL);
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
}
/* Close slots, reset manual failover state. */
@@ -500,7 +522,7 @@ void clusterReset(int hard) {
resetManualFailover();
/* Unassign all the slots. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) clusterDelSlot(j);
+ for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j);
/* Forget all the nodes, but myself. */
di = dictGetSafeIterator(server.cluster->nodes);
@@ -519,15 +541,16 @@ void clusterReset(int hard) {
server.cluster->currentEpoch = 0;
server.cluster->lastVoteEpoch = 0;
myself->configEpoch = 0;
- redisLog(REDIS_WARNING, "configEpoch set to 0 via CLUSTER RESET HARD");
+ serverLog(LL_WARNING, "configEpoch set to 0 via CLUSTER RESET HARD");
/* To change the Node ID we need to remove the old name from the
* nodes table, change the ID, and re-add back with new name. */
- oldname = sdsnewlen(myself->name, REDIS_CLUSTER_NAMELEN);
+ oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN);
dictDelete(server.cluster->nodes,oldname);
sdsfree(oldname);
- getRandomHexChars(myself->name, REDIS_CLUSTER_NAMELEN);
+ getRandomHexChars(myself->name, CLUSTER_NAMELEN);
clusterAddNode(myself);
+ serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name);
}
/* Make sure to persist the new config and update the state. */
@@ -570,11 +593,11 @@ void freeClusterLink(clusterLink *link) {
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd;
int max = MAX_CLUSTER_ACCEPTS_PER_CALL;
- char cip[REDIS_IP_STR_LEN];
+ char cip[NET_IP_STR_LEN];
clusterLink *link;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
/* If the server is starting up, don't accept cluster connections:
* UPDATE messages may interact with the database content. */
@@ -584,15 +607,15 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_VERBOSE,
- "Accepting cluster node: %s", server.neterr);
+ serverLog(LL_VERBOSE,
+ "Error accepting cluster node: %s", server.neterr);
return;
}
anetNonBlock(NULL,cfd);
anetEnableTcpNoDelay(NULL,cfd);
/* Use non-blocking I/O for cluster messages. */
- redisLog(REDIS_VERBOSE,"Accepted cluster node %s:%d", cip, cport);
+ serverLog(LL_VERBOSE,"Accepted cluster node %s:%d", cip, cport);
/* Create a link object we use to handle the connection.
* It gets passed to the readable handler when data is available.
* Initiallly the link->node pointer is set to NULL as we don't know
@@ -650,9 +673,9 @@ clusterNode *createClusterNode(char *nodename, int flags) {
clusterNode *node = zmalloc(sizeof(*node));
if (nodename)
- memcpy(node->name, nodename, REDIS_CLUSTER_NAMELEN);
+ memcpy(node->name, nodename, CLUSTER_NAMELEN);
else
- getRandomHexChars(node->name, REDIS_CLUSTER_NAMELEN);
+ getRandomHexChars(node->name, CLUSTER_NAMELEN);
node->ctime = mstime();
node->configEpoch = 0;
node->flags = flags;
@@ -666,8 +689,10 @@ clusterNode *createClusterNode(char *nodename, int flags) {
node->link = NULL;
memset(node->ip,0,sizeof(node->ip));
node->port = 0;
+ node->cport = 0;
node->fail_reports = listCreate();
node->voted_time = 0;
+ node->orphaned_time = 0;
node->repl_offset_time = 0;
node->repl_offset = 0;
listSetFreeMethod(node->fail_reports,zfree);
@@ -720,7 +745,7 @@ void clusterNodeCleanupFailureReports(clusterNode *node) {
listIter li;
clusterNodeFailReport *fr;
mstime_t maxtime = server.cluster_node_timeout *
- REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT;
+ CLUSTER_FAIL_REPORT_VALIDITY_MULT;
mstime_t now = mstime();
listRewind(l,&li);
@@ -774,13 +799,18 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
for (j = 0; j < master->numslaves; j++) {
if (master->slaves[j] == slave) {
- memmove(master->slaves+j,master->slaves+(j+1),
- (master->numslaves-1)-j);
+ if ((j+1) < master->numslaves) {
+ int remaining_slaves = (master->numslaves - j) - 1;
+ memmove(master->slaves+j,master->slaves+(j+1),
+ (sizeof(*master->slaves) * remaining_slaves));
+ }
master->numslaves--;
- return REDIS_OK;
+ if (master->numslaves == 0)
+ master->flags &= ~CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
}
}
- return REDIS_ERR;
+ return C_ERR;
}
int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
@@ -788,18 +818,13 @@ int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
/* If it's already a slave, don't add it again. */
for (j = 0; j < master->numslaves; j++)
- if (master->slaves[j] == slave) return REDIS_ERR;
+ if (master->slaves[j] == slave) return C_ERR;
master->slaves = zrealloc(master->slaves,
sizeof(clusterNode*)*(master->numslaves+1));
master->slaves[master->numslaves] = slave;
master->numslaves++;
- return REDIS_OK;
-}
-
-void clusterNodeResetSlaves(clusterNode *n) {
- zfree(n->slaves);
- n->numslaves = 0;
- n->slaves = NULL;
+ master->flags |= CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
}
int clusterCountNonFailingSlaves(clusterNode *n) {
@@ -810,15 +835,28 @@ int clusterCountNonFailingSlaves(clusterNode *n) {
return okslaves;
}
+/* Low level cleanup of the node structure. Only called by clusterDelNode(). */
void freeClusterNode(clusterNode *n) {
sds nodename;
+ int j;
+
+ /* If the node has associated slaves, we have to set
+ * all the slaves->slaveof fields to NULL (unknown). */
+ for (j = 0; j < n->numslaves; j++)
+ n->slaves[j]->slaveof = NULL;
+
+ /* Remove this node from the list of slaves of its master. */
+ if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
- nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN);
- redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
+ /* Unlink from the set of nodes. */
+ nodename = sdsnewlen(n->name, CLUSTER_NAMELEN);
+ serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
sdsfree(nodename);
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof, n);
+
+ /* Release link and associated data structures. */
if (n->link) freeClusterLink(n->link);
listRelease(n->fail_reports);
+ zfree(n->slaves);
zfree(n);
}
@@ -827,15 +865,20 @@ int clusterAddNode(clusterNode *node) {
int retval;
retval = dictAdd(server.cluster->nodes,
- sdsnewlen(node->name,REDIS_CLUSTER_NAMELEN), node);
- return (retval == DICT_OK) ? REDIS_OK : REDIS_ERR;
+ sdsnewlen(node->name,CLUSTER_NAMELEN), node);
+ return (retval == DICT_OK) ? C_OK : C_ERR;
}
-/* Remove a node from the cluster:
- * 1) Mark all the nodes handled by it as unassigned.
- * 2) Remove all the failure reports sent by this node.
- * 3) Free the node, that will in turn remove it from the hash table
- * and from the list of slaves of its master, if it is a slave node.
+/* Remove a node from the cluster. The function performs the high level
+ * cleanup, calling freeClusterNode() for the low level cleanup.
+ * Here we do the following:
+ *
+ * 1) Mark all the slots handled by it as unassigned.
+ * 2) Remove all the failure reports sent by this node and referenced by
+ * other nodes.
+ * 3) Free the node with freeClusterNode() that will in turn remove it
+ * from the hash table and from the list of slaves of its master, if
+ * it is a slave node.
*/
void clusterDelNode(clusterNode *delnode) {
int j;
@@ -843,7 +886,7 @@ void clusterDelNode(clusterNode *delnode) {
dictEntry *de;
/* 1) Mark slots as unassigned. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (server.cluster->importing_slots_from[j] == delnode)
server.cluster->importing_slots_from[j] = NULL;
if (server.cluster->migrating_slots_to[j] == delnode)
@@ -862,17 +905,13 @@ void clusterDelNode(clusterNode *delnode) {
}
dictReleaseIterator(di);
- /* 3) Remove this node from its master's slaves if needed. */
- if (nodeIsSlave(delnode) && delnode->slaveof)
- clusterNodeRemoveSlave(delnode->slaveof,delnode);
-
- /* 4) Free the node, unlinking it from the cluster. */
+ /* 3) Free the node, unlinking it from the cluster. */
freeClusterNode(delnode);
}
/* Node lookup by name */
clusterNode *clusterLookupNode(char *name) {
- sds s = sdsnewlen(name, REDIS_CLUSTER_NAMELEN);
+ sds s = sdsnewlen(name, CLUSTER_NAMELEN);
dictEntry *de;
de = dictFind(server.cluster->nodes,s);
@@ -887,30 +926,162 @@ clusterNode *clusterLookupNode(char *name) {
* this function. */
void clusterRenameNode(clusterNode *node, char *newname) {
int retval;
- sds s = sdsnewlen(node->name, REDIS_CLUSTER_NAMELEN);
+ sds s = sdsnewlen(node->name, CLUSTER_NAMELEN);
- redisLog(REDIS_DEBUG,"Renaming node %.40s into %.40s",
+ serverLog(LL_DEBUG,"Renaming node %.40s into %.40s",
node->name, newname);
retval = dictDelete(server.cluster->nodes, s);
sdsfree(s);
- redisAssert(retval == DICT_OK);
- memcpy(node->name, newname, REDIS_CLUSTER_NAMELEN);
+ serverAssert(retval == DICT_OK);
+ memcpy(node->name, newname, CLUSTER_NAMELEN);
clusterAddNode(node);
}
/* -----------------------------------------------------------------------------
+ * CLUSTER config epoch handling
+ * -------------------------------------------------------------------------- */
+
+/* Return the greatest configEpoch found in the cluster, or the current
+ * epoch if greater than any node configEpoch. */
+uint64_t clusterGetMaxEpoch(void) {
+ uint64_t max = 0;
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+ if (node->configEpoch > max) max = node->configEpoch;
+ }
+ dictReleaseIterator(di);
+ if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
+ return max;
+}
+
+/* If this node epoch is zero or is not already the greatest across the
+ * cluster (from the POV of the local configuration), this function will:
+ *
+ * 1) Generate a new config epoch, incrementing the current epoch.
+ * 2) Assign the new epoch to this node, WITHOUT any consensus.
+ * 3) Persist the configuration on disk before sending packets with the
+ * new configuration.
+ *
+ * If the new config epoch is generated and assigned, C_OK is returned,
+ * otherwise C_ERR is returned (since the node has already the greatest
+ * configuration around) and no operation is performed.
+ *
+ * Important note: this function violates the principle that config epochs
+ * should be generated with consensus and should be unique across the cluster.
+ * However Redis Cluster uses this auto-generated new config epochs in two
+ * cases:
+ *
+ * 1) When slots are closed after importing. Otherwise resharding would be
+ * too expensive.
+ * 2) When CLUSTER FAILOVER is called with options that force a slave to
+ *    failover its master even if there is no master majority able to
+ * create a new configuration epoch.
+ *
+ * Redis Cluster will not explode using this function, even in the case of
+ * a collision between this node and another node, generating the same
+ * configuration epoch unilaterally, because the config epoch conflict
+ * resolution algorithm will eventually move colliding nodes to different
+ * config epochs. However using this function may violate the "last failover
+ * wins" rule, so should only be used with care. */
+int clusterBumpConfigEpochWithoutConsensus(void) {
+ uint64_t maxEpoch = clusterGetMaxEpoch();
+
+ if (myself->configEpoch == 0 ||
+ myself->configEpoch != maxEpoch)
+ {
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ serverLog(LL_WARNING,
+ "New configEpoch set to %llu",
+ (unsigned long long) myself->configEpoch);
+ return C_OK;
+ } else {
+ return C_ERR;
+ }
+}
+
+/* This function is called when this node is a master, and we receive from
+ * another master a configuration epoch that is equal to our configuration
+ * epoch.
+ *
+ * BACKGROUND
+ *
+ * It is not possible that different slaves get the same config
+ * epoch during a failover election, because the slaves need to get voted
+ * by a majority. However when we perform a manual resharding of the cluster
+ * the node will assign a configuration epoch to itself without asking
+ * for agreement. Usually resharding happens when the cluster is working well
+ * and is supervised by the sysadmin, however it is possible for a failover
+ * to happen exactly while the node we are resharding a slot to assigns itself
+ * a new configuration epoch, but before it is able to propagate it.
+ *
+ * So technically it is possible in this condition that two nodes end with
+ * the same configuration epoch.
+ *
+ * Another possibility is that there are bugs in the implementation causing
+ * this to happen.
+ *
+ * Moreover when a new cluster is created, all the nodes start with the same
+ * configEpoch. This collision resolution code allows nodes to automatically
+ * end with a different configEpoch at startup.
+ *
+ * In all the cases, we want a mechanism that resolves this issue automatically
+ * as a safeguard. The same configuration epoch for masters serving different
+ * set of slots is not harmful, but it is if the nodes end serving the same
+ * slots for some reason (manual errors or software bugs) without a proper
+ * failover procedure.
+ *
+ * In general we want a system that eventually always ends with different
+ * masters having different configuration epochs whatever happened, since
+ * nothing is worse than a split-brain condition in a distributed system.
+ *
+ * BEHAVIOR
+ *
+ * When this function gets called, what happens is that if this node
+ * has the lexicographically smaller Node ID compared to the other node
+ * with the conflicting epoch (the 'sender' node), it will assign itself
+ * the greatest configuration epoch currently detected among nodes plus 1.
+ *
+ * This means that even if there are multiple nodes colliding, the node
+ * with the greatest Node ID never moves forward, so eventually all the nodes
+ * end with a different configuration epoch.
+ */
+void clusterHandleConfigEpochCollision(clusterNode *sender) {
+ /* Prerequisites: nodes have the same configEpoch and are both masters. */
+ if (sender->configEpoch != myself->configEpoch ||
+ !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
+ /* Don't act if the colliding node has a smaller Node ID. */
+ if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return;
+ /* Get the next ID available at the best of this node knowledge. */
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterSaveConfigOrDie(1);
+ serverLog(LL_VERBOSE,
+ "WARNING: configEpoch collision with node %.40s."
+ " configEpoch set to %llu",
+ sender->name,
+ (unsigned long long) myself->configEpoch);
+}
+
+/* -----------------------------------------------------------------------------
* CLUSTER nodes blacklist
*
* The nodes blacklist is just a way to ensure that a given node with a given
* Node ID is not readded before some time elapsed (this time is specified
- * in seconds in REDIS_CLUSTER_BLACKLIST_TTL).
+ * in seconds in CLUSTER_BLACKLIST_TTL).
*
* This is useful when we want to remove a node from the cluster completely:
* when CLUSTER FORGET is called, it also puts the node into the blacklist so
* that even if we receive gossip messages from other nodes that still remember
* about the node we want to remove, we don't re-add it before some time.
*
- * Currently the REDIS_CLUSTER_BLACKLIST_TTL is set to 1 minute, this means
+ * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means
* that redis-trib has 60 seconds to send CLUSTER FORGET messages to nodes
* in the cluster without dealing with the problem of other nodes re-adding
* back the node to nodes we already sent the FORGET command to.
@@ -920,7 +1091,7 @@ void clusterRenameNode(clusterNode *node, char *newname) {
* value.
* -------------------------------------------------------------------------- */
-#define REDIS_CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */
+#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */
/* Before of the addNode() or Exists() operations we always remove expired
@@ -946,7 +1117,7 @@ void clusterBlacklistCleanup(void) {
/* Cleanup the blacklist and add a new node ID to the black list. */
void clusterBlacklistAddNode(clusterNode *node) {
dictEntry *de;
- sds id = sdsnewlen(node->name,REDIS_CLUSTER_NAMELEN);
+ sds id = sdsnewlen(node->name,CLUSTER_NAMELEN);
clusterBlacklistCleanup();
if (dictAdd(server.cluster->nodes_black_list,id,NULL) == DICT_OK) {
@@ -955,7 +1126,7 @@ void clusterBlacklistAddNode(clusterNode *node) {
id = sdsdup(id);
}
de = dictFind(server.cluster->nodes_black_list,id);
- dictSetUnsignedIntegerVal(de,time(NULL)+REDIS_CLUSTER_BLACKLIST_TTL);
+ dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL);
sdsfree(id);
}
@@ -963,7 +1134,7 @@ void clusterBlacklistAddNode(clusterNode *node) {
* You don't need to pass an sds string here, any pointer to 40 bytes
* will work. */
int clusterBlacklistExists(char *nodeid) {
- sds id = sdsnewlen(nodeid,REDIS_CLUSTER_NAMELEN);
+ sds id = sdsnewlen(nodeid,CLUSTER_NAMELEN);
int retval;
clusterBlacklistCleanup();
@@ -1009,12 +1180,12 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
if (nodeIsMaster(myself)) failures++;
if (failures < needed_quorum) return; /* No weak agreement from masters. */
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Marking node %.40s as failing (quorum reached).", node->name);
/* Mark the node as failing. */
- node->flags &= ~REDIS_NODE_PFAIL;
- node->flags |= REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_PFAIL;
+ node->flags |= CLUSTER_NODE_FAIL;
node->fail_time = mstime();
/* Broadcast the failing node name to everybody, forcing all the other
@@ -1029,16 +1200,16 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
void clearNodeFailureIfNeeded(clusterNode *node) {
mstime_t now = mstime();
- redisAssert(nodeFailed(node));
+ serverAssert(nodeFailed(node));
/* For slaves we always clear the FAIL flag if we can contact the
* node again. */
if (nodeIsSlave(node) || node->numslots == 0) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Clear FAIL state for node %.40s: %s is reachable again.",
node->name,
nodeIsSlave(node) ? "slave" : "master without slots");
- node->flags &= ~REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_FAIL;
clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
}
@@ -1048,12 +1219,12 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
* Apparently no one is going to fix these slots, clear the FAIL flag. */
if (nodeIsMaster(node) && node->numslots > 0 &&
(now - node->fail_time) >
- (server.cluster_node_timeout * REDIS_CLUSTER_FAIL_UNDO_TIME_MULT))
+ (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT))
{
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Clear FAIL state for node %.40s: is reachable again and nobody is serving its slots after some time.",
node->name);
- node->flags &= ~REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_FAIL;
clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
}
}
@@ -1061,7 +1232,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
/* Return true if we already have a node in HANDSHAKE state matching the
* specified ip address and port number. This function is used in order to
* avoid adding a new handshake node for the same address multiple times. */
-int clusterHandshakeInProgress(char *ip, int port) {
+int clusterHandshakeInProgress(char *ip, int port, int cport) {
dictIterator *di;
dictEntry *de;
@@ -1070,7 +1241,9 @@ int clusterHandshakeInProgress(char *ip, int port) {
clusterNode *node = dictGetVal(de);
if (!nodeInHandshake(node)) continue;
- if (!strcasecmp(node->ip,ip) && node->port == port) break;
+ if (!strcasecmp(node->ip,ip) &&
+ node->port == port &&
+ node->cport == cport) break;
}
dictReleaseIterator(di);
return de != NULL;
@@ -1083,9 +1256,9 @@ int clusterHandshakeInProgress(char *ip, int port) {
*
* EAGAIN - There is already an handshake in progress for this address.
* EINVAL - IP or port are not valid. */
-int clusterStartHandshake(char *ip, int port) {
+int clusterStartHandshake(char *ip, int port, int cport) {
clusterNode *n;
- char norm_ip[REDIS_IP_STR_LEN];
+ char norm_ip[NET_IP_STR_LEN];
struct sockaddr_storage sa;
/* IP sanity check */
@@ -1103,33 +1276,35 @@ int clusterStartHandshake(char *ip, int port) {
}
/* Port sanity check */
- if (port <= 0 || port > (65535-REDIS_CLUSTER_PORT_INCR)) {
+ if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) {
errno = EINVAL;
return 0;
}
/* Set norm_ip as the normalized string representation of the node
* IP address. */
+ memset(norm_ip,0,NET_IP_STR_LEN);
if (sa.ss_family == AF_INET)
inet_ntop(AF_INET,
(void*)&(((struct sockaddr_in *)&sa)->sin_addr),
- norm_ip,REDIS_IP_STR_LEN);
+ norm_ip,NET_IP_STR_LEN);
else
inet_ntop(AF_INET6,
(void*)&(((struct sockaddr_in6 *)&sa)->sin6_addr),
- norm_ip,REDIS_IP_STR_LEN);
+ norm_ip,NET_IP_STR_LEN);
- if (clusterHandshakeInProgress(norm_ip,port)) {
+ if (clusterHandshakeInProgress(norm_ip,port,cport)) {
errno = EAGAIN;
return 0;
}
/* Add the node with a random address (NULL as first argument to
* createClusterNode()). Everything will be fixed during the
- * handskake. */
- n = createClusterNode(NULL,REDIS_NODE_HANDSHAKE|REDIS_NODE_MEET);
+ * handshake. */
+ n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET);
memcpy(n->ip,norm_ip,sizeof(n->ip));
n->port = port;
+ n->cport = cport;
clusterAddNode(n);
return 1;
}
@@ -1144,26 +1319,20 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender);
while(count--) {
- sds ci = sdsempty();
uint16_t flags = ntohs(g->flags);
clusterNode *node;
-
- if (flags == 0) ci = sdscat(ci,"noflags,");
- if (flags & REDIS_NODE_MYSELF) ci = sdscat(ci,"myself,");
- if (flags & REDIS_NODE_MASTER) ci = sdscat(ci,"master,");
- if (flags & REDIS_NODE_SLAVE) ci = sdscat(ci,"slave,");
- if (flags & REDIS_NODE_PFAIL) ci = sdscat(ci,"fail?,");
- if (flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
- if (flags & REDIS_NODE_HANDSHAKE) ci = sdscat(ci,"handshake,");
- if (flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
- if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
-
- redisLog(REDIS_DEBUG,"GOSSIP %.40s %s:%d %s",
- g->nodename,
- g->ip,
- ntohs(g->port),
- ci);
- sdsfree(ci);
+ sds ci;
+
+ if (server.verbosity == LL_DEBUG) {
+ ci = representClusterNodeFlags(sdsempty(), flags);
+ serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s",
+ g->nodename,
+ g->ip,
+ ntohs(g->port),
+ ntohs(g->cport),
+ ci);
+ sdsfree(ci);
+ }
/* Update our state accordingly to the gossip sections */
node = clusterLookupNode(g->nodename);
@@ -1171,31 +1340,61 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
/* We already know this node.
Handle failure reports, only when the sender is a master. */
if (sender && nodeIsMaster(sender) && node != myself) {
- if (flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL)) {
+ if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) {
if (clusterNodeAddFailureReport(node,sender)) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s reported node %.40s as not reachable.",
sender->name, node->name);
}
markNodeAsFailingIfNeeded(node);
} else {
if (clusterNodeDelFailureReport(node,sender)) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s reported node %.40s is back online.",
sender->name, node->name);
}
}
}
+ /* If from our POV the node is up (no failure flags are set),
+ * we have no pending ping for the node, nor we have failure
+ * reports for this node, update the last pong time with the
+ * one we see from the other nodes. */
+ if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ node->ping_sent == 0 &&
+ clusterNodeFailureReportsCount(node) == 0)
+ {
+ mstime_t pongtime = ntohl(g->pong_received);
+ pongtime *= 1000; /* Convert back to milliseconds. */
+
+ /* Replace the pong time with the received one only if
+ * it's greater than our view but is not in the future
+ * (with 500 milliseconds tolerance) from the POV of our
+ * clock. */
+ if (pongtime <= (server.mstime+500) &&
+ pongtime > node->pong_received)
+ {
+ node->pong_received = pongtime;
+ }
+ }
+
/* If we already know this node, but it is not reachable, and
- * we see a different address in the gossip section, start an
- * handshake with the (possibly) new address: this will result
- * into a node address update if the handshake will be
- * successful. */
- if (node->flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL) &&
- (strcasecmp(node->ip,g->ip) || node->port != ntohs(g->port)))
+ * we see a different address in the gossip section of a node that
+ * can talk with this other node, update the address, disconnect
+ * the old link if any, so that we'll attempt to connect with the
+ * new address. */
+ if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
+ !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ (strcasecmp(node->ip,g->ip) ||
+ node->port != ntohs(g->port) ||
+ node->cport != ntohs(g->cport)))
{
- clusterStartHandshake(g->ip,ntohs(g->port));
+ if (node->link) freeClusterLink(node->link);
+ memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+ node->port = ntohs(g->port);
+ node->cport = ntohs(g->cport);
+ node->flags &= ~CLUSTER_NODE_NOADDR;
}
} else {
/* If it's not in NOADDR state and we don't have it, we
@@ -1205,10 +1404,10 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
* is a well known node in our cluster, otherwise we risk
* joining another cluster. */
if (sender &&
- !(flags & REDIS_NODE_NOADDR) &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
!clusterBlacklistExists(g->nodename))
{
- clusterStartHandshake(g->ip,ntohs(g->port));
+ clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport));
}
}
@@ -1217,23 +1416,36 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
}
}
-/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes. */
-void nodeIp2String(char *buf, clusterLink *link) {
- anetPeerToString(link->fd, buf, REDIS_IP_STR_LEN, NULL);
+/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes.
+ * If 'announced_ip' length is non-zero, it is used instead of extracting
+ * the IP from the socket peer address. */
+void nodeIp2String(char *buf, clusterLink *link, char *announced_ip) {
+ if (announced_ip[0] != '\0') {
+ memcpy(buf,announced_ip,NET_IP_STR_LEN);
+ buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. */
+ } else {
+ anetPeerToString(link->fd, buf, NET_IP_STR_LEN, NULL);
+ }
}
/* Update the node address to the IP address that can be extracted
- * from link->fd, and at the specified port.
- * Also disconnect the node link so that we'll connect again to the new
- * address.
+ * from link->fd, or if hdr->myip is non empty, to the address the node
+ * is announcing us. The port is taken from the packet header as well.
+ *
+ * If the address or port changed, disconnect the node link so that we'll
+ * connect again to the new address.
*
* If the ip/port pair are already correct no operation is performed at
* all.
*
* The function returns 0 if the node address is still the same,
* otherwise 1 is returned. */
-int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
- char ip[REDIS_IP_STR_LEN];
+int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link,
+ clusterMsg *hdr)
+{
+ char ip[NET_IP_STR_LEN] = {0};
+ int port = ntohs(hdr->port);
+ int cport = ntohs(hdr->cport);
/* We don't proceed if the link is the same as the sender link, as this
* function is designed to see if the node link is consistent with the
@@ -1243,15 +1455,17 @@ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
* it is safe to call during packet processing. */
if (link == node->link) return 0;
- nodeIp2String(ip,link);
- if (node->port == port && strcmp(ip,node->ip) == 0) return 0;
+ nodeIp2String(ip,link,hdr->myip);
+ if (node->port == port && node->cport == cport &&
+ strcmp(ip,node->ip) == 0) return 0;
/* IP / port is different, update it. */
memcpy(node->ip,ip,sizeof(ip));
node->port = port;
+ node->cport = cport;
if (node->link) freeClusterLink(node->link);
- node->flags &= ~REDIS_NODE_NOADDR;
- redisLog(REDIS_WARNING,"Address updated for node %.40s, now %s:%d",
+ node->flags &= ~CLUSTER_NODE_NOADDR;
+ serverLog(LL_WARNING,"Address updated for node %.40s, now %s:%d",
node->name, node->ip, node->port);
/* Check if this is our master and we have to change the
@@ -1267,9 +1481,12 @@ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
void clusterSetNodeAsMaster(clusterNode *n) {
if (nodeIsMaster(n)) return;
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
- n->flags &= ~REDIS_NODE_SLAVE;
- n->flags |= REDIS_NODE_MASTER;
+ if (n->slaveof) {
+ clusterNodeRemoveSlave(n->slaveof,n);
+ if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO;
+ }
+ n->flags &= ~CLUSTER_NODE_SLAVE;
+ n->flags |= CLUSTER_NODE_MASTER;
n->slaveof = NULL;
/* Update config and state. */
@@ -1286,8 +1503,8 @@ void clusterSetNodeAsMaster(clusterNode *n) {
* node (see the function comments for more info).
*
* The 'sender' is the node for which we received a configuration update.
- * Sometimes it is not actaully the "Sender" of the information, like in the case
- * we receive the info via an UPDATE packet. */
+ * Sometimes it is not actually the "Sender" of the information, like in the
+ * case we receive the info via an UPDATE packet. */
void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
int j;
clusterNode *curmaster, *newmaster = NULL;
@@ -1298,7 +1515,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
* If the update message is not able to demote a master to slave (in this
* case we'll resync with the master updating the whole key space), we
* need to delete all the keys in the slots we lost ownership. */
- uint16_t dirty_slots[REDIS_CLUSTER_SLOTS];
+ uint16_t dirty_slots[CLUSTER_SLOTS];
int dirty_slots_count = 0;
/* Here we set curmaster to this node or the node this node
@@ -1307,11 +1524,11 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
curmaster = nodeIsMaster(myself) ? myself : myself->slaveof;
if (sender == myself) {
- redisLog(REDIS_WARNING,"Discarding UPDATE message about myself.");
+ serverLog(LL_WARNING,"Discarding UPDATE message about myself.");
return;
}
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(slots,j)) {
/* The slot is already bound to the sender of this message. */
if (server.cluster->slots[j] == sender) continue;
@@ -1358,7 +1575,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
* 2) We are a slave and our master is left without slots. We need
* to replicate to the new slots owner. */
if (newmaster && curmaster->numslots == 0) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Configuration change detected. Reconfiguring myself "
"as a replica of %.40s", sender->name);
clusterSetMaster(sender);
@@ -1378,69 +1595,6 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
}
}
-/* This function is called when this node is a master, and we receive from
- * another master a configuration epoch that is equal to our configuration
- * epoch.
- *
- * BACKGROUND
- *
- * It is not possible that different slaves get the same config
- * epoch during a failover election, because the slaves need to get voted
- * by a majority. However when we perform a manual resharding of the cluster
- * the node will assign a configuration epoch to itself without to ask
- * for agreement. Usually resharding happens when the cluster is working well
- * and is supervised by the sysadmin, however it is possible for a failover
- * to happen exactly while the node we are resharding a slot to assigns itself
- * a new configuration epoch, but before it is able to propagate it.
- *
- * So technically it is possible in this condition that two nodes end with
- * the same configuration epoch.
- *
- * Another possibility is that there are bugs in the implementation causing
- * this to happen.
- *
- * Moreover when a new cluster is created, all the nodes start with the same
- * configEpoch. This collision resolution code allows nodes to automatically
- * end with a different configEpoch at startup automatically.
- *
- * In all the cases, we want a mechanism that resolves this issue automatically
- * as a safeguard. The same configuration epoch for masters serving different
- * set of slots is not harmful, but it is if the nodes end serving the same
- * slots for some reason (manual errors or software bugs) without a proper
- * failover procedure.
- *
- * In general we want a system that eventually always ends with different
- * masters having different configuration epochs whatever happened, since
- * nothign is worse than a split-brain condition in a distributed system.
- *
- * BEHAVIOR
- *
- * When this function gets called, what happens is that if this node
- * has the lexicographically smaller Node ID compared to the other node
- * with the conflicting epoch (the 'sender' node), it will assign itself
- * the greatest configuration epoch currently detected among nodes plus 1.
- *
- * This means that even if there are multiple nodes colliding, the node
- * with the greatest Node ID never moves forward, so eventually all the nodes
- * end with a different configuration epoch.
- */
-void clusterHandleConfigEpochCollision(clusterNode *sender) {
- /* Prerequisites: nodes have the same configEpoch and are both masters. */
- if (sender->configEpoch != myself->configEpoch ||
- !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
- /* Don't act if the colliding node has a smaller Node ID. */
- if (memcmp(sender->name,myself->name,REDIS_CLUSTER_NAMELEN) <= 0) return;
- /* Get the next ID available at the best of this node knowledge. */
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterSaveConfigOrDie(1);
- redisLog(REDIS_VERBOSE,
- "WARNING: configEpoch collision with node %.40s."
- " configEpoch set to %llu",
- sender->name,
- (unsigned long long) myself->configEpoch);
-}
-
/* When this function is called, there is a packet to process starting
* at node->rcvbuf. Releasing the buffer is up to the caller, so this
* function should just handle the higher level stuff of processing the
@@ -1454,18 +1608,25 @@ int clusterProcessPacket(clusterLink *link) {
clusterMsg *hdr = (clusterMsg*) link->rcvbuf;
uint32_t totlen = ntohl(hdr->totlen);
uint16_t type = ntohs(hdr->type);
- uint16_t flags = ntohs(hdr->flags);
- uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0;
- clusterNode *sender;
- server.cluster->stats_bus_messages_received++;
- redisLog(REDIS_DEBUG,"--- Processing packet of type %d, %lu bytes",
+ if (type < CLUSTERMSG_TYPE_COUNT)
+ server.cluster->stats_bus_messages_received[type]++;
+ serverLog(LL_DEBUG,"--- Processing packet of type %d, %lu bytes",
type, (unsigned long) totlen);
/* Perform sanity checks */
if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
- if (ntohs(hdr->ver) != 0) return 1; /* Can't handle versions other than 0.*/
if (totlen > sdslen(link->rcvbuf)) return 1;
+
+ if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) {
+ /* Can't handle messages of different versions. */
+ return 1;
+ }
+
+ uint16_t flags = ntohs(hdr->flags);
+ uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0;
+ clusterNode *sender;
+
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
{
@@ -1483,7 +1644,8 @@ int clusterProcessPacket(clusterLink *link) {
} else if (type == CLUSTERMSG_TYPE_PUBLISH) {
uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
- explen += sizeof(clusterMsgDataPublish) +
+ explen += sizeof(clusterMsgDataPublish) -
+ 8 +
ntohl(hdr->data.publish.msg.channel_len) +
ntohl(hdr->data.publish.msg.message_len);
if (totlen != explen) return 1;
@@ -1527,32 +1689,38 @@ int clusterProcessPacket(clusterLink *link) {
server.cluster->mf_master_offset == 0)
{
server.cluster->mf_master_offset = sender->repl_offset;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Received replication offset for paused "
"master manual failover: %lld",
server.cluster->mf_master_offset);
}
}
- /* Process packets by type. */
+ /* Initial processing of PING and MEET requests replying with a PONG. */
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) {
- redisLog(REDIS_DEBUG,"Ping packet received: %p", (void*)link->node);
+ serverLog(LL_DEBUG,"Ping packet received: %p", (void*)link->node);
/* We use incoming MEET messages in order to set the address
* for 'myself', since only other cluster nodes will send us
- * MEET messagses on handshakes, when the cluster joins, or
+ * MEET messages on handshakes, when the cluster joins, or
* later if we changed address, and those nodes will use our
* official address to connect to us. So by obtaining this address
* from the socket is a simple way to discover / update our own
- * address in the cluster without it being hardcoded in the config. */
- if (type == CLUSTERMSG_TYPE_MEET) {
- char ip[REDIS_IP_STR_LEN];
+ * address in the cluster without it being hardcoded in the config.
+ *
+ * However if we don't have an address at all, we update the address
+ * even with a normal PING packet. If it's wrong it will be fixed
+ * by MEET later. */
+ if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') &&
+ server.cluster_announce_ip == NULL)
+ {
+ char ip[NET_IP_STR_LEN];
if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 &&
strcmp(ip,myself->ip))
{
- memcpy(myself->ip,ip,REDIS_IP_STR_LEN);
- redisLog(REDIS_WARNING,"IP address for this node updated to %s",
+ memcpy(myself->ip,ip,NET_IP_STR_LEN);
+ serverLog(LL_WARNING,"IP address for this node updated to %s",
myself->ip);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
}
@@ -1565,25 +1733,29 @@ int clusterProcessPacket(clusterLink *link) {
if (!sender && type == CLUSTERMSG_TYPE_MEET) {
clusterNode *node;
- node = createClusterNode(NULL,REDIS_NODE_HANDSHAKE);
- nodeIp2String(node->ip,link);
+ node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE);
+ nodeIp2String(node->ip,link,hdr->myip);
node->port = ntohs(hdr->port);
+ node->cport = ntohs(hdr->cport);
clusterAddNode(node);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
}
- /* Get info from the gossip section */
- clusterProcessGossipSection(hdr,link);
+ /* If this is a MEET packet from an unknown node, we still process
+ * the gossip section here since we have to trust the sender because
+ * of the message type. */
+ if (!sender && type == CLUSTERMSG_TYPE_MEET)
+ clusterProcessGossipSection(hdr,link);
/* Anyway reply with a PONG */
clusterSendPing(link,CLUSTERMSG_TYPE_PONG);
}
- /* PING or PONG: process config information. */
+ /* PING, PONG, MEET: process config information. */
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
{
- redisLog(REDIS_DEBUG,"%s packet received: %p",
+ serverLog(LL_DEBUG,"%s packet received: %p",
type == CLUSTERMSG_TYPE_PING ? "ping" : "pong",
(void*)link->node);
if (link->node) {
@@ -1591,38 +1763,42 @@ int clusterProcessPacket(clusterLink *link) {
/* If we already have this node, try to change the
* IP/port of the node with the new one. */
if (sender) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Handshake: we already know node %.40s, "
"updating the address if needed.", sender->name);
- if (nodeUpdateAddressIfNeeded(sender,link,ntohs(hdr->port)))
+ if (nodeUpdateAddressIfNeeded(sender,link,hdr))
{
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
}
- /* Free this node as we alrady have it. This will
+ /* Free this node as we already have it. This will
* cause the link to be freed as well. */
- freeClusterNode(link->node);
+ clusterDelNode(link->node);
return 0;
}
/* First thing to do is replacing the random name with the
* right node name if this was a handshake stage. */
clusterRenameNode(link->node, hdr->sender);
- redisLog(REDIS_DEBUG,"Handshake with node %.40s completed.",
+ serverLog(LL_DEBUG,"Handshake with node %.40s completed.",
link->node->name);
- link->node->flags &= ~REDIS_NODE_HANDSHAKE;
- link->node->flags |= flags&(REDIS_NODE_MASTER|REDIS_NODE_SLAVE);
+ link->node->flags &= ~CLUSTER_NODE_HANDSHAKE;
+ link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
} else if (memcmp(link->node->name,hdr->sender,
- REDIS_CLUSTER_NAMELEN) != 0)
+ CLUSTER_NAMELEN) != 0)
{
/* If the reply has a non matching node ID we
* disconnect this node and set it as not having an associated
* address. */
- redisLog(REDIS_DEBUG,"PONG contains mismatching sender ID");
- link->node->flags |= REDIS_NODE_NOADDR;
+ serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d",
+ link->node->name,
+ (int)(mstime()-(link->node->ctime)),
+ link->node->flags);
+ link->node->flags |= CLUSTER_NODE_NOADDR;
link->node->ip[0] = '\0';
link->node->port = 0;
+ link->node->cport = 0;
freeClusterLink(link);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
return 0;
@@ -1632,7 +1808,7 @@ int clusterProcessPacket(clusterLink *link) {
/* Update the node address if it changed. */
if (sender && type == CLUSTERMSG_TYPE_PING &&
!nodeInHandshake(sender) &&
- nodeUpdateAddressIfNeeded(sender,link,ntohs(hdr->port)))
+ nodeUpdateAddressIfNeeded(sender,link,hdr))
{
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
@@ -1650,7 +1826,7 @@ int clusterProcessPacket(clusterLink *link) {
* The FAIL condition is also reversible under specific
* conditions detected by clearNodeFailureIfNeeded(). */
if (nodeTimedOut(link->node)) {
- link->node->flags &= ~REDIS_NODE_PFAIL;
+ link->node->flags &= ~CLUSTER_NODE_PFAIL;
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
} else if (nodeFailed(link->node)) {
@@ -1660,7 +1836,7 @@ int clusterProcessPacket(clusterLink *link) {
/* Check for role switch: slave -> master or master -> slave. */
if (sender) {
- if (!memcmp(hdr->slaveof,REDIS_NODE_NULL_NAME,
+ if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME,
sizeof(hdr->slaveof)))
{
/* Node is a master. */
@@ -1672,11 +1848,9 @@ int clusterProcessPacket(clusterLink *link) {
if (nodeIsMaster(sender)) {
/* Master turned into a slave! Reconfigure the node. */
clusterDelNodeSlots(sender);
- sender->flags &= ~REDIS_NODE_MASTER;
- sender->flags |= REDIS_NODE_SLAVE;
-
- /* Remove the list of slaves from the node. */
- if (sender->numslaves) clusterNodeResetSlaves(sender);
+ sender->flags &= ~(CLUSTER_NODE_MASTER|
+ CLUSTER_NODE_MIGRATE_TO);
+ sender->flags |= CLUSTER_NODE_SLAVE;
/* Update config and state. */
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
@@ -1699,7 +1873,7 @@ int clusterProcessPacket(clusterLink *link) {
/* Update our info about served slots.
*
* Note: this MUST happen after we update the master/slave state
- * so that REDIS_NODE_MASTER flag will be set. */
+ * so that CLUSTER_NODE_MASTER flag will be set. */
/* Many checks are only needed if the set of served slots this
* instance claims is different compared to the set of slots we have
@@ -1743,14 +1917,14 @@ int clusterProcessPacket(clusterLink *link) {
if (sender && dirty_slots) {
int j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(hdr->myslots,j)) {
if (server.cluster->slots[j] == sender ||
server.cluster->slots[j] == NULL) continue;
if (server.cluster->slots[j]->configEpoch >
senderConfigEpoch)
{
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s has old slots configuration, sending "
"an UPDATE message about %.40s",
sender->name, server.cluster->slots[j]->name);
@@ -1776,27 +1950,27 @@ int clusterProcessPacket(clusterLink *link) {
}
/* Get info from the gossip section */
- clusterProcessGossipSection(hdr,link);
+ if (sender) clusterProcessGossipSection(hdr,link);
} else if (type == CLUSTERMSG_TYPE_FAIL) {
clusterNode *failing;
if (sender) {
failing = clusterLookupNode(hdr->data.fail.about.nodename);
if (failing &&
- !(failing->flags & (REDIS_NODE_FAIL|REDIS_NODE_MYSELF)))
+ !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF)))
{
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"FAIL message received from %.40s about %.40s",
hdr->sender, hdr->data.fail.about.nodename);
- failing->flags |= REDIS_NODE_FAIL;
+ failing->flags |= CLUSTER_NODE_FAIL;
failing->fail_time = mstime();
- failing->flags &= ~REDIS_NODE_PFAIL;
+ failing->flags &= ~CLUSTER_NODE_PFAIL;
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
}
} else {
- redisLog(REDIS_NOTICE,
- "Ignoring FAIL message from unknonw node %.40s about %.40s",
+ serverLog(LL_NOTICE,
+ "Ignoring FAIL message from unknown node %.40s about %.40s",
hdr->sender, hdr->data.fail.about.nodename);
}
} else if (type == CLUSTERMSG_TYPE_PUBLISH) {
@@ -1842,10 +2016,10 @@ int clusterProcessPacket(clusterLink *link) {
/* Manual failover requested from slaves. Initialize the state
* accordingly. */
resetManualFailover();
- server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
+ server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
server.cluster->mf_slave = sender;
- pauseClients(mstime()+(REDIS_CLUSTER_MF_TIMEOUT*2));
- redisLog(REDIS_WARNING,"Manual failover requested by slave %.40s.",
+ pauseClients(mstime()+(CLUSTER_MF_TIMEOUT*2));
+ serverLog(LL_WARNING,"Manual failover requested by slave %.40s.",
sender->name);
} else if (type == CLUSTERMSG_TYPE_UPDATE) {
clusterNode *n; /* The node the update is about. */
@@ -1865,12 +2039,12 @@ int clusterProcessPacket(clusterLink *link) {
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_FSYNC_CONFIG);
- /* Check the bitmap of served slots and udpate our
+ /* Check the bitmap of served slots and update our
* config accordingly. */
clusterUpdateSlotsConfigWith(n,reportedConfigEpoch,
hdr->data.update.nodecfg.slots);
} else {
- redisLog(REDIS_WARNING,"Received unknown packet type: %d", type);
+ serverLog(LL_WARNING,"Received unknown packet type: %d", type);
}
return 1;
}
@@ -1891,12 +2065,12 @@ void handleLinkIOError(clusterLink *link) {
void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
clusterLink *link = (clusterLink*) privdata;
ssize_t nwritten;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(mask);
nwritten = write(fd, link->sndbuf, sdslen(link->sndbuf));
if (nwritten <= 0) {
- redisLog(REDIS_DEBUG,"I/O error writing to node link: %s",
+ serverLog(LL_DEBUG,"I/O error writing to node link: %s",
strerror(errno));
handleLinkIOError(link);
return;
@@ -1914,9 +2088,9 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
ssize_t nread;
clusterMsg *hdr;
clusterLink *link = (clusterLink*) privdata;
- int readlen, rcvbuflen;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ unsigned int readlen, rcvbuflen;
+ UNUSED(el);
+ UNUSED(mask);
while(1) { /* Read as long as there is data to read. */
rcvbuflen = sdslen(link->rcvbuf);
@@ -1933,7 +2107,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (memcmp(hdr->sig,"RCmb",4) != 0 ||
ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bad message length or signature received "
"from Cluster bus.");
handleLinkIOError(link);
@@ -1949,7 +2123,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (nread <= 0) {
/* I/O error... */
- redisLog(REDIS_DEBUG,"I/O error reading from node link: %s",
+ serverLog(LL_DEBUG,"I/O error reading from node link: %s",
(nread == 0) ? "connection closed" : strerror(errno));
handleLinkIOError(link);
return;
@@ -1983,7 +2157,12 @@ void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) {
clusterWriteHandler,link);
link->sndbuf = sdscatlen(link->sndbuf, msg, msglen);
- server.cluster->stats_bus_messages_sent++;
+
+ /* Populate sent messages stats. */
+ clusterMsg *hdr = (clusterMsg*) msg;
+ uint16_t type = ntohs(hdr->type);
+ if (type < CLUSTERMSG_TYPE_COUNT)
+ server.cluster->stats_bus_messages_sent[type]++;
}
/* Send a message to all the nodes that are part of the cluster having
@@ -2001,14 +2180,15 @@ void clusterBroadcastMessage(void *buf, size_t len) {
clusterNode *node = dictGetVal(de);
if (!node->link) continue;
- if (node->flags & (REDIS_NODE_MYSELF|REDIS_NODE_HANDSHAKE))
+ if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
continue;
clusterSendMessage(node->link,buf,len);
}
dictReleaseIterator(di);
}
-/* Build the message header */
+/* Build the message header. hdr must point to a buffer at least
+ * sizeof(clusterMsg) in bytes. */
void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
int totlen = 0;
uint64_t offset;
@@ -2022,18 +2202,36 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
myself->slaveof : myself;
memset(hdr,0,sizeof(*hdr));
+ hdr->ver = htons(CLUSTER_PROTO_VER);
hdr->sig[0] = 'R';
hdr->sig[1] = 'C';
hdr->sig[2] = 'm';
hdr->sig[3] = 'b';
hdr->type = htons(type);
- memcpy(hdr->sender,myself->name,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN);
+
+ /* If cluster-announce-ip option is enabled, force the receivers of our
+ * packets to use the specified address for this node. Otherwise if the
+ * first byte is zero, they'll do auto discovery. */
+ memset(hdr->myip,0,NET_IP_STR_LEN);
+ if (server.cluster_announce_ip) {
+ strncpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN);
+ hdr->myip[NET_IP_STR_LEN-1] = '\0';
+ }
+
+ /* Handle cluster-announce-port as well. */
+ int announced_port = server.cluster_announce_port ?
+ server.cluster_announce_port : server.port;
+ int announced_cport = server.cluster_announce_bus_port ?
+ server.cluster_announce_bus_port :
+ (server.port + CLUSTER_PORT_INCR);
memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots));
- memset(hdr->slaveof,0,REDIS_CLUSTER_NAMELEN);
+ memset(hdr->slaveof,0,CLUSTER_NAMELEN);
if (myself->slaveof != NULL)
- memcpy(hdr->slaveof,myself->slaveof->name, REDIS_CLUSTER_NAMELEN);
- hdr->port = htons(server.port);
+ memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN);
+ hdr->port = htons(announced_port);
+ hdr->cport = htons(announced_cport);
hdr->flags = htons(myself->flags);
hdr->state = server.cluster->state;
@@ -2065,68 +2263,161 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
/* For PING, PONG, and MEET, fixing the totlen field is up to the caller. */
}
+/* Return non zero if the node is already present in the gossip section of the
+ * message pointed by 'hdr' and having 'count' gossip entries. Otherwise
+ * zero is returned. Helper for clusterSendPing(). */
+int clusterNodeIsInGossipSection(clusterMsg *hdr, int count, clusterNode *n) {
+ int j;
+ for (j = 0; j < count; j++) {
+ if (memcmp(hdr->data.ping.gossip[j].nodename,n->name,
+ CLUSTER_NAMELEN) == 0) break;
+ }
+ return j != count;
+}
+
+/* Set the i-th entry of the gossip section in the message pointed by 'hdr'
+ * to the info of the specified node 'n'. */
+void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) {
+ clusterMsgDataGossip *gossip;
+ gossip = &(hdr->data.ping.gossip[i]);
+ memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN);
+ gossip->ping_sent = htonl(n->ping_sent/1000);
+ gossip->pong_received = htonl(n->pong_received/1000);
+ memcpy(gossip->ip,n->ip,sizeof(n->ip));
+ gossip->port = htons(n->port);
+ gossip->cport = htons(n->cport);
+ gossip->flags = htons(n->flags);
+ gossip->notused1 = 0;
+}
+
/* Send a PING or PONG packet to the specified node, making sure to add enough
* gossip informations. */
void clusterSendPing(clusterLink *link, int type) {
- unsigned char buf[sizeof(clusterMsg)];
- clusterMsg *hdr = (clusterMsg*) buf;
- int gossipcount = 0, totlen;
- /* freshnodes is the number of nodes we can still use to populate the
- * gossip section of the ping packet. Basically we start with the nodes
- * we have in memory minus two (ourself and the node we are sending the
- * message to). Every time we add a node we decrement the counter, so when
- * it will drop to <= zero we know there is no more gossip info we can
- * send. */
+ unsigned char *buf;
+ clusterMsg *hdr;
+ int gossipcount = 0; /* Number of gossip sections added so far. */
+ int wanted; /* Number of gossip sections we want to append if possible. */
+ int totlen; /* Total packet length. */
+ /* freshnodes is the max number of nodes we can hope to append at all:
+ * nodes available minus two (ourself and the node we are sending the
+ * message to). However practically there may be less valid nodes since
+ * nodes in handshake state, disconnected, are not considered. */
int freshnodes = dictSize(server.cluster->nodes)-2;
+ /* How many gossip sections we want to add? 1/10 of the number of nodes
+ * and anyway at least 3. Why 1/10?
+ *
+ * If we have N masters, with N/10 entries, and we consider that in
+ * node_timeout we exchange with each other node at least 4 packets
+ * (we ping in the worst case in node_timeout/2 time, and we also
+ * receive two pings from the host), we have a total of 8 packets
+ * in the node_timeout*2 failure reports validity time. So we have
+ * that, for a single PFAIL node, we can expect to receive the following
+ * number of failure reports (in the specified window of time):
+ *
+ * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS:
+ *
+ * PROB = probability of being featured in a single gossip entry,
+ * which is 1 / NUM_OF_NODES.
+ * ENTRIES = 10.
+ * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS.
+ *
+ * If we assume we have just masters (so num of nodes and num of masters
+ * is the same), with 1/10 we always get over the majority, and specifically
+ * 80% of the number of nodes, to account for many masters failing at the
+ * same time.
+ *
+ * Since we have non-voting slaves that lower the probability of an entry
+ * to feature our node, we set the number of entires per packet as
+ * 10% of the total nodes we have. */
+ wanted = floor(dictSize(server.cluster->nodes)/10);
+ if (wanted < 3) wanted = 3;
+ if (wanted > freshnodes) wanted = freshnodes;
+
+ /* Include all the nodes in PFAIL state, so that failure reports are
+ * faster to propagate to go from PFAIL to FAIL state. */
+ int pfail_wanted = server.cluster->stats_pfail_nodes;
+
+ /* Compute the maximum totlen to allocate our buffer. We'll fix the totlen
+ * later according to the number of gossip sections we really were able
+ * to put inside the packet. */
+ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ totlen += (sizeof(clusterMsgDataGossip)*(wanted+pfail_wanted));
+ /* Note: clusterBuildMessageHdr() expects the buffer to be always at least
+ * sizeof(clusterMsg) or more. */
+ if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg);
+ buf = zcalloc(totlen);
+ hdr = (clusterMsg*) buf;
+
+ /* Populate the header. */
if (link->node && type == CLUSTERMSG_TYPE_PING)
link->node->ping_sent = mstime();
clusterBuildMessageHdr(hdr,type);
/* Populate the gossip fields */
- while(freshnodes > 0 && gossipcount < 3) {
+ int maxiterations = wanted*3;
+ while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
dictEntry *de = dictGetRandomKey(server.cluster->nodes);
clusterNode *this = dictGetVal(de);
- clusterMsgDataGossip *gossip;
- int j;
+
+ /* Don't include this node: the whole packet header is about us
+ * already, so we just gossip about other nodes. */
+ if (this == myself) continue;
+
+ /* PFAIL nodes will be added later. */
+ if (this->flags & CLUSTER_NODE_PFAIL) continue;
/* In the gossip section don't include:
- * 1) Myself.
- * 2) Nodes in HANDSHAKE state.
+ * 1) Nodes in HANDSHAKE state.
* 3) Nodes with the NOADDR flag set.
* 4) Disconnected nodes if they don't have configured slots.
*/
- if (this == myself ||
- this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
+ if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) ||
(this->link == NULL && this->numslots == 0))
{
- freshnodes--; /* otherwise we may loop forever. */
- continue;
+ freshnodes--; /* Technically not correct, but saves CPU. */
+ continue;
}
- /* Check if we already added this node */
- for (j = 0; j < gossipcount; j++) {
- if (memcmp(hdr->data.ping.gossip[j].nodename,this->name,
- REDIS_CLUSTER_NAMELEN) == 0) break;
- }
- if (j != gossipcount) continue;
+ /* Do not add a node we already have. */
+ if (clusterNodeIsInGossipSection(hdr,gossipcount,this)) continue;
/* Add it */
+ clusterSetGossipEntry(hdr,gossipcount,this);
freshnodes--;
- gossip = &(hdr->data.ping.gossip[gossipcount]);
- memcpy(gossip->nodename,this->name,REDIS_CLUSTER_NAMELEN);
- gossip->ping_sent = htonl(this->ping_sent);
- gossip->pong_received = htonl(this->pong_received);
- memcpy(gossip->ip,this->ip,sizeof(this->ip));
- gossip->port = htons(this->port);
- gossip->flags = htons(this->flags);
gossipcount++;
}
+
+ /* If there are PFAIL nodes, add them at the end. */
+ if (pfail_wanted) {
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL && pfail_wanted > 0) {
+ clusterNode *node = dictGetVal(de);
+ if (node->flags & CLUSTER_NODE_HANDSHAKE) continue;
+ if (node->flags & CLUSTER_NODE_NOADDR) continue;
+ if (!(node->flags & CLUSTER_NODE_PFAIL)) continue;
+ clusterSetGossipEntry(hdr,gossipcount,node);
+ freshnodes--;
+ gossipcount++;
+ /* We take the count of the slots we allocated, since the
+ * PFAIL stats may not match perfectly with the current number
+ * of PFAIL nodes. */
+ pfail_wanted--;
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Ready to send... fix the totlen field and queue the message in the
+ * output buffer. */
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
totlen += (sizeof(clusterMsgDataGossip)*gossipcount);
hdr->count = htons(gossipcount);
hdr->totlen = htonl(totlen);
clusterSendMessage(link,buf,totlen);
+ zfree(buf);
}
/* Send a PONG packet to every connected node that's not in handshake state
@@ -2182,7 +2473,7 @@ void clusterSendPublish(clusterLink *link, robj *channel, robj *message) {
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_PUBLISH);
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
- totlen += sizeof(clusterMsgDataPublish) + channel_len + message_len;
+ totlen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
hdr->data.publish.msg.channel_len = htonl(channel_len);
hdr->data.publish.msg.message_len = htonl(message_len);
@@ -2212,15 +2503,15 @@ void clusterSendPublish(clusterLink *link, robj *channel, robj *message) {
/* Send a FAIL message to all the nodes we are able to contact.
* The FAIL message is sent when we detect that a node is failing
- * (REDIS_NODE_PFAIL) and we also receive a gossip confirmation of this:
- * we switch the node state to REDIS_NODE_FAIL and ask all the other
+ * (CLUSTER_NODE_PFAIL) and we also receive a gossip confirmation of this:
+ * we switch the node state to CLUSTER_NODE_FAIL and ask all the other
* nodes to do the same ASAP. */
void clusterSendFail(char *nodename) {
unsigned char buf[sizeof(clusterMsg)];
clusterMsg *hdr = (clusterMsg*) buf;
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_FAIL);
- memcpy(hdr->data.fail.about.nodename,nodename,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->data.fail.about.nodename,nodename,CLUSTER_NAMELEN);
clusterBroadcastMessage(buf,ntohl(hdr->totlen));
}
@@ -2233,7 +2524,7 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) {
if (link == NULL) return;
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_UPDATE);
- memcpy(hdr->data.update.nodecfg.nodename,node->name,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->data.update.nodecfg.nodename,node->name,CLUSTER_NAMELEN);
hdr->data.update.nodecfg.configEpoch = htonu64(node->configEpoch);
memcpy(hdr->data.update.nodecfg.slots,node->slots,sizeof(node->slots));
clusterSendMessage(link,buf,ntohl(hdr->totlen));
@@ -2321,7 +2612,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
* our currentEpoch was updated as a side effect of receiving this
* request, if the request epoch was greater. */
if (requestCurrentEpoch < server.cluster->currentEpoch) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: reqEpoch (%llu) < curEpoch(%llu)",
node->name,
(unsigned long long) requestCurrentEpoch,
@@ -2331,7 +2622,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* I already voted for this epoch? Return ASAP. */
if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: already voted for epoch %llu",
node->name,
(unsigned long long) server.cluster->currentEpoch);
@@ -2345,15 +2636,15 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
(!nodeFailed(master) && !force_ack))
{
if (nodeIsMaster(node)) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: it is a master node",
node->name);
} else if (master == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: I don't know its master",
node->name);
} else if (!nodeFailed(master)) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: its master is up",
node->name);
}
@@ -2365,7 +2656,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
* of the algorithm but makes the base case more linear. */
if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: "
"can't vote about this master before %lld milliseconds",
node->name,
@@ -2377,7 +2668,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* The slave requesting the vote must have a configEpoch for the claimed
* slots that is >= the one of the masters currently serving the same
* slots in the current configuration. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(claimed_slots, j) == 0) continue;
if (server.cluster->slots[j] == NULL ||
server.cluster->slots[j]->configEpoch <= requestConfigEpoch)
@@ -2387,7 +2678,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* If we reached this point we found a slot that in our current slots
* is served by a master with a greater configEpoch than the one claimed
* by the slave requesting our vote. Refuse to vote for this slave. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: "
"slot %d epoch (%llu) > reqEpoch (%llu)",
node->name, j,
@@ -2400,7 +2691,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
clusterSendFailoverAuth(node);
server.cluster->lastVoteEpoch = server.cluster->currentEpoch;
node->slaveof->voted_time = mstime();
- redisLog(REDIS_WARNING, "Failover auth granted to %.40s for epoch %llu",
+ serverLog(LL_WARNING, "Failover auth granted to %.40s for epoch %llu",
node->name, (unsigned long long) server.cluster->currentEpoch);
}
@@ -2421,7 +2712,7 @@ int clusterGetSlaveRank(void) {
int j, rank = 0;
clusterNode *master;
- redisAssert(nodeIsSlave(myself));
+ serverAssert(nodeIsSlave(myself));
master = myself->slaveof;
if (master == NULL) return 0; /* Never called by slaves without master. */
@@ -2432,6 +2723,106 @@ int clusterGetSlaveRank(void) {
return rank;
}
+/* This function is called by clusterHandleSlaveFailover() in order to
+ * let the slave log why it is not able to failover. Sometimes there are
+ * not the conditions, but since the failover function is called again and
+ * again, we can't log the same things continuously.
+ *
+ * This function works by logging only if a given set of conditions are
+ * true:
+ *
+ * 1) The reason for which the failover can't be initiated changed.
+ * The reasons also include a NONE reason we reset the state to
+ * when the slave finds that its master is fine (no FAIL flag).
+ * 2) Also, the log is emitted again if the master is still down and
+ * the reason for not failing over is still the same, but more than
+ * CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
+ * 3) Finally, the function only logs if the slave is down for more than
+ * five seconds + NODE_TIMEOUT. This way nothing is logged when a
+ * failover starts in a reasonable time.
+ *
+ * The function is called with the reason why the slave can't failover
+ * which is one of the integer macros CLUSTER_CANT_FAILOVER_*.
+ *
+ * The function is guaranteed to be called only if 'myself' is a slave. */
+void clusterLogCantFailover(int reason) {
+ char *msg;
+ static time_t lastlog_time = 0;
+ mstime_t nolog_fail_time = server.cluster_node_timeout + 5000;
+
+ /* Don't log if we have the same reason for some time. */
+ if (reason == server.cluster->cant_failover_reason &&
+ time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
+ return;
+
+ server.cluster->cant_failover_reason = reason;
+
+ /* We also don't emit any log if the master failed not long ago, the
+ * goal of this function is to log slaves in a stalled condition for
+ * a long time. */
+ if (myself->slaveof &&
+ nodeFailed(myself->slaveof) &&
+ (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return;
+
+ switch(reason) {
+ case CLUSTER_CANT_FAILOVER_DATA_AGE:
+ msg = "Disconnected from master for longer than allowed. "
+ "Please check the 'cluster-slave-validity-factor' configuration "
+ "option.";
+ break;
+ case CLUSTER_CANT_FAILOVER_WAITING_DELAY:
+ msg = "Waiting the delay before I can start a new failover.";
+ break;
+ case CLUSTER_CANT_FAILOVER_EXPIRED:
+ msg = "Failover attempt expired.";
+ break;
+ case CLUSTER_CANT_FAILOVER_WAITING_VOTES:
+ msg = "Waiting for votes, but majority still not reached.";
+ break;
+ default:
+ msg = "Unknown reason code.";
+ break;
+ }
+ lastlog_time = time(NULL);
+ serverLog(LL_WARNING,"Currently unable to failover: %s", msg);
+}
+
+/* This function implements the final part of automatic and manual failovers,
+ * where the slave grabs its master's hash slots, and propagates the new
+ * configuration.
+ *
+ * Note that it's up to the caller to be sure that the node got a new
+ * configuration epoch already. */
+void clusterFailoverReplaceYourMaster(void) {
+ int j;
+ clusterNode *oldmaster = myself->slaveof;
+
+ if (nodeIsMaster(myself) || oldmaster == NULL) return;
+
+ /* 1) Turn this node into a master. */
+ clusterSetNodeAsMaster(myself);
+ replicationUnsetMaster();
+
+ /* 2) Claim all the slots assigned to our master. */
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(oldmaster,j)) {
+ clusterDelSlot(j);
+ clusterAddSlot(myself,j);
+ }
+ }
+
+ /* 3) Update state and save config. */
+ clusterUpdateState();
+ clusterSaveConfigOrDie(1);
+
+ /* 4) Pong all the other nodes so that they can update the state
+ * accordingly and detect that we switched to master role. */
+ clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+
+ /* 5) If there was a manual failover in progress, clear the state. */
+ resetManualFailover();
+}
+
/* This function is called if we are a slave node and our master serving
* a non-zero amount of hash slots is in FAIL state.
*
@@ -2446,16 +2837,15 @@ void clusterHandleSlaveFailover(void) {
int needed_quorum = (server.cluster->size / 2) + 1;
int manual_failover = server.cluster->mf_end != 0 &&
server.cluster->mf_can_start;
- int j;
mstime_t auth_timeout, auth_retry_time;
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER;
/* Compute the failover timeout (the max time we have to send votes
* and wait for replies), and the failover retry time (the time to wait
- * before waiting again.
+ * before trying to get voted again).
*
- * Timeout is MIN(NODE_TIMEOUT*2,2000) milliseconds.
+ * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds.
* Retry is two times the Timeout.
*/
auth_timeout = server.cluster_node_timeout*2;
@@ -2470,11 +2860,17 @@ void clusterHandleSlaveFailover(void) {
if (nodeIsMaster(myself) ||
myself->slaveof == NULL ||
(!nodeFailed(myself->slaveof) && !manual_failover) ||
- myself->slaveof->numslots == 0) return;
+ myself->slaveof->numslots == 0)
+ {
+ /* There are no reasons to failover, so we set the reason why we
+ * are returning without failing over to NONE. */
+ server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
+ return;
+ }
/* Set data_age to the number of seconds we are disconnected from
* the master. */
- if (server.repl_state == REDIS_REPL_CONNECTED) {
+ if (server.repl_state == REPL_STATE_CONNECTED) {
data_age = (mstime_t)(server.unixtime - server.master->lastinteraction)
* 1000;
} else {
@@ -2496,7 +2892,10 @@ void clusterHandleSlaveFailover(void) {
(((mstime_t)server.repl_ping_slave_period * 1000) +
(server.cluster_node_timeout * server.cluster_slave_validity_factor)))
{
- if (!manual_failover) return;
+ if (!manual_failover) {
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE);
+ return;
+ }
}
/* If the previous failover attempt timedout and the retry time has
@@ -2518,7 +2917,7 @@ void clusterHandleSlaveFailover(void) {
server.cluster->failover_auth_time = mstime();
server.cluster->failover_auth_rank = 0;
}
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Start of election delayed for %lld milliseconds "
"(rank #%d, offset %lld).",
server.cluster->failover_auth_time - mstime(),
@@ -2545,23 +2944,29 @@ void clusterHandleSlaveFailover(void) {
(newrank - server.cluster->failover_auth_rank) * 1000;
server.cluster->failover_auth_time += added_delay;
server.cluster->failover_auth_rank = newrank;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Slave rank updated to #%d, added %lld milliseconds of delay.",
newrank, added_delay);
}
}
/* Return ASAP if we can't still start the election. */
- if (mstime() < server.cluster->failover_auth_time) return;
+ if (mstime() < server.cluster->failover_auth_time) {
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY);
+ return;
+ }
/* Return ASAP if the election is too old to be valid. */
- if (auth_age > auth_timeout) return;
+ if (auth_age > auth_timeout) {
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED);
+ return;
+ }
/* Ask for votes if needed. */
if (server.cluster->failover_auth_sent == 0) {
server.cluster->currentEpoch++;
server.cluster->failover_auth_epoch = server.cluster->currentEpoch;
- redisLog(REDIS_WARNING,"Starting a failover election for epoch %llu.",
+ serverLog(LL_WARNING,"Starting a failover election for epoch %llu.",
(unsigned long long) server.cluster->currentEpoch);
clusterRequestFailoverAuth();
server.cluster->failover_auth_sent = 1;
@@ -2573,43 +2978,23 @@ void clusterHandleSlaveFailover(void) {
/* Check if we reached the quorum. */
if (server.cluster->failover_auth_count >= needed_quorum) {
- clusterNode *oldmaster = myself->slaveof;
+ /* We have the quorum, we can finally failover the master. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover election won: I'm the new master.");
- /* We have the quorum, perform all the steps to correctly promote
- * this slave to a master.
- *
- * 1) Turn this node into a master. */
- clusterSetNodeAsMaster(myself);
- replicationUnsetMaster();
-
- /* 2) Claim all the slots assigned to our master. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (clusterNodeGetSlotBit(oldmaster,j)) {
- clusterDelSlot(j);
- clusterAddSlot(myself,j);
- }
- }
- /* 3) Update my configEpoch to the epoch of the election. */
+ /* Update my configEpoch to the epoch of the election. */
if (myself->configEpoch < server.cluster->failover_auth_epoch) {
myself->configEpoch = server.cluster->failover_auth_epoch;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"configEpoch set to %llu after successful failover",
(unsigned long long) myself->configEpoch);
}
- /* 4) Update state and save config. */
- clusterUpdateState();
- clusterSaveConfigOrDie(1);
-
- /* 5) Pong all the other nodes so that they can update the state
- * accordingly and detect that we switched to master role. */
- clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
-
- /* 6) If there was a manual failover in progress, clear the state. */
- resetManualFailover();
+ /* Take responsability for the cluster slots. */
+ clusterFailoverReplaceYourMaster();
+ } else {
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES);
}
}
@@ -2619,7 +3004,7 @@ void clusterHandleSlaveFailover(void) {
* Slave migration is the process that allows a slave of a master that is
* already covered by at least another slave, to "migrate" to a master that
* is orpaned, that is, left with no working slaves.
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------- */
/* This function is responsible to decide if this replica should be migrated
* to a different (orphaned) master. It is called by the clusterCron() function
@@ -2647,7 +3032,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
dictEntry *de;
/* Step 1: Don't migrate if the cluster state is not ok. */
- if (server.cluster->state != REDIS_CLUSTER_OK) return;
+ if (server.cluster->state != CLUSTER_OK) return;
/* Step 2: Don't migrate if my master will not be left with at least
* 'migration-barrier' slaves after my migration. */
@@ -2659,46 +3044,66 @@ void clusterHandleSlaveMigration(int max_slaves) {
/* Step 3: Idenitfy a candidate for migration, and check if among the
* masters with the greatest number of ok slaves, I'm the one with the
- * smaller node ID.
+ * smallest node ID (the "candidate slave").
*
- * Note that this means that eventually a replica migration will occurr
+ * Note: this means that eventually a replica migration will occurr
* since slaves that are reachable again always have their FAIL flag
- * cleared. At the same time this does not mean that there are no
- * race conditions possible (two slaves migrating at the same time), but
- * this is extremely unlikely to happen, and harmless. */
+ * cleared, so eventually there must be a candidate. At the same time
+ * this does not mean that there are no race conditions possible (two
+ * slaves migrating at the same time), but this is unlikely to
+ * happen, and harmless when happens. */
candidate = myself;
di = dictGetSafeIterator(server.cluster->nodes);
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
- int okslaves;
+ int okslaves = 0, is_orphaned = 1;
- /* Only iterate over working masters. */
- if (nodeIsSlave(node) || nodeFailed(node)) continue;
- /* If this master never had slaves so far, don't migrate. We want
- * to migrate to a master that remained orphaned, not masters that
- * were never configured to have slaves. */
- if (node->numslaves == 0) continue;
- okslaves = clusterCountNonFailingSlaves(node);
+ /* We want to migrate only if this master is working, orphaned, and
+ * used to have slaves or if failed over a master that had slaves
+ * (MIGRATE_TO flag). This way we only migrate to instances that were
+ * supposed to have replicas. */
+ if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0;
+ if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0;
- if (okslaves == 0 && target == NULL && node->numslots > 0)
- target = node;
+ /* Check number of working slaves. */
+ if (nodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node);
+ if (okslaves > 0) is_orphaned = 0;
+ if (is_orphaned) {
+ if (!target && node->numslots > 0) target = node;
+
+ /* Track the starting time of the orphaned condition for this
+ * master. */
+ if (!node->orphaned_time) node->orphaned_time = mstime();
+ } else {
+ node->orphaned_time = 0;
+ }
+
+ /* Check if I'm the slave candidate for the migration: attached
+ * to a master with the maximum number of slaves and with the smallest
+ * node ID. */
if (okslaves == max_slaves) {
for (j = 0; j < node->numslaves; j++) {
if (memcmp(node->slaves[j]->name,
candidate->name,
- REDIS_CLUSTER_NAMELEN) < 0)
+ CLUSTER_NAMELEN) < 0)
{
candidate = node->slaves[j];
}
}
}
}
+ dictReleaseIterator(di);
/* Step 4: perform the migration if there is a target, and if I'm the
- * candidate. */
- if (target && candidate == myself) {
- redisLog(REDIS_WARNING,"Migrating to orphaned master %.40s",
+ * candidate, but only if the master is continuously orphaned for a
+ * couple of seconds, so that during failovers, we give some time to
+ * the natural slaves of this instance to advertise their switch from
+ * the old master to the new one. */
+ if (target && candidate == myself &&
+ (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY)
+ {
+ serverLog(LL_WARNING,"Migrating to orphaned master %.40s",
target->name);
clusterSetMaster(target);
}
@@ -2712,7 +3117,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
* setting mf_end to the millisecond unix time at which we'll abort the
* attempt.
* 2) Slave sends a MFSTART message to the master requesting to pause clients
- * for two times the manual failover timeout REDIS_CLUSTER_MF_TIMEOUT.
+ * for two times the manual failover timeout CLUSTER_MF_TIMEOUT.
* When master is paused for manual failover, it also starts to flag
* packets with CLUSTERMSG_FLAG0_PAUSED.
* 3) Slave waits for master to send its replication offset flagged as PAUSED.
@@ -2752,7 +3157,7 @@ void resetManualFailover(void) {
/* If a manual failover timed out, abort it. */
void manualFailoverCheckTimeout(void) {
if (server.cluster->mf_end && server.cluster->mf_end < mstime()) {
- redisLog(REDIS_WARNING,"Manual failover timed out.");
+ serverLog(LL_WARNING,"Manual failover timed out.");
resetManualFailover();
}
}
@@ -2763,7 +3168,7 @@ void clusterHandleManualFailover(void) {
/* Return ASAP if no manual failover is in progress. */
if (server.cluster->mf_end == 0) return;
- /* If mf_can_start is non-zero, the failover was alrady triggered so the
+ /* If mf_can_start is non-zero, the failover was already triggered so the
* next steps are performed by clusterHandleSlaveFailover(). */
if (server.cluster->mf_can_start) return;
@@ -2773,7 +3178,7 @@ void clusterHandleManualFailover(void) {
/* Our replication offset matches the master replication offset
* announced after clients were paused. We can start the failover. */
server.cluster->mf_can_start = 1;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"All master replication stream processed, "
"manual failover can start.");
}
@@ -2798,6 +3203,31 @@ void clusterCron(void) {
iteration++; /* Number of times this function was called so far. */
+ /* We want to take myself->ip in sync with the cluster-announce-ip option.
+ * The option can be set at runtime via CONFIG SET, so we periodically check
+ * if the option changed to reflect this into myself->ip. */
+ {
+ static char *prev_ip = NULL;
+ char *curr_ip = server.cluster_announce_ip;
+ int changed = 0;
+
+ if (prev_ip == NULL && curr_ip != NULL) changed = 1;
+ if (prev_ip != NULL && curr_ip == NULL) changed = 1;
+ if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1;
+
+ if (changed) {
+ prev_ip = curr_ip;
+ if (prev_ip) prev_ip = zstrdup(prev_ip);
+
+ if (curr_ip) {
+ strncpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN);
+ myself->ip[NET_IP_STR_LEN-1] = '\0';
+ } else {
+ myself->ip[0] = '\0'; /* Force autodetection. */
+ }
+ }
+ }
+
/* The handshake timeout is the time after which a handshake node that was
* not turned into a normal node is removed from the nodes. Usually it is
* just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use
@@ -2805,17 +3235,25 @@ void clusterCron(void) {
handshake_timeout = server.cluster_node_timeout;
if (handshake_timeout < 1000) handshake_timeout = 1000;
- /* Check if we have disconnected nodes and re-establish the connection. */
+ /* Check if we have disconnected nodes and re-establish the connection.
+ * Also update a few stats while we are here, that can be used to make
+ * better decisions in other part of the code. */
di = dictGetSafeIterator(server.cluster->nodes);
+ server.cluster->stats_pfail_nodes = 0;
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
- if (node->flags & (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR)) continue;
+ /* Not interested in reconnecting the link with myself or nodes
+ * for which we have no address. */
+ if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) continue;
+
+ if (node->flags & CLUSTER_NODE_PFAIL)
+ server.cluster->stats_pfail_nodes++;
/* A Node in HANDSHAKE state has a limited lifespan equal to the
* configured node timeout. */
if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
- freeClusterNode(node);
+ clusterDelNode(node);
continue;
}
@@ -2825,12 +3263,17 @@ void clusterCron(void) {
clusterLink *link;
fd = anetTcpNonBlockBindConnect(server.neterr, node->ip,
- node->port+REDIS_CLUSTER_PORT_INCR, REDIS_BIND_ADDR);
+ node->cport, NET_FIRST_BIND_ADDR);
if (fd == -1) {
- redisLog(REDIS_DEBUG, "Unable to connect to "
+ /* We got a synchronous error from connect before
+ * clusterSendPing() had a chance to be called.
+ * If node->ping_sent is zero, failure detection can't work,
+ * so we claim we actually sent a ping now (that will
+ * be really sent as soon as the link is obtained). */
+ if (node->ping_sent == 0) node->ping_sent = mstime();
+ serverLog(LL_DEBUG, "Unable to connect to "
"Cluster Node [%s]:%d -> %s", node->ip,
- node->port+REDIS_CLUSTER_PORT_INCR,
- server.neterr);
+ node->cport, server.neterr);
continue;
}
link = createClusterLink(node);
@@ -2845,7 +3288,7 @@ void clusterCron(void) {
* of a PING one, to force the receiver to add us in its node
* table. */
old_ping_sent = node->ping_sent;
- clusterSendPing(link, node->flags & REDIS_NODE_MEET ?
+ clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ?
CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING);
if (old_ping_sent) {
/* If there was an active ping before the link was
@@ -2858,10 +3301,10 @@ void clusterCron(void) {
* to this node. Instead after the PONG is received and we
* are no longer in meet/handshake status, we want to send
* normal PING packets. */
- node->flags &= ~REDIS_NODE_MEET;
+ node->flags &= ~CLUSTER_NODE_MEET;
- redisLog(REDIS_DEBUG,"Connecting with Node %.40s at %s:%d",
- node->name, node->ip, node->port+REDIS_CLUSTER_PORT_INCR);
+ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d",
+ node->name, node->ip, node->cport);
}
}
dictReleaseIterator(di);
@@ -2879,7 +3322,7 @@ void clusterCron(void) {
/* Don't ping nodes disconnected or with a ping currently active. */
if (this->link == NULL || this->ping_sent != 0) continue;
- if (this->flags & (REDIS_NODE_MYSELF|REDIS_NODE_HANDSHAKE))
+ if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
continue;
if (min_pong_node == NULL || min_pong > this->pong_received) {
min_pong_node = this;
@@ -2887,7 +3330,7 @@ void clusterCron(void) {
}
}
if (min_pong_node) {
- redisLog(REDIS_DEBUG,"Pinging node %.40s", min_pong_node->name);
+ serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
}
}
@@ -2908,7 +3351,7 @@ void clusterCron(void) {
mstime_t delay;
if (node->flags &
- (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE))
+ (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
continue;
/* Orphaned master check, useful only if the current instance
@@ -2918,9 +3361,12 @@ void clusterCron(void) {
/* A master is orphaned if it is serving a non-zero number of
* slots, have no working slaves, but used to have at least one
- * slave. */
- if (okslaves == 0 && node->numslots > 0 && node->numslaves)
+ * slave, or failed over a master that used to have slaves. */
+ if (okslaves == 0 && node->numslots > 0 &&
+ node->flags & CLUSTER_NODE_MIGRATE_TO)
+ {
orphaned_masters++;
+ }
if (okslaves > max_slaves) max_slaves = okslaves;
if (nodeIsSlave(myself) && myself->slaveof == node)
this_slaves = okslaves;
@@ -2975,10 +3421,10 @@ void clusterCron(void) {
if (delay > server.cluster_node_timeout) {
/* Timeout reached. Set the node as possibly failing if it is
* not already in this state. */
- if (!(node->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) {
- redisLog(REDIS_DEBUG,"*** NODE %.40s possibly failing",
+ if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
+ serverLog(LL_DEBUG,"*** NODE %.40s possibly failing",
node->name);
- node->flags |= REDIS_NODE_PFAIL;
+ node->flags |= CLUSTER_NODE_PFAIL;
update_state = 1;
}
}
@@ -3011,7 +3457,7 @@ void clusterCron(void) {
clusterHandleSlaveMigration(max_slaves);
}
- if (update_state || server.cluster->state == REDIS_CLUSTER_FAIL)
+ if (update_state || server.cluster->state == CLUSTER_FAIL)
clusterUpdateState();
}
@@ -3072,11 +3518,45 @@ void bitmapClearBit(unsigned char *bitmap, int pos) {
bitmap[byte] &= ~(1<<bit);
}
+/* Return non-zero if there is at least one master with slaves in the cluster.
+ * Otherwise zero is returned. Used by clusterNodeSetSlotBit() to set the
+ * MIGRATE_TO flag the when a master gets the first slot. */
+int clusterMastersHaveSlaves(void) {
+ dictIterator *di = dictGetSafeIterator(server.cluster->nodes);
+ dictEntry *de;
+ int slaves = 0;
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+
+ if (nodeIsSlave(node)) continue;
+ slaves += node->numslaves;
+ }
+ dictReleaseIterator(di);
+ return slaves != 0;
+}
+
/* Set the slot bit and return the old value. */
int clusterNodeSetSlotBit(clusterNode *n, int slot) {
int old = bitmapTestBit(n->slots,slot);
bitmapSetBit(n->slots,slot);
- if (!old) n->numslots++;
+ if (!old) {
+ n->numslots++;
+ /* When a master gets its first slot, even if it has no slaves,
+ * it gets flagged with MIGRATE_TO, that is, the master is a valid
+ * target for replicas migration, if and only if at least one of
+ * the other masters has slaves right now.
+ *
+ * Normally masters are valid targerts of replica migration if:
+ * 1. The used to have slaves (but no longer have).
+ * 2. They are slaves failing over a master that used to have slaves.
+ *
+ * However new masters with slots assigned are considered valid
+ * migration tagets if the rest of the cluster is not a slave-less.
+ *
+ * See https://github.com/antirez/redis/issues/3043 for more info. */
+ if (n->numslots == 1 && clusterMastersHaveSlaves())
+ n->flags |= CLUSTER_NODE_MIGRATE_TO;
+ }
return old;
}
@@ -3094,26 +3574,26 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) {
}
/* Add the specified slot to the list of slots that node 'n' will
- * serve. Return REDIS_OK if the operation ended with success.
+ * serve. Return C_OK if the operation ended with success.
* If the slot is already assigned to another instance this is considered
- * an error and REDIS_ERR is returned. */
+ * an error and C_ERR is returned. */
int clusterAddSlot(clusterNode *n, int slot) {
- if (server.cluster->slots[slot]) return REDIS_ERR;
+ if (server.cluster->slots[slot]) return C_ERR;
clusterNodeSetSlotBit(n,slot);
server.cluster->slots[slot] = n;
- return REDIS_OK;
+ return C_OK;
}
/* Delete the specified slot marking it as unassigned.
- * Returns REDIS_OK if the slot was assigned, otherwise if the slot was
- * already unassigned REDIS_ERR is returned. */
+ * Returns C_OK if the slot was assigned, otherwise if the slot was
+ * already unassigned C_ERR is returned. */
int clusterDelSlot(int slot) {
clusterNode *n = server.cluster->slots[slot];
- if (!n) return REDIS_ERR;
- redisAssert(clusterNodeClearSlotBit(n,slot) == 1);
+ if (!n) return C_ERR;
+ serverAssert(clusterNodeClearSlotBit(n,slot) == 1);
server.cluster->slots[slot] = NULL;
- return REDIS_OK;
+ return C_OK;
}
/* Delete all the slots associated with the specified node.
@@ -3121,9 +3601,11 @@ int clusterDelSlot(int slot) {
int clusterDelNodeSlots(clusterNode *node) {
int deleted = 0, j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (clusterNodeGetSlotBit(node,j)) clusterDelSlot(j);
- deleted++;
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(node,j)) {
+ clusterDelSlot(j);
+ deleted++;
+ }
}
return deleted;
}
@@ -3145,13 +3627,13 @@ void clusterCloseAllSlots(void) {
* and are based on heuristics. Actaully the main point about the rejoin and
* writable delay is that they should be a few orders of magnitude larger
* than the network latency. */
-#define REDIS_CLUSTER_MAX_REJOIN_DELAY 5000
-#define REDIS_CLUSTER_MIN_REJOIN_DELAY 500
-#define REDIS_CLUSTER_WRITABLE_DELAY 2000
+#define CLUSTER_MAX_REJOIN_DELAY 5000
+#define CLUSTER_MIN_REJOIN_DELAY 500
+#define CLUSTER_WRITABLE_DELAY 2000
void clusterUpdateState(void) {
int j, new_state;
- int unreachable_masters = 0;
+ int reachable_masters = 0;
static mstime_t among_minority_time;
static mstime_t first_call_time = 0;
@@ -3165,28 +3647,30 @@ void clusterUpdateState(void) {
* to don't count the DB loading time. */
if (first_call_time == 0) first_call_time = mstime();
if (nodeIsMaster(myself) &&
- server.cluster->state == REDIS_CLUSTER_FAIL &&
- mstime() - first_call_time < REDIS_CLUSTER_WRITABLE_DELAY) return;
+ server.cluster->state == CLUSTER_FAIL &&
+ mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return;
/* Start assuming the state is OK. We'll turn it into FAIL if there
* are the right conditions. */
- new_state = REDIS_CLUSTER_OK;
+ new_state = CLUSTER_OK;
/* Check if all the slots are covered. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (server.cluster->slots[j] == NULL ||
- server.cluster->slots[j]->flags & (REDIS_NODE_FAIL))
- {
- new_state = REDIS_CLUSTER_FAIL;
- break;
+ if (server.cluster_require_full_coverage) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (server.cluster->slots[j] == NULL ||
+ server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL))
+ {
+ new_state = CLUSTER_FAIL;
+ break;
+ }
}
}
/* Compute the cluster size, that is the number of master nodes
* serving at least a single slot.
*
- * At the same time count the number of unreachable masters with
- * at least one node. */
+ * At the same time count the number of reachable masters having
+ * at least one slot. */
{
dictIterator *di;
dictEntry *de;
@@ -3198,21 +3682,20 @@ void clusterUpdateState(void) {
if (nodeIsMaster(node) && node->numslots) {
server.cluster->size++;
- if (node->flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL))
- unreachable_masters++;
+ if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0)
+ reachable_masters++;
}
}
dictReleaseIterator(di);
}
- /* If we can't reach at least half the masters, change the cluster state
- * to FAIL, as we are not even able to mark nodes as FAIL in this side
- * of the netsplit because of lack of majority. */
+ /* If we are in a minority partition, change the cluster state
+ * to FAIL. */
{
int needed_quorum = (server.cluster->size / 2) + 1;
- if (unreachable_masters >= needed_quorum) {
- new_state = REDIS_CLUSTER_FAIL;
+ if (reachable_masters < needed_quorum) {
+ new_state = CLUSTER_FAIL;
among_minority_time = mstime();
}
}
@@ -3225,12 +3708,12 @@ void clusterUpdateState(void) {
* minority, don't let it accept queries for some time after the
* partition heals, to make sure there is enough time to receive
* a configuration update. */
- if (rejoin_delay > REDIS_CLUSTER_MAX_REJOIN_DELAY)
- rejoin_delay = REDIS_CLUSTER_MAX_REJOIN_DELAY;
- if (rejoin_delay < REDIS_CLUSTER_MIN_REJOIN_DELAY)
- rejoin_delay = REDIS_CLUSTER_MIN_REJOIN_DELAY;
+ if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY)
+ rejoin_delay = CLUSTER_MAX_REJOIN_DELAY;
+ if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY)
+ rejoin_delay = CLUSTER_MIN_REJOIN_DELAY;
- if (new_state == REDIS_CLUSTER_OK &&
+ if (new_state == CLUSTER_OK &&
nodeIsMaster(myself) &&
mstime() - among_minority_time < rejoin_delay)
{
@@ -3238,8 +3721,8 @@ void clusterUpdateState(void) {
}
/* Change the state and log the event. */
- redisLog(REDIS_WARNING,"Cluster state changed: %s",
- new_state == REDIS_CLUSTER_OK ? "ok" : "fail");
+ serverLog(LL_WARNING,"Cluster state changed: %s",
+ new_state == CLUSTER_OK ? "ok" : "fail");
server.cluster->state = new_state;
}
}
@@ -3255,13 +3738,13 @@ void clusterUpdateState(void) {
* this lots, we set the slots as IMPORTING from our point of view
* in order to justify we have those slots, and in order to make
* redis-trib aware of the issue, so that it can try to fix it.
- * 2) If we find data in a DB different than DB0 we return REDIS_ERR to
+ * 2) If we find data in a DB different than DB0 we return C_ERR to
* signal the caller it should quit the server with an error message
* or take other actions.
*
- * The function always returns REDIS_OK even if it will try to correct
+ * The function always returns C_OK even if it will try to correct
* the error described in "1". However if data is found in DB different
- * from DB0, REDIS_ERR is returned.
+ * from DB0, C_ERR is returned.
*
* The function also uses the logging facility in order to warn the user
* about desynchronizations between the data we have in memory and the
@@ -3272,16 +3755,16 @@ int verifyClusterConfigWithData(void) {
/* If this node is a slave, don't perform the check at all as we
* completely depend on the replication stream. */
- if (nodeIsSlave(myself)) return REDIS_OK;
+ if (nodeIsSlave(myself)) return C_OK;
/* Make sure we only have keys in DB0. */
for (j = 1; j < server.dbnum; j++) {
- if (dictSize(server.db[j].dict)) return REDIS_ERR;
+ if (dictSize(server.db[j].dict)) return C_ERR;
}
/* Check that all the slots we see populated memory have a corresponding
* entry in the cluster table. Otherwise fix the table. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (!countKeysInSlot(j)) continue; /* No keys in this slot. */
/* Check if we are assigned to this slot or if we are importing it.
* In both cases check the next slot as the configuration makes
@@ -3294,21 +3777,20 @@ int verifyClusterConfigWithData(void) {
* assigned to this slot. Fix this condition. */
update_config++;
- /* Case A: slot is unassigned. Take responsability for it. */
+ /* Case A: slot is unassigned. Take responsibility for it. */
if (server.cluster->slots[j] == NULL) {
- redisLog(REDIS_WARNING, "I've keys about slot %d that is "
- "unassigned. Taking responsability "
- "for it.",j);
+ serverLog(LL_WARNING, "I have keys for unassigned slot %d. "
+ "Taking responsibility for it.",j);
clusterAddSlot(myself,j);
} else {
- redisLog(REDIS_WARNING, "I've keys about slot %d that is "
- "already assigned to a different node. "
- "Setting it in importing state.",j);
+ serverLog(LL_WARNING, "I have keys for slot %d, but the slot is "
+ "assigned to another node. "
+ "Setting it to importing state.",j);
server.cluster->importing_slots_from[j] = server.cluster->slots[j];
}
}
if (update_config) clusterSaveConfigOrDie(1);
- return REDIS_OK;
+ return C_OK;
}
/* -----------------------------------------------------------------------------
@@ -3318,12 +3800,12 @@ int verifyClusterConfigWithData(void) {
/* Set the specified node 'n' as master for this node.
* If this node is currently a master, it is turned into a slave. */
void clusterSetMaster(clusterNode *n) {
- redisAssert(n != myself);
- redisAssert(myself->numslots == 0);
+ serverAssert(n != myself);
+ serverAssert(myself->numslots == 0);
if (nodeIsMaster(myself)) {
- myself->flags &= ~REDIS_NODE_MASTER;
- myself->flags |= REDIS_NODE_SLAVE;
+ myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO);
+ myself->flags |= CLUSTER_NODE_SLAVE;
clusterCloseAllSlots();
} else {
if (myself->slaveof)
@@ -3336,9 +3818,39 @@ void clusterSetMaster(clusterNode *n) {
}
/* -----------------------------------------------------------------------------
- * CLUSTER command
+ * Nodes to string representation functions.
* -------------------------------------------------------------------------- */
+struct redisNodeFlags {
+ uint16_t flag;
+ char *name;
+};
+
+static struct redisNodeFlags redisNodeFlagsTable[] = {
+ {CLUSTER_NODE_MYSELF, "myself,"},
+ {CLUSTER_NODE_MASTER, "master,"},
+ {CLUSTER_NODE_SLAVE, "slave,"},
+ {CLUSTER_NODE_PFAIL, "fail?,"},
+ {CLUSTER_NODE_FAIL, "fail,"},
+ {CLUSTER_NODE_HANDSHAKE, "handshake,"},
+ {CLUSTER_NODE_NOADDR, "noaddr,"}
+};
+
+/* Concatenate the comma separated list of node flags to the given SDS
+ * string 'ci'. */
+sds representClusterNodeFlags(sds ci, uint16_t flags) {
+ size_t orig_len = sdslen(ci);
+ int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags);
+ for (i = 0; i < size; i++) {
+ struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i;
+ if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name);
+ }
+ /* If no flag was added, add the "noflags" special flag. */
+ if (sdslen(ci) == orig_len) ci = sdscat(ci,"noflags,");
+ sdsIncrLen(ci,-1); /* Remove trailing comma. */
+ return ci;
+}
+
/* Generate a csv-alike representation of the specified cluster node.
* See clusterGenNodesDescription() top comment for more information.
*
@@ -3348,46 +3860,39 @@ sds clusterGenNodeDescription(clusterNode *node) {
sds ci;
/* Node coordinates */
- ci = sdscatprintf(sdsempty(),"%.40s %s:%d ",
+ ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ",
node->name,
node->ip,
- node->port);
+ node->port,
+ node->cport);
/* Flags */
- if (node->flags == 0) ci = sdscat(ci,"noflags,");
- if (node->flags & REDIS_NODE_MYSELF) ci = sdscat(ci,"myself,");
- if (node->flags & REDIS_NODE_MASTER) ci = sdscat(ci,"master,");
- if (node->flags & REDIS_NODE_SLAVE) ci = sdscat(ci,"slave,");
- if (node->flags & REDIS_NODE_PFAIL) ci = sdscat(ci,"fail?,");
- if (node->flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
- if (node->flags & REDIS_NODE_HANDSHAKE) ci =sdscat(ci,"handshake,");
- if (node->flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
- if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
+ ci = representClusterNodeFlags(ci, node->flags);
/* Slave of... or just "-" */
if (node->slaveof)
- ci = sdscatprintf(ci,"%.40s ",node->slaveof->name);
+ ci = sdscatprintf(ci," %.40s ",node->slaveof->name);
else
- ci = sdscatprintf(ci,"- ");
+ ci = sdscatlen(ci," - ",3);
- /* Latency from the POV of this node, link status */
+ /* Latency from the POV of this node, config epoch, link status */
ci = sdscatprintf(ci,"%lld %lld %llu %s",
(long long) node->ping_sent,
(long long) node->pong_received,
(unsigned long long) node->configEpoch,
- (node->link || node->flags & REDIS_NODE_MYSELF) ?
+ (node->link || node->flags & CLUSTER_NODE_MYSELF) ?
"connected" : "disconnected");
/* Slots served by this instance */
start = -1;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit;
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
- if (start != -1 && (!bit || j == REDIS_CLUSTER_SLOTS-1)) {
- if (bit && j == REDIS_CLUSTER_SLOTS-1) j++;
+ if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
+ if (bit && j == CLUSTER_SLOTS-1) j++;
if (start == j-1) {
ci = sdscatprintf(ci," %d",start);
@@ -3401,8 +3906,8 @@ sds clusterGenNodeDescription(clusterNode *node) {
/* Just for MYSELF node we also dump info about slots that
* we are migrating to other instances or importing from other
* instances. */
- if (node->flags & REDIS_NODE_MYSELF) {
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ if (node->flags & CLUSTER_NODE_MYSELF) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (server.cluster->migrating_slots_to[j]) {
ci = sdscatprintf(ci," [%d->-%.40s]",j,
server.cluster->migrating_slots_to[j]->name);
@@ -3446,11 +3951,30 @@ sds clusterGenNodesDescription(int filter) {
return ci;
}
-int getSlotOrReply(redisClient *c, robj *o) {
+/* -----------------------------------------------------------------------------
+ * CLUSTER command
+ * -------------------------------------------------------------------------- */
+
+const char *clusterGetMessageTypeString(int type) {
+ switch(type) {
+ case CLUSTERMSG_TYPE_PING: return "ping";
+ case CLUSTERMSG_TYPE_PONG: return "pong";
+ case CLUSTERMSG_TYPE_MEET: return "meet";
+ case CLUSTERMSG_TYPE_FAIL: return "fail";
+ case CLUSTERMSG_TYPE_PUBLISH: return "publish";
+ case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req";
+ case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack";
+ case CLUSTERMSG_TYPE_UPDATE: return "update";
+ case CLUSTERMSG_TYPE_MFSTART: return "mfstart";
+ }
+ return "unknown";
+}
+
+int getSlotOrReply(client *c, robj *o) {
long long slot;
- if (getLongLongFromObject(o,&slot) != REDIS_OK ||
- slot < 0 || slot >= REDIS_CLUSTER_SLOTS)
+ if (getLongLongFromObject(o,&slot) != C_OK ||
+ slot < 0 || slot >= CLUSTER_SLOTS)
{
addReplyError(c,"Invalid or out of range slot");
return -1;
@@ -3458,13 +3982,15 @@ int getSlotOrReply(redisClient *c, robj *o) {
return (int) slot;
}
-void clusterReplyMultiBulkSlots(redisClient *c) {
+void clusterReplyMultiBulkSlots(client *c) {
/* Format: 1) 1) start slot
* 2) end slot
* 3) 1) master IP
* 2) master port
+ * 3) node ID
* 4) 1) replica IP
* 2) replica port
+ * 3) node ID
* ... continued until done
*/
@@ -3481,17 +4007,17 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
* master) and masters not serving any slot. */
if (!nodeIsMaster(node) || node->numslots == 0) continue;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit, i;
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
- if (start != -1 && (!bit || j == REDIS_CLUSTER_SLOTS-1)) {
+ if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
int nested_elements = 3; /* slots (2) + master addr (1). */
void *nested_replylen = addDeferredMultiBulkLength(c);
- if (bit && j == REDIS_CLUSTER_SLOTS-1) j++;
+ if (bit && j == CLUSTER_SLOTS-1) j++;
/* If slot exists in output map, add to it's list.
* else, create a new output map for this slot */
@@ -3505,18 +4031,20 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
start = -1;
/* First node reply position is always the master */
- addReplyMultiBulkLen(c, 2);
+ addReplyMultiBulkLen(c, 3);
addReplyBulkCString(c, node->ip);
addReplyLongLong(c, node->port);
+ addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN);
/* Remaining nodes in reply are replicas for slot range */
for (i = 0; i < node->numslaves; i++) {
/* This loop is copy/pasted from clusterGenNodeDescription()
* with modifications for per-slot node aggregation */
if (nodeFailed(node->slaves[i])) continue;
- addReplyMultiBulkLen(c, 2);
+ addReplyMultiBulkLen(c, 3);
addReplyBulkCString(c, node->slaves[i]->ip);
addReplyLongLong(c, node->slaves[i]->port);
+ addReplyBulkCBuffer(c, node->slaves[i]->name, CLUSTER_NAMELEN);
nested_elements++;
}
setDeferredMultiBulkLength(c, nested_replylen, nested_elements);
@@ -3528,22 +4056,33 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
setDeferredMultiBulkLength(c, slot_replylen, num_masters);
}
-void clusterCommand(redisClient *c) {
+void clusterCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- if (!strcasecmp(c->argv[1]->ptr,"meet") && c->argc == 4) {
- long long port;
+ if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) {
+ /* CLUSTER MEET <ip> <port> [cport] */
+ long long port, cport;
- if (getLongLongFromObject(c->argv[3], &port) != REDIS_OK) {
- addReplyErrorFormat(c,"Invalid TCP port specified: %s",
+ if (getLongLongFromObject(c->argv[3], &port) != C_OK) {
+ addReplyErrorFormat(c,"Invalid TCP base port specified: %s",
(char*)c->argv[3]->ptr);
return;
}
- if (clusterStartHandshake(c->argv[2]->ptr,port) == 0 &&
+ if (c->argc == 5) {
+ if (getLongLongFromObject(c->argv[4], &cport) != C_OK) {
+ addReplyErrorFormat(c,"Invalid TCP bus port specified: %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ } else {
+ cport = port + CLUSTER_PORT_INCR;
+ }
+
+ if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 &&
errno == EINVAL)
{
addReplyErrorFormat(c,"Invalid node address specified: %s:%s",
@@ -3556,9 +4095,12 @@ void clusterCommand(redisClient *c) {
robj *o;
sds ci = clusterGenNodesDescription(0);
- o = createObject(REDIS_STRING,ci);
+ o = createObject(OBJ_STRING,ci);
addReplyBulk(c,o);
decrRefCount(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
+ /* CLUSTER MYID */
+ addReplyBulkCBuffer(c,myself->name, CLUSTER_NAMELEN);
} else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) {
/* CLUSTER SLOTS */
clusterReplyMultiBulkSlots(c);
@@ -3577,11 +4119,11 @@ void clusterCommand(redisClient *c) {
/* CLUSTER ADDSLOTS <slot> [slot] ... */
/* CLUSTER DELSLOTS <slot> [slot] ... */
int j, slot;
- unsigned char *slots = zmalloc(REDIS_CLUSTER_SLOTS);
+ unsigned char *slots = zmalloc(CLUSTER_SLOTS);
int del = !strcasecmp(c->argv[1]->ptr,"delslots");
- memset(slots,0,REDIS_CLUSTER_SLOTS);
- /* Check that all the arguments are parsable and that all the
+ memset(slots,0,CLUSTER_SLOTS);
+ /* Check that all the arguments are parseable and that all the
* slots are not already busy. */
for (j = 2; j < c->argc; j++) {
if ((slot = getSlotOrReply(c,c->argv[j])) == -1) {
@@ -3604,7 +4146,7 @@ void clusterCommand(redisClient *c) {
return;
}
}
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (slots[j]) {
int retval;
@@ -3615,7 +4157,7 @@ void clusterCommand(redisClient *c) {
retval = del ? clusterDelSlot(j) :
clusterAddSlot(myself,j);
- redisAssertWithInfo(c,NULL,retval == REDIS_OK);
+ serverAssertWithInfo(c,NULL,retval == C_OK);
}
}
zfree(slots);
@@ -3629,6 +4171,11 @@ void clusterCommand(redisClient *c) {
int slot;
clusterNode *n;
+ if (nodeIsSlave(myself)) {
+ addReplyError(c,"Please use SETSLOT only with masters.");
+ return;
+ }
+
if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return;
if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) {
@@ -3650,7 +4197,7 @@ void clusterCommand(redisClient *c) {
}
if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) {
addReplyErrorFormat(c,"I don't know about node %s",
- (char*)c->argv[3]->ptr);
+ (char*)c->argv[4]->ptr);
return;
}
server.cluster->importing_slots_from[slot] = n;
@@ -3698,17 +4245,9 @@ void clusterCommand(redisClient *c) {
* failover happens at the same time we close the slot, the
* configEpoch collision resolution will fix it assigning
* a different epoch to each node. */
- uint64_t maxEpoch = clusterGetMaxEpoch();
-
- if (myself->configEpoch == 0 ||
- myself->configEpoch != maxEpoch)
- {
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterDoBeforeSleep(CLUSTER_TODO_FSYNC_CONFIG);
- redisLog(REDIS_WARNING,
- "configEpoch set to %llu after importing slot %d",
- (unsigned long long) myself->configEpoch, slot);
+ if (clusterBumpConfigEpochWithoutConsensus() == C_OK) {
+ serverLog(LL_WARNING,
+ "configEpoch updated after importing slot %d", slot);
}
server.cluster->importing_slots_from[slot] = NULL;
}
@@ -3721,6 +4260,13 @@ void clusterCommand(redisClient *c) {
}
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) {
+ /* CLUSTER BUMPEPOCH */
+ int retval = clusterBumpConfigEpochWithoutConsensus();
+ sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n",
+ (retval == C_OK) ? "BUMPED" : "STILL",
+ (unsigned long long) myself->configEpoch);
+ addReplySds(c,reply);
} else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) {
/* CLUSTER INFO */
char *statestr[] = {"ok","fail","needhelp"};
@@ -3728,7 +4274,7 @@ void clusterCommand(redisClient *c) {
uint64_t myepoch;
int j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
clusterNode *n = server.cluster->slots[j];
if (n == NULL) continue;
@@ -3755,8 +4301,6 @@ void clusterCommand(redisClient *c) {
"cluster_size:%d\r\n"
"cluster_current_epoch:%llu\r\n"
"cluster_my_epoch:%llu\r\n"
- "cluster_stats_messages_sent:%lld\r\n"
- "cluster_stats_messages_received:%lld\r\n"
, statestr[server.cluster->state],
slots_assigned,
slots_ok,
@@ -3765,10 +4309,36 @@ void clusterCommand(redisClient *c) {
dictSize(server.cluster->nodes),
server.cluster->size,
(unsigned long long) server.cluster->currentEpoch,
- (unsigned long long) myepoch,
- server.cluster->stats_bus_messages_sent,
- server.cluster->stats_bus_messages_received
+ (unsigned long long) myepoch
);
+
+ /* Show stats about messages sent and received. */
+ long long tot_msg_sent = 0;
+ long long tot_msg_received = 0;
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_sent[i] == 0) continue;
+ tot_msg_sent += server.cluster->stats_bus_messages_sent[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_sent:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_sent[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent);
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_received[i] == 0) continue;
+ tot_msg_received += server.cluster->stats_bus_messages_received[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_received:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_received[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_received:%lld\r\n", tot_msg_received);
+
+ /* Produce the reply protocol. */
addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
(unsigned long)sdslen(info)));
addReplySds(c,info);
@@ -3790,9 +4360,9 @@ void clusterCommand(redisClient *c) {
/* CLUSTER COUNTKEYSINSLOT <slot> */
long long slot;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
return;
- if (slot < 0 || slot >= REDIS_CLUSTER_SLOTS) {
+ if (slot < 0 || slot >= CLUSTER_SLOTS) {
addReplyError(c,"Invalid slot");
return;
}
@@ -3803,20 +4373,28 @@ void clusterCommand(redisClient *c) {
unsigned int numkeys, j;
robj **keys;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
return;
if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL)
- != REDIS_OK)
+ != C_OK)
return;
- if (slot < 0 || slot >= REDIS_CLUSTER_SLOTS || maxkeys < 0) {
+ if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) {
addReplyError(c,"Invalid slot or number of keys");
return;
}
+ /* Avoid allocating more than needed in case of large COUNT argument
+ * and smaller actual number of keys. */
+ unsigned int keys_in_slot = countKeysInSlot(slot);
+ if (maxkeys > keys_in_slot) maxkeys = keys_in_slot;
+
keys = zmalloc(sizeof(robj*)*maxkeys);
numkeys = getKeysInSlot(slot, keys, maxkeys);
addReplyMultiBulkLen(c,numkeys);
- for (j = 0; j < numkeys; j++) addReplyBulk(c,keys[j]);
+ for (j = 0; j < numkeys; j++) {
+ addReplyBulk(c,keys[j]);
+ decrRefCount(keys[j]);
+ }
zfree(keys);
} else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) {
/* CLUSTER FORGET <NODE ID> */
@@ -3854,7 +4432,7 @@ void clusterCommand(redisClient *c) {
}
/* Can't replicate a slave. */
- if (n->slaveof != NULL) {
+ if (nodeIsSlave(n)) {
addReplyError(c,"I can only replicate a master, not a slave.");
return;
}
@@ -3896,44 +4474,72 @@ void clusterCommand(redisClient *c) {
addReplyBulkCString(c,ni);
sdsfree(ni);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
+ c->argc == 3)
+ {
+ /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr);
+
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else {
+ addReplyLongLong(c,clusterNodeFailureReportsCount(n));
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
(c->argc == 2 || c->argc == 3))
{
- /* CLUSTER FAILOVER [FORCE] */
- int force = 0;
+ /* CLUSTER FAILOVER [FORCE|TAKEOVER] */
+ int force = 0, takeover = 0;
if (c->argc == 3) {
if (!strcasecmp(c->argv[2]->ptr,"force")) {
force = 1;
+ } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) {
+ takeover = 1;
+ force = 1; /* Takeover also implies force. */
} else {
addReply(c,shared.syntaxerr);
return;
}
}
+ /* Check preconditions. */
if (nodeIsMaster(myself)) {
addReplyError(c,"You should send CLUSTER FAILOVER to a slave");
return;
+ } else if (myself->slaveof == NULL) {
+ addReplyError(c,"I'm a slave but my master is unknown to me");
+ return;
} else if (!force &&
- (myself->slaveof == NULL || nodeFailed(myself->slaveof) ||
- myself->slaveof->link == NULL))
+ (nodeFailed(myself->slaveof) ||
+ myself->slaveof->link == NULL))
{
addReplyError(c,"Master is down or failed, "
"please use CLUSTER FAILOVER FORCE");
return;
}
resetManualFailover();
- server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
-
- /* If this is a forced failover, we don't need to talk with our master
- * to agree about the offset. We just failover taking over it without
- * coordination. */
- if (force) {
+ server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
+
+ if (takeover) {
+ /* A takeover does not perform any initial check. It just
+ * generates a new configuration epoch for this node without
+ * consensus, claims the master's slots, and broadcast the new
+ * configuration. */
+ serverLog(LL_WARNING,"Taking over the master (user request).");
+ clusterBumpConfigEpochWithoutConsensus();
+ clusterFailoverReplaceYourMaster();
+ } else if (force) {
+ /* If this is a forced failover, we don't need to talk with our
+ * master to agree about the offset. We just failover taking over
+ * it without coordination. */
+ serverLog(LL_WARNING,"Forced failover user request accepted.");
server.cluster->mf_can_start = 1;
} else {
+ serverLog(LL_WARNING,"Manual failover user request accepted.");
clusterSendMFStart(myself->slaveof);
}
- redisLog(REDIS_WARNING,"Manual failover user request accepted.");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3)
{
@@ -3946,7 +4552,7 @@ void clusterCommand(redisClient *c) {
* resolution system which is too slow when a big cluster is created. */
long long epoch;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK)
return;
if (epoch < 0) {
@@ -3958,11 +4564,11 @@ void clusterCommand(redisClient *c) {
addReplyError(c,"Node config epoch is already non-zero");
} else {
myself->configEpoch = epoch;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH",
(unsigned long long) myself->configEpoch);
- if (server.cluster->currentEpoch < epoch)
+ if (server.cluster->currentEpoch < (uint64_t)epoch)
server.cluster->currentEpoch = epoch;
/* No need to fsync the config here since in the unlucky event
* of a failure to persist the config, the conflict resolution code
@@ -4016,8 +4622,8 @@ void createDumpPayload(rio *payload, robj *o) {
/* Serialize the object in a RDB-like format. It consist of an object type
* byte followed by the serialized object. This is understood by RESTORE. */
rioInitWithBuffer(payload,sdsempty());
- redisAssert(rdbSaveObjectType(payload,o));
- redisAssert(rdbSaveObject(payload,o));
+ serverAssert(rdbSaveObjectType(payload,o));
+ serverAssert(rdbSaveObject(payload,o));
/* Write the footer, this is how it looks like:
* ----------------+---------------------+---------------+
@@ -4027,8 +4633,8 @@ void createDumpPayload(rio *payload, robj *o) {
*/
/* RDB version */
- buf[0] = REDIS_RDB_VERSION & 0xff;
- buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff;
+ buf[0] = RDB_VERSION & 0xff;
+ buf[1] = (RDB_VERSION >> 8) & 0xff;
payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2);
/* CRC64 */
@@ -4040,7 +4646,7 @@ void createDumpPayload(rio *payload, robj *o) {
/* Verify that the RDB version of the dump payload matches the one of this Redis
* instance and that the checksum is ok.
- * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR
+ * If the DUMP payload looks valid C_OK is returned, otherwise C_ERR
* is returned. */
int verifyDumpPayload(unsigned char *p, size_t len) {
unsigned char *footer;
@@ -4048,23 +4654,23 @@ int verifyDumpPayload(unsigned char *p, size_t len) {
uint64_t crc;
/* At least 2 bytes of RDB version and 8 of CRC64 should be present. */
- if (len < 10) return REDIS_ERR;
+ if (len < 10) return C_ERR;
footer = p+(len-10);
/* Verify RDB version */
rdbver = (footer[1] << 8) | footer[0];
- if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR;
+ if (rdbver > RDB_VERSION) return C_ERR;
/* Verify CRC64 */
crc = crc64(0,p,len-8);
memrev64ifbe(&crc);
- return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR;
+ return (memcmp(&crc,footer+2,8) == 0) ? C_OK : C_ERR;
}
/* DUMP keyname
* DUMP is actually not used by Redis Cluster but it is the obvious
* complement of RESTORE and can be useful for different applications. */
-void dumpCommand(redisClient *c) {
+void dumpCommand(client *c) {
robj *o, *dumpobj;
rio payload;
@@ -4078,14 +4684,14 @@ void dumpCommand(redisClient *c) {
createDumpPayload(&payload,o);
/* Transfer to the client */
- dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr);
+ dumpobj = createObject(OBJ_STRING,payload.io.buffer.ptr);
addReplyBulk(c,dumpobj);
decrRefCount(dumpobj);
return;
}
/* RESTORE key ttl serialized-value [REPLACE] */
-void restoreCommand(redisClient *c) {
+void restoreCommand(client *c) {
long long ttl;
rio payload;
int j, type, replace = 0;
@@ -4108,7 +4714,7 @@ void restoreCommand(redisClient *c) {
}
/* Check if the TTL value makes sense */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != REDIS_OK) {
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != C_OK) {
return;
} else if (ttl < 0) {
addReplyError(c,"Invalid TTL value, must be >= 0");
@@ -4116,7 +4722,7 @@ void restoreCommand(redisClient *c) {
}
/* Verify RDB version and data checksum. */
- if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR)
+ if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == C_ERR)
{
addReplyError(c,"DUMP payload version or checksum are wrong");
return;
@@ -4135,7 +4741,7 @@ void restoreCommand(redisClient *c) {
/* Create the key and set the TTL if any */
dbAdd(c->db,c->argv[1],obj);
- if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl);
+ if (ttl) setExpire(c,c->db,c->argv[1],mstime()+ttl);
signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.ok);
server.dirty++;
@@ -4148,15 +4754,16 @@ void restoreCommand(redisClient *c) {
* This sockets are closed when the max number we cache is reached, and also
* in serverCron() when they are around for more than a few seconds. */
#define MIGRATE_SOCKET_CACHE_ITEMS 64 /* max num of items in the cache. */
-#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached socekts after 10 sec. */
+#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached sockets after 10 sec. */
typedef struct migrateCachedSocket {
int fd;
+ long last_dbid;
time_t last_use_time;
} migrateCachedSocket;
-/* Return a TCP scoket connected with the target instance, possibly returning
- * a cached one.
+/* Return a migrateCachedSocket containing a TCP socket connected with the
+ * target instance, possibly returning a cached one.
*
* This function is responsible of sending errors to the client if a
* connection can't be established. In this case -1 is returned.
@@ -4164,9 +4771,9 @@ typedef struct migrateCachedSocket {
* attempt to free it after usage.
*
* If the caller detects an error while using the socket, migrateCloseSocket()
- * should be called so that the connection will be craeted from scratch
+ * should be called so that the connection will be created from scratch
* the next time. */
-int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
+migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) {
int fd;
sds name = sdsempty();
migrateCachedSocket *cs;
@@ -4179,7 +4786,7 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
if (cs) {
sdsfree(name);
cs->last_use_time = server.unixtime;
- return cs->fd;
+ return cs;
}
/* No cached socket, create one. */
@@ -4194,12 +4801,12 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
/* Create the socket */
fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr,
- atoi(c->argv[2]->ptr));
+ atoi(c->argv[2]->ptr));
if (fd == -1) {
sdsfree(name);
addReplyErrorFormat(c,"Can't connect to target node: %s",
server.neterr);
- return -1;
+ return NULL;
}
anetEnableTcpNoDelay(server.neterr,fd);
@@ -4209,15 +4816,16 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
addReplySds(c,
sdsnew("-IOERR error or timeout connecting to the client\r\n"));
close(fd);
- return -1;
+ return NULL;
}
/* Add to the cache and return it to the caller. */
cs = zmalloc(sizeof(*cs));
cs->fd = fd;
+ cs->last_dbid = -1;
cs->last_use_time = server.unixtime;
dictAdd(server.migrate_cached_sockets,name,cs);
- return fd;
+ return cs;
}
/* Free a migrate cached connection. */
@@ -4256,21 +4864,31 @@ void migrateCloseTimedoutSockets(void) {
dictReleaseIterator(di);
}
-/* MIGRATE host port key dbid timeout [COPY | REPLACE] */
-void migrateCommand(redisClient *c) {
- int fd, copy, replace, j;
+/* MIGRATE host port key dbid timeout [COPY | REPLACE]
+ *
+ * Or in the multiple keys form:
+ *
+ * MIGRATE host port "" dbid timeout [COPY | REPLACE] KEYS key1 key2 ... keyN */
+void migrateCommand(client *c) {
+ migrateCachedSocket *cs;
+ int copy, replace, j;
long timeout;
long dbid;
- long long ttl, expireat;
- robj *o;
+ robj **ov = NULL; /* Objects to migrate. */
+ robj **kv = NULL; /* Key names. */
+ robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */
rio cmd, payload;
- int retry_num = 0;
+ int may_retry = 1;
+ int write_error = 0;
+ int argv_rewritten = 0;
+
+ /* To support the KEYS option we need the following additional state. */
+ int first_key = 3; /* Argument index of the first key. */
+ int num_keys = 1; /* By default only migrate the 'key' argument. */
-try_again:
/* Initialization */
copy = 0;
replace = 0;
- ttl = 0;
/* Parse additional options */
for (j = 6; j < c->argc; j++) {
@@ -4278,6 +4896,16 @@ try_again:
copy = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"replace")) {
replace = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"keys")) {
+ if (sdslen(c->argv[3]->ptr) != 0) {
+ addReplyError(c,
+ "When using MIGRATE KEYS option, the key argument"
+ " must be set to the empty string");
+ return;
+ }
+ first_key = j+1;
+ num_keys = c->argc - j - 1;
+ break; /* All the remaining args are keys. */
} else {
addReply(c,shared.syntaxerr);
return;
@@ -4285,57 +4913,88 @@ try_again:
}
/* Sanity check */
- if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK)
- return;
- if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != C_OK ||
+ getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != C_OK)
+ {
return;
+ }
if (timeout <= 0) timeout = 1000;
- /* Check if the key is here. If not we reply with success as there is
- * nothing to migrate (for instance the key expired in the meantime), but
- * we include such information in the reply string. */
- if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) {
+ /* Check if the keys are here. If at least one key is to migrate, do it
+ * otherwise if all the keys are missing reply with "NOKEY" to signal
+ * the caller there was nothing to migrate. We don't return an error in
+ * this case, since often this is due to a normal condition like the key
+ * expiring in the meantime. */
+ ov = zrealloc(ov,sizeof(robj*)*num_keys);
+ kv = zrealloc(kv,sizeof(robj*)*num_keys);
+ int oi = 0;
+
+ for (j = 0; j < num_keys; j++) {
+ if ((ov[oi] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) {
+ kv[oi] = c->argv[first_key+j];
+ oi++;
+ }
+ }
+ num_keys = oi;
+ if (num_keys == 0) {
+ zfree(ov); zfree(kv);
addReplySds(c,sdsnew("+NOKEY\r\n"));
return;
}
+try_again:
+ write_error = 0;
+
/* Connect */
- fd = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
- if (fd == -1) return; /* error sent to the client by migrateGetSocket() */
+ cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
+ if (cs == NULL) {
+ zfree(ov); zfree(kv);
+ return; /* error sent to the client by migrateGetSocket() */
+ }
- /* Create RESTORE payload and generate the protocol to call the command. */
rioInitWithBuffer(&cmd,sdsempty());
- redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
- redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
-
- expireat = getExpire(c->db,c->argv[3]);
- if (expireat != -1) {
- ttl = expireat-mstime();
- if (ttl < 1) ttl = 1;
- }
- redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));
- if (server.cluster_enabled)
- redisAssertWithInfo(c,NULL,
- rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
- else
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
- redisAssertWithInfo(c,NULL,sdsEncodedObject(c->argv[3]));
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,
- sdslen(c->argv[3]->ptr)));
- redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
-
- /* Emit the payload argument, that is the serialized object using
- * the DUMP format. */
- createDumpPayload(&payload,o);
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,
- sdslen(payload.io.buffer.ptr)));
- sdsfree(payload.io.buffer.ptr);
- /* Add the REPLACE option to the RESTORE command if it was specified
- * as a MIGRATE option. */
- if (replace)
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7));
+ /* Send the SELECT command if the current DB is not already selected. */
+ int select = cs->last_dbid != dbid; /* Should we emit SELECT? */
+ if (select) {
+ serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
+ serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
+ }
+
+ /* Create RESTORE payload and generate the protocol to call the command. */
+ for (j = 0; j < num_keys; j++) {
+ long long ttl = 0;
+ long long expireat = getExpire(c->db,kv[j]);
+
+ if (expireat != -1) {
+ ttl = expireat-mstime();
+ if (ttl < 1) ttl = 1;
+ }
+ serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));
+ if (server.cluster_enabled)
+ serverAssertWithInfo(c,NULL,
+ rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
+ else
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
+ serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j]));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr,
+ sdslen(kv[j]->ptr)));
+ serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
+
+ /* Emit the payload argument, that is the serialized object using
+ * the DUMP format. */
+ createDumpPayload(&payload,ov[j]);
+ serverAssertWithInfo(c,NULL,
+ rioWriteBulkString(&cmd,payload.io.buffer.ptr,
+ sdslen(payload.io.buffer.ptr)));
+ sdsfree(payload.io.buffer.ptr);
+
+ /* Add the REPLACE option to the RESTORE command if it was specified
+ * as a MIGRATE option. */
+ if (replace)
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7));
+ }
/* Transfer the query to the other node in 64K chunks. */
errno = 0;
@@ -4346,60 +5005,141 @@ try_again:
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
- nwritten = syncWrite(fd,buf+pos,towrite,timeout);
- if (nwritten != (signed)towrite) goto socket_wr_err;
+ nwritten = syncWrite(cs->fd,buf+pos,towrite,timeout);
+ if (nwritten != (signed)towrite) {
+ write_error = 1;
+ goto socket_err;
+ }
pos += nwritten;
}
}
- /* Read back the reply. */
- {
- char buf1[1024];
- char buf2[1024];
-
- /* Read the two replies */
- if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0)
- goto socket_rd_err;
- if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0)
- goto socket_rd_err;
- if (buf1[0] == '-' || buf2[0] == '-') {
- addReplyErrorFormat(c,"Target instance replied with error: %s",
- (buf1[0] == '-') ? buf1+1 : buf2+1);
- } else {
- robj *aux;
+ char buf1[1024]; /* Select reply. */
+ char buf2[1024]; /* Restore reply. */
+ /* Read the SELECT reply if needed. */
+ if (select && syncReadLine(cs->fd, buf1, sizeof(buf1), timeout) <= 0)
+ goto socket_err;
+
+ /* Read the RESTORE replies. */
+ int error_from_target = 0;
+ int socket_error = 0;
+ int del_idx = 1; /* Index of the key argument for the replicated DEL op. */
+
+ if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1));
+
+ for (j = 0; j < num_keys; j++) {
+ if (syncReadLine(cs->fd, buf2, sizeof(buf2), timeout) <= 0) {
+ socket_error = 1;
+ break;
+ }
+ if ((select && buf1[0] == '-') || buf2[0] == '-') {
+ /* On error assume that last_dbid is no longer valid. */
+ if (!error_from_target) {
+ cs->last_dbid = -1;
+ addReplyErrorFormat(c,"Target instance replied with error: %s",
+ (select && buf1[0] == '-') ? buf1+1 : buf2+1);
+ error_from_target = 1;
+ }
+ } else {
if (!copy) {
/* No COPY option: remove the local key, signal the change. */
- dbDelete(c->db,c->argv[3]);
- signalModifiedKey(c->db,c->argv[3]);
+ dbDelete(c->db,kv[j]);
+ signalModifiedKey(c->db,kv[j]);
+ server.dirty++;
+
+ /* Populate the argument vector to replace the old one. */
+ newargv[del_idx++] = kv[j];
+ incrRefCount(kv[j]);
}
- addReply(c,shared.ok);
- server.dirty++;
+ }
+ }
- /* Translate MIGRATE as DEL for replication/AOF. */
- aux = createStringObject("DEL",3);
- rewriteClientCommandVector(c,2,aux,c->argv[3]);
- decrRefCount(aux);
+ /* On socket error, if we want to retry, do it now before rewriting the
+ * command vector. We only retry if we are sure nothing was processed
+ * and we failed to read the first reply (j == 0 test). */
+ if (!error_from_target && socket_error && j == 0 && may_retry &&
+ errno != ETIMEDOUT)
+ {
+ goto socket_err; /* A retry is guaranteed because of tested conditions.*/
+ }
+
+ /* On socket errors, close the migration socket now that we still have
+ * the original host/port in the ARGV. Later the original command may be
+ * rewritten to DEL and it will be too late. */
+ if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]);
+
+ if (!copy) {
+ /* Translate MIGRATE as DEL for replication/AOF. Note that we do
+ * this only for the keys for which we received an acknowledgement
+ * from the receiving Redis server, by using the del_idx index. */
+ if (del_idx > 1) {
+ newargv[0] = createStringObject("DEL",3);
+ /* Note that the following call takes ownership of newargv. */
+ replaceClientCommandVector(c,del_idx,newargv);
+ argv_rewritten = 1;
+ } else {
+ /* No key transfer acknowledged, no need to rewrite as DEL. */
+ zfree(newargv);
}
+ newargv = NULL; /* Make it safe to call zfree() on it in the future. */
}
- sdsfree(cmd.io.buffer.ptr);
- return;
+ /* If we are here and a socket error happened, we don't want to retry.
+ * Just signal the problem to the client, but only do it if we did not
+ * already queue a different error reported by the destination server. */
+ if (!error_from_target && socket_error) {
+ may_retry = 0;
+ goto socket_err;
+ }
+
+ if (!error_from_target) {
+ /* Success! Update the last_dbid in migrateCachedSocket, so that we can
+ * avoid SELECT the next time if the target DB is the same. Reply +OK.
+ *
+ * Note: If we reached this point, even if socket_error is true
+ * still the SELECT command succeeded (otherwise the code jumps to
+ * the socket_err label). */
+ cs->last_dbid = dbid;
+ addReply(c,shared.ok);
+ } else {
+ /* On error we already sent it in the for loop above, and set
+ * the currently selected socket to -1 to force SELECT the next time. */
+ }
-socket_wr_err:
sdsfree(cmd.io.buffer.ptr);
- migrateCloseSocket(c->argv[1],c->argv[2]);
- if (errno != ETIMEDOUT && retry_num++ == 0) goto try_again;
- addReplySds(c,
- sdsnew("-IOERR error or timeout writing to target instance\r\n"));
+ zfree(ov); zfree(kv); zfree(newargv);
return;
-socket_rd_err:
+/* On socket errors we try to close the cached socket and try again.
+ * It is very common for the cached socket to get closed, if just reopening
+ * it works it's a shame to notify the error to the caller. */
+socket_err:
+ /* Cleanup we want to perform in both the retry and no retry case.
+ * Note: Closing the migrate socket will also force SELECT next time. */
sdsfree(cmd.io.buffer.ptr);
- migrateCloseSocket(c->argv[1],c->argv[2]);
- if (errno != ETIMEDOUT && retry_num++ == 0) goto try_again;
+
+ /* If the command was rewritten as DEL and there was a socket error,
+ * we already closed the socket earlier. While migrateCloseSocket()
+ * is idempotent, the host/port arguments are now gone, so don't do it
+ * again. */
+ if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]);
+ zfree(newargv);
+ newargv = NULL; /* This will get reallocated on retry. */
+
+ /* Retry only if it's not a timeout and we never attempted a retry
+ * (or the code jumping here did not set may_retry to zero). */
+ if (errno != ETIMEDOUT && may_retry) {
+ may_retry = 0;
+ goto try_again;
+ }
+
+ /* Cleanup we want to do if no retry is attempted. */
+ zfree(ov); zfree(kv);
addReplySds(c,
- sdsnew("-IOERR error or timeout reading from target node\r\n"));
+ sdscatprintf(sdsempty(),
+ "-IOERR error or timeout %s to target instance\r\n",
+ write_error ? "writing" : "reading"));
return;
}
@@ -4411,30 +5151,30 @@ socket_rd_err:
* The client should issue ASKING before to actually send the command to
* the target instance. See the Redis Cluster specification for more
* information. */
-void askingCommand(redisClient *c) {
+void askingCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- c->flags |= REDIS_ASKING;
+ c->flags |= CLIENT_ASKING;
addReply(c,shared.ok);
}
-/* The READONLY command is uesd by clients to enter the read-only mode.
+/* The READONLY command is used by clients to enter the read-only mode.
* In this mode slaves will not redirect clients as long as clients access
* with read-only commands to keys that are served by the slave's master. */
-void readonlyCommand(redisClient *c) {
+void readonlyCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- c->flags |= REDIS_READONLY;
+ c->flags |= CLIENT_READONLY;
addReply(c,shared.ok);
}
/* The READWRITE command just clears the READONLY command state. */
-void readwriteCommand(redisClient *c) {
- c->flags &= ~REDIS_READONLY;
+void readwriteCommand(client *c) {
+ c->flags &= ~CLIENT_READONLY;
addReply(c,shared.ok);
}
@@ -4448,21 +5188,29 @@ void readwriteCommand(redisClient *c) {
* On success the function returns the node that is able to serve the request.
* If the node is not 'myself' a redirection must be perfomed. The kind of
* redirection is specified setting the integer passed by reference
- * 'error_code', which will be set to REDIS_CLUSTER_REDIR_ASK or
- * REDIS_CLUSTER_REDIR_MOVED.
+ * 'error_code', which will be set to CLUSTER_REDIR_ASK or
+ * CLUSTER_REDIR_MOVED.
*
- * When the node is 'myself' 'error_code' is set to REDIS_CLUSTER_REDIR_NONE.
+ * When the node is 'myself' 'error_code' is set to CLUSTER_REDIR_NONE.
*
* If the command fails NULL is returned, and the reason of the failure is
* provided via 'error_code', which will be set to:
*
- * REDIS_CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that
+ * CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that
* don't belong to the same hash slot.
*
- * REDIS_CLUSTER_REDIR_UNSTABLE if the request contains mutliple keys
+ * CLUSTER_REDIR_UNSTABLE if the request contains multiple keys
* belonging to the same slot, but the slot is not stable (in migration or
- * importing state, likely because a resharding is in progress). */
-clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
+ * importing state, likely because a resharding is in progress).
+ *
+ * CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is
+ * not bound to any node. In this case the cluster global state should be
+ * already "down" but it is fragile to rely on the update of the global state,
+ * so we also handle it here.
+ *
+ * CLUSTER_REDIR_DOWN_STATE if the cluster is down but the user attempts to
+ * execute a command that addresses one or more keys. */
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
clusterNode *n = NULL;
robj *firstkey = NULL;
int multiple_keys = 0;
@@ -4471,14 +5219,14 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0;
/* Set error code optimistically for the base case. */
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_NONE;
+ if (error_code) *error_code = CLUSTER_REDIR_NONE;
/* We handle all the cases as if they were EXEC commands, so we have
* a common code path for everything */
if (cmd->proc == execCommand) {
- /* If REDIS_MULTI flag is not set EXEC is just going to return an
+ /* If CLIENT_MULTI flag is not set EXEC is just going to return an
* error. */
- if (!(c->flags & REDIS_MULTI)) return myself;
+ if (!(c->flags & CLIENT_MULTI)) return myself;
ms = &c->mstate;
} else {
/* In order to have a single codepath create a fake Multi State
@@ -4515,7 +5263,18 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
firstkey = thiskey;
slot = thisslot;
n = server.cluster->slots[slot];
- redisAssertWithInfo(c,firstkey,n != NULL);
+
+ /* Error: If a slot is not served, we are in "cluster down"
+ * state. However the state is yet to be updated, so this was
+ * not trapped earlier in processCommand(). Report the same
+ * error to the client. */
+ if (n == NULL) {
+ getKeysFreeResult(keyindex);
+ if (error_code)
+ *error_code = CLUSTER_REDIR_DOWN_UNBOUND;
+ return NULL;
+ }
+
/* If we are migrating or importing this slot, we need to check
* if we have all the keys in the request (the only way we
* can safely serve the request, otherwise we return a TRYAGAIN
@@ -4536,7 +5295,7 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Error: multiple keys from different slots. */
getKeysFreeResult(keyindex);
if (error_code)
- *error_code = REDIS_CLUSTER_REDIR_CROSS_SLOT;
+ *error_code = CLUSTER_REDIR_CROSS_SLOT;
return NULL;
} else {
/* Flag this request as one with multiple different
@@ -4557,19 +5316,28 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
}
/* No key at all in command? then we can serve the request
- * without redirections or errors. */
+ * without redirections or errors in all the cases. */
if (n == NULL) return myself;
+ /* Cluster is globally down but we got keys? We can't serve the request. */
+ if (server.cluster->state != CLUSTER_OK) {
+ if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
+ return NULL;
+ }
+
/* Return the hashslot by reference. */
if (hashslot) *hashslot = slot;
- /* This request is about a slot we are migrating into another instance?
- * Then if we have all the keys. */
+ /* MIGRATE always works in the context of the local node if the slot
+ * is open (migrating or importing state). We need to be able to freely
+ * move keys among instances in this case. */
+ if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand)
+ return myself;
/* If we don't have all the keys and we are migrating the slot, send
* an ASK redirection. */
if (migrating_slot && missing_keys) {
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_ASK;
+ if (error_code) *error_code = CLUSTER_REDIR_ASK;
return server.cluster->migrating_slots_to[slot];
}
@@ -4578,10 +5346,10 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
* involves multiple keys and we don't have them all, the only option is
* to send a TRYAGAIN error. */
if (importing_slot &&
- (c->flags & REDIS_ASKING || cmd->flags & REDIS_CMD_ASKING))
+ (c->flags & CLIENT_ASKING || cmd->flags & CMD_ASKING))
{
if (multiple_keys && missing_keys) {
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_UNSTABLE;
+ if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE;
return NULL;
} else {
return myself;
@@ -4591,8 +5359,8 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Handle the read-only client case reading from a slave: if this
* node is a slave and the request is about an hash slot our master
* is serving, we can reply without redirection. */
- if (c->flags & REDIS_READONLY &&
- cmd->flags & REDIS_CMD_READONLY &&
+ if (c->flags & CLIENT_READONLY &&
+ cmd->flags & CMD_READONLY &&
nodeIsSlave(myself) &&
myself->slaveof == n)
{
@@ -4601,6 +5369,88 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Base case: just return the right node. However if this node is not
* myself, set error_code to MOVED since we need to issue a rediretion. */
- if (n != myself && error_code) *error_code = REDIS_CLUSTER_REDIR_MOVED;
+ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED;
return n;
}
+
+/* Send the client the right redirection code, according to error_code
+ * that should be set to one of CLUSTER_REDIR_* macros.
+ *
+ * If CLUSTER_REDIR_ASK or CLUSTER_REDIR_MOVED error codes
+ * are used, then the node 'n' should not be NULL, but should be the
+ * node we want to mention in the redirection. Moreover hashslot should
+ * be set to the hash slot that caused the redirection. */
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) {
+ if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
+ addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
+ } else if (error_code == CLUSTER_REDIR_UNSTABLE) {
+ /* The request spawns mutliple keys in the same slot,
+ * but the slot is not "stable" currently as there is
+ * a migration or import in progress. */
+ addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
+ } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
+ } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
+ } else if (error_code == CLUSTER_REDIR_MOVED ||
+ error_code == CLUSTER_REDIR_ASK)
+ {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-%s %d %s:%d\r\n",
+ (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
+ hashslot,n->ip,n->port));
+ } else {
+ serverPanic("getNodeByQuery() unknown error.");
+ }
+}
+
+/* This function is called by the function processing clients incrementally
+ * to detect timeouts, in order to handle the following case:
+ *
+ * 1) A client blocks with BLPOP or similar blocking operation.
+ * 2) The master migrates the hash slot elsewhere or turns into a slave.
+ * 3) The client may remain blocked forever (or up to the max timeout time)
+ * waiting for a key change that will never happen.
+ *
+ * If the client is found to be blocked into an hash slot this node no
+ * longer handles, the client is sent a redirection error, and the function
+ * returns 1. Otherwise 0 is returned and no operation is performed. */
+int clusterRedirectBlockedClientIfNeeded(client *c) {
+ if (c->flags & CLIENT_BLOCKED && c->btype == BLOCKED_LIST) {
+ dictEntry *de;
+ dictIterator *di;
+
+ /* If the cluster is down, unblock the client with the right error. */
+ if (server.cluster->state == CLUSTER_FAIL) {
+ clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
+ return 1;
+ }
+
+ /* All keys must belong to the same slot, so check first key only. */
+ di = dictGetIterator(c->bpop.keys);
+ if ((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr));
+ clusterNode *node = server.cluster->slots[slot];
+
+ /* We send an error and unblock the client if:
+ * 1) The slot is unassigned, emitting a cluster down error.
+ * 2) The slot is not handled by this node, nor being imported. */
+ if (node != myself &&
+ server.cluster->importing_slots_from[slot] == NULL)
+ {
+ if (node == NULL) {
+ clusterRedirectClient(c,NULL,0,
+ CLUSTER_REDIR_DOWN_UNBOUND);
+ } else {
+ clusterRedirectClient(c,node,slot,
+ CLUSTER_REDIR_MOVED);
+ }
+ dictReleaseIterator(di);
+ return 1;
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ return 0;
+}
diff --git a/src/cluster.h b/src/cluster.h
index 96072cd91..af85841c9 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -1,34 +1,38 @@
-#ifndef __REDIS_CLUSTER_H
-#define __REDIS_CLUSTER_H
+#ifndef __CLUSTER_H
+#define __CLUSTER_H
/*-----------------------------------------------------------------------------
* Redis cluster data structures, defines, exported API.
*----------------------------------------------------------------------------*/
-#define REDIS_CLUSTER_SLOTS 16384
-#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
-#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
-#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
-#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
+#define CLUSTER_SLOTS 16384
+#define CLUSTER_OK 0 /* Everything looks ok */
+#define CLUSTER_FAIL 1 /* The cluster can't work */
+#define CLUSTER_NAMELEN 40 /* sha1 hex length */
+#define CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
-/* The following defines are amunt of time, sometimes expressed as
+/* The following defines are amount of time, sometimes expressed as
* multiplicators of the node timeout value (when ending with MULT). */
-#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15000
-#define REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
-#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
-#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
-#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
-#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
-#define REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER 1
-#define REDIS_CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
-#define REDIS_CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
+#define CLUSTER_DEFAULT_NODE_TIMEOUT 15000
+#define CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
+#define CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1
+#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
+#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
+#define CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
+#define CLUSTER_FAILOVER_DELAY 5 /* Seconds */
+#define CLUSTER_DEFAULT_MIGRATION_BARRIER 1
+#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
+#define CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
+#define CLUSTER_SLAVE_MIGRATION_DELAY 5000 /* Delay for slave migration. */
/* Redirection errors returned by getNodeByQuery(). */
-#define REDIS_CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
-#define REDIS_CLUSTER_REDIR_CROSS_SLOT 1 /* Keys in different slots. */
-#define REDIS_CLUSTER_REDIR_UNSTABLE 2 /* Keys in slot resharding. */
-#define REDIS_CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
-#define REDIS_CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
+#define CLUSTER_REDIR_CROSS_SLOT 1 /* -CROSSSLOT request. */
+#define CLUSTER_REDIR_UNSTABLE 2 /* -TRYAGAIN redirection required */
+#define CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
+#define CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define CLUSTER_REDIR_DOWN_STATE 5 /* -CLUSTERDOWN, global state. */
+#define CLUSTER_REDIR_DOWN_UNBOUND 6 /* -CLUSTERDOWN, unbound slot. */
struct clusterNode;
@@ -42,71 +46,109 @@ typedef struct clusterLink {
} clusterLink;
/* Cluster node flags and macros. */
-#define REDIS_NODE_MASTER 1 /* The node is a master */
-#define REDIS_NODE_SLAVE 2 /* The node is a slave */
-#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
-#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
-#define REDIS_NODE_MYSELF 16 /* This node is myself */
-#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
-#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
-#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
-#define REDIS_NODE_PROMOTED 256 /* Master was a slave propoted by failover */
-#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
-
-#define nodeIsMaster(n) ((n)->flags & REDIS_NODE_MASTER)
-#define nodeIsSlave(n) ((n)->flags & REDIS_NODE_SLAVE)
-#define nodeInHandshake(n) ((n)->flags & REDIS_NODE_HANDSHAKE)
-#define nodeHasAddr(n) (!((n)->flags & REDIS_NODE_NOADDR))
-#define nodeWithoutAddr(n) ((n)->flags & REDIS_NODE_NOADDR)
-#define nodeTimedOut(n) ((n)->flags & REDIS_NODE_PFAIL)
-#define nodeFailed(n) ((n)->flags & REDIS_NODE_FAIL)
+#define CLUSTER_NODE_MASTER 1 /* The node is a master */
+#define CLUSTER_NODE_SLAVE 2 /* The node is a slave */
+#define CLUSTER_NODE_PFAIL 4 /* Failure? Need acknowledge */
+#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */
+#define CLUSTER_NODE_MYSELF 16 /* This node is myself */
+#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
+#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
+#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
+#define CLUSTER_NODE_MIGRATE_TO 256 /* Master elegible for replica migration. */
+#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+
+#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)
+#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE)
+#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE)
+#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR))
+#define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR)
+#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL)
+#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
+
+/* Reasons why a slave is not able to failover. */
+#define CLUSTER_CANT_FAILOVER_NONE 0
+#define CLUSTER_CANT_FAILOVER_DATA_AGE 1
+#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
+#define CLUSTER_CANT_FAILOVER_EXPIRED 3
+#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
+#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */
+
+/* clusterState todo_before_sleep flags. */
+#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
+#define CLUSTER_TODO_UPDATE_STATE (1<<1)
+#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
+#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
+
+/* Message types.
+ *
+ * Note that the PING, PONG and MEET messages are actually the same exact
+ * kind of packet. PONG is the reply to ping, in the exact format as a PING,
+ * while MEET is a special PING that forces the receiver to add the sender
+ * as a node (if it is not already in the list). */
+#define CLUSTERMSG_TYPE_PING 0 /* Ping */
+#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
+#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
+#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
+#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */
+#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */
+#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */
+#define CLUSTERMSG_TYPE_COUNT 9 /* Total number of message types. */
/* This structure represent elements of node->fail_reports. */
-struct clusterNodeFailReport {
+typedef struct clusterNodeFailReport {
struct clusterNode *node; /* Node reporting the failure condition. */
mstime_t time; /* Time of the last report from this node. */
-} typedef clusterNodeFailReport;
+} clusterNodeFailReport;
-struct clusterNode {
+typedef struct clusterNode {
mstime_t ctime; /* Node object creation time. */
- char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
- int flags; /* REDIS_NODE_... */
+ char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
+ int flags; /* CLUSTER_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
- unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */
- struct clusterNode *slaveof; /* pointer to the master node */
+ struct clusterNode *slaveof; /* pointer to the master node. Note that it
+ may be NULL even if the node is a slave
+ if we don't have the master node in our
+ tables. */
mstime_t ping_sent; /* Unix time we sent latest ping */
mstime_t pong_received; /* Unix time we received the pong */
mstime_t fail_time; /* Unix time when FAIL flag was set */
mstime_t voted_time; /* Last time we voted for a slave of this master */
mstime_t repl_offset_time; /* Unix time we received offset for this node */
+ mstime_t orphaned_time; /* Starting time of orphaned master condition */
long long repl_offset; /* Last known repl offset for this node. */
- char ip[REDIS_IP_STR_LEN]; /* Latest known IP address of this node */
- int port; /* Latest known port of this node */
+ char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */
+ int port; /* Latest known clients port of this node */
+ int cport; /* Latest known cluster port of this node. */
clusterLink *link; /* TCP/IP link with this node */
list *fail_reports; /* List of nodes signaling this as failing */
-};
-typedef struct clusterNode clusterNode;
+} clusterNode;
typedef struct clusterState {
clusterNode *myself; /* This node */
uint64_t currentEpoch;
- int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
+ int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */
int size; /* Num of master nodes with at least one slot */
dict *nodes; /* Hash table of name -> clusterNode structures */
dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */
- clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
- clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
- clusterNode *slots[REDIS_CLUSTER_SLOTS];
- zskiplist *slots_to_keys;
+ clusterNode *migrating_slots_to[CLUSTER_SLOTS];
+ clusterNode *importing_slots_from[CLUSTER_SLOTS];
+ clusterNode *slots[CLUSTER_SLOTS];
+ uint64_t slots_keys_count[CLUSTER_SLOTS];
+ rax *slots_to_keys;
/* The following fields are used to take the slave state on elections. */
mstime_t failover_auth_time; /* Time of previous or next election. */
int failover_auth_count; /* Number of votes received so far. */
int failover_auth_sent; /* True if we already asked for votes. */
int failover_auth_rank; /* This slave rank for current auth request. */
uint64_t failover_auth_epoch; /* Epoch of the current election. */
+ int cant_failover_reason; /* Why a slave is currently not able to
+ failover. See the CANT_FAILOVER_* macros. */
/* Manual failover state in common. */
mstime_t mf_end; /* Manual failover time limit (ms unixtime).
It is zero if there is no MF in progress. */
@@ -117,62 +159,49 @@ typedef struct clusterState {
or zero if stil not received. */
int mf_can_start; /* If non-zero signal that the manual failover
can start requesting masters vote. */
- /* The followign fields are uesd by masters to take state on elections. */
+ /* The followign fields are used by masters to take state on elections. */
uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */
int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
- long long stats_bus_messages_sent; /* Num of msg sent via cluster bus. */
- long long stats_bus_messages_received; /* Num of msg rcvd via cluster bus.*/
+ /* Messages received and sent by type. */
+ long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT];
+ long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT];
+ long long stats_pfail_nodes; /* Number of nodes in PFAIL status,
+ excluding nodes without address. */
} clusterState;
-/* clusterState todo_before_sleep flags. */
-#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
-#define CLUSTER_TODO_UPDATE_STATE (1<<1)
-#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
-#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
-
/* Redis cluster messages header */
-/* Note that the PING, PONG and MEET messages are actually the same exact
- * kind of packet. PONG is the reply to ping, in the exact format as a PING,
- * while MEET is a special PING that forces the receiver to add the sender
- * as a node (if it is not already in the list). */
-#define CLUSTERMSG_TYPE_PING 0 /* Ping */
-#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
-#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
-#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
-#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
-#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
-#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */
-#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */
-#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */
-
/* Initially we don't know our "name", but we'll find it once we connect
* to the first node, using the getsockname() function. Then we'll use this
* address for all the next messages. */
typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
+ char nodename[CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
- char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
- uint16_t port; /* port last time it was seen */
- uint16_t flags;
- uint32_t notused; /* for 64 bit alignment */
+ char ip[NET_IP_STR_LEN]; /* IP address last time it was seen */
+ uint16_t port; /* base port last time it was seen */
+ uint16_t cport; /* cluster port last time it was seen */
+ uint16_t flags; /* node->flags copy */
+ uint32_t notused1;
} clusterMsgDataGossip;
typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
+ char nodename[CLUSTER_NAMELEN];
} clusterMsgDataFail;
typedef struct {
uint32_t channel_len;
uint32_t message_len;
- unsigned char bulk_data[8]; /* defined as 8 just for alignment concerns. */
+ /* We can't reclare bulk_data as bulk_data[] since this structure is
+ * nested. The 8 bytes are removed from the count during the message
+ * length computation. */
+ unsigned char bulk_data[8];
} clusterMsgDataPublish;
typedef struct {
uint64_t configEpoch; /* Config epoch of the specified instance. */
- char nodename[REDIS_CLUSTER_NAMELEN]; /* Name of the slots owner. */
- unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* Slots bitmap. */
+ char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* Slots bitmap. */
} clusterMsgDataUpdate;
union clusterMsgData {
@@ -198,12 +227,13 @@ union clusterMsgData {
} update;
};
+#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */
typedef struct {
char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
uint32_t totlen; /* Total length of this message */
- uint16_t ver; /* Protocol version, currently set to 0. */
- uint16_t notused0; /* 2 bytes not used. */
+ uint16_t ver; /* Protocol version, currently set to 1. */
+ uint16_t port; /* TCP base port number. */
uint16_t type; /* Message type */
uint16_t count; /* Only used for some kind of messages. */
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
@@ -212,12 +242,13 @@ typedef struct {
slave. */
uint64_t offset; /* Master replication offset if node is a master or
processed replication offset if node is a slave. */
- char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
- unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
- char slaveof[REDIS_CLUSTER_NAMELEN];
- char notused1[32]; /* 32 bytes reserved for future usage. */
- uint16_t port; /* Sender TCP base port */
- uint16_t flags; /* Sender node flags */
+ char sender[CLUSTER_NAMELEN]; /* Name of the sender node */
+ unsigned char myslots[CLUSTER_SLOTS/8];
+ char slaveof[CLUSTER_NAMELEN];
+ char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */
+ char notused1[34]; /* 34 bytes reserved for future usage. */
+ uint16_t cport; /* Sender TCP cluster bus port */
+ uint16_t flags; /* Sender node flags */
unsigned char state; /* Cluster state from the POV of the sender */
unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
union clusterMsgData data;
@@ -232,6 +263,8 @@ typedef struct {
master is up. */
/* ---------------------- API exported outside cluster.c -------------------- */
-clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+int clusterRedirectBlockedClientIfNeeded(client *c);
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code);
-#endif /* __REDIS_CLUSTER_H */
+#endif /* __CLUSTER_H */
diff --git a/src/config.c b/src/config.c
index b0fc50b9d..06d869be1 100644
--- a/src/config.c
+++ b/src/config.c
@@ -28,16 +28,34 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include <fcntl.h>
#include <sys/stat.h>
-static struct {
- const char *name;
- const int value;
-} validSyslogFacilities[] = {
+/*-----------------------------------------------------------------------------
+ * Config file name-value maps.
+ *----------------------------------------------------------------------------*/
+
+typedef struct configEnum {
+ const char *name;
+ const int val;
+} configEnum;
+
+configEnum maxmemory_policy_enum[] = {
+ {"volatile-lru", MAXMEMORY_VOLATILE_LRU},
+ {"volatile-lfu", MAXMEMORY_VOLATILE_LFU},
+ {"volatile-random",MAXMEMORY_VOLATILE_RANDOM},
+ {"volatile-ttl",MAXMEMORY_VOLATILE_TTL},
+ {"allkeys-lru",MAXMEMORY_ALLKEYS_LRU},
+ {"allkeys-lfu",MAXMEMORY_ALLKEYS_LFU},
+ {"allkeys-random",MAXMEMORY_ALLKEYS_RANDOM},
+ {"noeviction",MAXMEMORY_NO_EVICTION},
+ {NULL, 0}
+};
+
+configEnum syslog_facility_enum[] = {
{"user", LOG_USER},
{"local0", LOG_LOCAL0},
{"local1", LOG_LOCAL1},
@@ -50,13 +68,71 @@ static struct {
{NULL, 0}
};
-clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
+configEnum loglevel_enum[] = {
+ {"debug", LL_DEBUG},
+ {"verbose", LL_VERBOSE},
+ {"notice", LL_NOTICE},
+ {"warning", LL_WARNING},
+ {NULL,0}
+};
+
+configEnum supervised_mode_enum[] = {
+ {"upstart", SUPERVISED_UPSTART},
+ {"systemd", SUPERVISED_SYSTEMD},
+ {"auto", SUPERVISED_AUTODETECT},
+ {"no", SUPERVISED_NONE},
+ {NULL, 0}
+};
+
+configEnum aof_fsync_enum[] = {
+ {"everysec", AOF_FSYNC_EVERYSEC},
+ {"always", AOF_FSYNC_ALWAYS},
+ {"no", AOF_FSYNC_NO},
+ {NULL, 0}
+};
+
+/* Output buffer limits presets. */
+clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
{0, 0, 0}, /* normal */
{1024*1024*256, 1024*1024*64, 60}, /* slave */
{1024*1024*32, 1024*1024*8, 60} /* pubsub */
};
/*-----------------------------------------------------------------------------
+ * Enum access functions
+ *----------------------------------------------------------------------------*/
+
+/* Get enum value from name. If there is no match INT_MIN is returned. */
+int configEnumGetValue(configEnum *ce, char *name) {
+ while(ce->name != NULL) {
+ if (!strcasecmp(ce->name,name)) return ce->val;
+ ce++;
+ }
+ return INT_MIN;
+}
+
+/* Get enum name from value. If no match is found NULL is returned. */
+const char *configEnumGetName(configEnum *ce, int val) {
+ while(ce->name != NULL) {
+ if (ce->val == val) return ce->name;
+ ce++;
+ }
+ return NULL;
+}
+
+/* Wrapper for configEnumGetName() returning "unknown" insetad of NULL if
+ * there is no match. */
+const char *configEnumGetNameOrUnknown(configEnum *ce, int val) {
+ const char *name = configEnumGetName(ce,val);
+ return name ? name : "unknown";
+}
+
+/* Used for INFO generation. */
+const char *evictPolicyToString(void) {
+ return configEnumGetNameOrUnknown(maxmemory_policy_enum,server.maxmemory_policy);
+}
+
+/*-----------------------------------------------------------------------------
* Config file parsing
*----------------------------------------------------------------------------*/
@@ -73,12 +149,26 @@ void appendServerSaveParams(time_t seconds, int changes) {
server.saveparamslen++;
}
-void resetServerSaveParams() {
+void resetServerSaveParams(void) {
zfree(server.saveparams);
server.saveparams = NULL;
server.saveparamslen = 0;
}
+void queueLoadModule(sds path, sds *argv, int argc) {
+ int i;
+ struct moduleLoadQueueEntry *loadmod;
+
+ loadmod = zmalloc(sizeof(struct moduleLoadQueueEntry));
+ loadmod->argv = zmalloc(sizeof(robj*)*argc);
+ loadmod->path = sdsnew(path);
+ loadmod->argc = argc;
+ for (i = 0; i < argc; i++) {
+ loadmod->argv[i] = createRawStringObject(argv[i],sdslen(argv[i]));
+ }
+ listAddNodeTail(server.loadmodule_queue,loadmod);
+}
+
void loadServerConfigFromString(char *config) {
char *err = NULL;
int linenum = 0, totlines, i;
@@ -122,6 +212,10 @@ void loadServerConfigFromString(char *config) {
if (server.tcpkeepalive < 0) {
err = "Invalid tcp-keepalive value"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"protected-mode") && argc == 2) {
+ if ((server.protected_mode = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"port") && argc == 2) {
server.port = atoi(argv[1]);
if (server.port < 0 || server.port > 65535) {
@@ -135,7 +229,7 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"bind") && argc >= 2) {
int j, addresses = argc-1;
- if (addresses > REDIS_BINDADDR_MAX) {
+ if (addresses > CONFIG_BINDADDR_MAX) {
err = "Too many bind addresses specified"; goto loaderr;
}
for (j = 0; j < addresses; j++)
@@ -162,17 +256,15 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"dir") && argc == 2) {
if (chdir(argv[1]) == -1) {
- redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
+ serverLog(LL_WARNING,"Can't chdir to '%s': %s",
argv[1], strerror(errno));
exit(1);
}
} else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
- if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
- else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
- else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
- else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
- else {
- err = "Invalid log level. Must be one of debug, notice, warning";
+ server.verbosity = configEnumGetValue(loglevel_enum,argv[1]);
+ if (server.verbosity == INT_MIN) {
+ err = "Invalid log level. "
+ "Must be one of debug, verbose, notice, warning";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
@@ -191,6 +283,10 @@ void loadServerConfigFromString(char *config) {
}
fclose(logfp);
}
+ } else if (!strcasecmp(argv[0],"always-show-logo") && argc == 2) {
+ if ((server.always_show_logo = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"syslog-enabled") && argc == 2) {
if ((server.syslog_enabled = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@@ -199,16 +295,9 @@ void loadServerConfigFromString(char *config) {
if (server.syslog_ident) zfree(server.syslog_ident);
server.syslog_ident = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"syslog-facility") && argc == 2) {
- int i;
-
- for (i = 0; validSyslogFacilities[i].name; i++) {
- if (!strcasecmp(validSyslogFacilities[i].name, argv[1])) {
- server.syslog_facility = validSyslogFacilities[i].value;
- break;
- }
- }
-
- if (!validSyslogFacilities[i].name) {
+ server.syslog_facility =
+ configEnumGetValue(syslog_facility_enum,argv[1]);
+ if (server.syslog_facility == INT_MIN) {
err = "Invalid log facility. Must be one of USER or between LOCAL0-LOCAL7";
goto loaderr;
}
@@ -227,19 +316,9 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
server.maxmemory = memtoll(argv[1],NULL);
} else if (!strcasecmp(argv[0],"maxmemory-policy") && argc == 2) {
- if (!strcasecmp(argv[1],"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(argv[1],"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(argv[1],"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(argv[1],"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(argv[1],"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(argv[1],"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
+ server.maxmemory_policy =
+ configEnumGetValue(maxmemory_policy_enum,argv[1]);
+ if (server.maxmemory_policy == INT_MIN) {
err = "Invalid maxmemory policy";
goto loaderr;
}
@@ -249,11 +328,23 @@ void loadServerConfigFromString(char *config) {
err = "maxmemory-samples must be 1 or greater";
goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"lfu-log-factor") && argc == 2) {
+ server.lfu_log_factor = atoi(argv[1]);
+ if (server.lfu_log_factor < 0) { /* validate the option just parsed */
+ err = "lfu-log-factor must be 0 or greater";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lfu-decay-time") && argc == 2) {
+ server.lfu_decay_time = atoi(argv[1]);
+ if (server.lfu_decay_time < 0) { /* 0 is legal: "0 or greater" */
+ err = "lfu-decay-time must be 0 or greater";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
slaveof_linenum = linenum;
server.masterhost = sdsnew(argv[1]);
server.masterport = atoi(argv[2]);
- server.repl_state = REDIS_REPL_CONNECT;
+ server.repl_state = REPL_STATE_CONNECT;
} else if (!strcasecmp(argv[0],"repl-ping-slave-period") && argc == 2) {
server.repl_ping_slave_period = atoi(argv[1]);
if (server.repl_ping_slave_period <= 0) {
@@ -270,6 +361,16 @@ void loadServerConfigFromString(char *config) {
if ((server.repl_disable_tcp_nodelay = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"repl-diskless-sync") && argc==2) {
+ if ((server.repl_diskless_sync = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"repl-diskless-sync-delay") && argc==2) {
+ server.repl_diskless_sync_delay = atoi(argv[1]);
+ if (server.repl_diskless_sync_delay < 0) {
+ err = "repl-diskless-sync-delay can't be negative";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"repl-backlog-size") && argc == 2) {
long long size = memtoll(argv[1],NULL);
if (size <= 0) {
@@ -284,7 +385,8 @@ void loadServerConfigFromString(char *config) {
goto loaderr;
}
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
- server.masterauth = zstrdup(argv[1]);
+ zfree(server.masterauth);
+ server.masterauth = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"slave-serve-stale-data") && argc == 2) {
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@@ -305,21 +407,41 @@ void loadServerConfigFromString(char *config) {
if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-eviction") && argc == 2) {
+ if ((server.lazyfree_lazy_eviction = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-expire") && argc == 2) {
+ if ((server.lazyfree_lazy_expire = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-server-del") && argc == 2){
+ if ((server.lazyfree_lazy_server_del = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"slave-lazy-flush") && argc == 2) {
+ if ((server.repl_slave_lazy_flush = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"activedefrag") && argc == 2) {
+ if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"hz") && argc == 2) {
server.hz = atoi(argv[1]);
- if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
- if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
+ if (server.hz < CONFIG_MIN_HZ) server.hz = CONFIG_MIN_HZ;
+ if (server.hz > CONFIG_MAX_HZ) server.hz = CONFIG_MAX_HZ;
} else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
int yes;
if ((yes = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
- server.aof_state = yes ? REDIS_AOF_ON : REDIS_AOF_OFF;
+ server.aof_state = yes ? AOF_ON : AOF_OFF;
} else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
if (!pathIsBaseName(argv[1])) {
err = "appendfilename can't be a path, just a filename";
@@ -333,13 +455,8 @@ void loadServerConfigFromString(char *config) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
- if (!strcasecmp(argv[1],"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(argv[1],"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else if (!strcasecmp(argv[1],"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else {
+ server.aof_fsync = configEnumGetValue(aof_fsync_enum,argv[1]);
+ if (server.aof_fsync == INT_MIN) {
err = "argument must be 'no', 'always' or 'everysec'";
goto loaderr;
}
@@ -358,12 +475,21 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"aof-rewrite-incremental-fsync") &&
argc == 2)
{
- if ((server.aof_rewrite_incremental_fsync = yesnotoi(argv[1])) == -1) {
+ if ((server.aof_rewrite_incremental_fsync =
+ yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) {
+ if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"aof-use-rdb-preamble") && argc == 2) {
+ if ((server.aof_use_rdb_preamble = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
- if (strlen(argv[1]) > REDIS_AUTHPASS_MAX_LEN) {
- err = "Password is longer than REDIS_AUTHPASS_MAX_LEN";
+ if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) {
+ err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
goto loaderr;
}
server.requirepass = zstrdup(argv[1]);
@@ -377,14 +503,48 @@ void loadServerConfigFromString(char *config) {
}
zfree(server.rdb_filename);
server.rdb_filename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"active-defrag-threshold-lower") && argc == 2) {
+ server.active_defrag_threshold_lower = atoi(argv[1]);
+ if (server.active_defrag_threshold_lower < 0) {
+ err = "active-defrag-threshold-lower must be 0 or greater";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-threshold-upper") && argc == 2) {
+ server.active_defrag_threshold_upper = atoi(argv[1]);
+ if (server.active_defrag_threshold_upper < 0) {
+ err = "active-defrag-threshold-upper must be 0 or greater";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-ignore-bytes") && argc == 2) {
+ server.active_defrag_ignore_bytes = memtoll(argv[1], NULL);
+ if (server.active_defrag_ignore_bytes <= 0) { /* strictly positive byte count */
+ err = "active-defrag-ignore-bytes must be above 0";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-cycle-min") && argc == 2) {
+ server.active_defrag_cycle_min = atoi(argv[1]);
+ if (server.active_defrag_cycle_min < 1 || server.active_defrag_cycle_min > 99) {
+ err = "active-defrag-cycle-min must be between 1 and 99";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-cycle-max") && argc == 2) {
+ server.active_defrag_cycle_max = atoi(argv[1]);
+ if (server.active_defrag_cycle_max < 1 || server.active_defrag_cycle_max > 99) {
+ err = "active-defrag-cycle-max must be between 1 and 99";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) {
server.hash_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) {
server.hash_max_ziplist_value = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
- server.list_max_ziplist_entries = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) {
- server.list_max_ziplist_value = memtoll(argv[1], NULL);
+ /* DEAD OPTION */
+ } else if (!strcasecmp(argv[0],"list-max-ziplist-size") && argc == 2) {
+ server.list_max_ziplist_size = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"list-compress-depth") && argc == 2) {
+ server.list_compress_depth = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"set-max-intset-entries") && argc == 2) {
server.set_max_intset_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"zset-max-ziplist-entries") && argc == 2) {
@@ -405,7 +565,7 @@ void loadServerConfigFromString(char *config) {
/* If the target command name is the empty string we just
* remove it from the command table. */
retval = dictDelete(server.commands, argv[1]);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
/* Otherwise we re-add the command under a different name. */
if (sdslen(argv[2]) != 0) {
@@ -424,6 +584,32 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"cluster-config-file") && argc == 2) {
zfree(server.cluster_configfile);
server.cluster_configfile = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"cluster-announce-ip") && argc == 2) {
+ zfree(server.cluster_announce_ip);
+ server.cluster_announce_ip = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"cluster-announce-port") && argc == 2) {
+ server.cluster_announce_port = atoi(argv[1]);
+ if (server.cluster_announce_port < 0 ||
+ server.cluster_announce_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"cluster-announce-bus-port") &&
+ argc == 2)
+ {
+ server.cluster_announce_bus_port = atoi(argv[1]);
+ if (server.cluster_announce_bus_port < 0 ||
+ server.cluster_announce_bus_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"cluster-require-full-coverage") &&
+ argc == 2)
+ {
+ if ((server.cluster_require_full_coverage = yesnotoi(argv[1])) == -1)
+ {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"cluster-node-timeout") && argc == 2) {
server.cluster_node_timeout = strtoll(argv[1],NULL,10);
if (server.cluster_node_timeout <= 0) {
@@ -468,8 +654,9 @@ void loadServerConfigFromString(char *config) {
unsigned long long hard, soft;
int soft_seconds;
- if (class == -1) {
- err = "Unrecognized client limit class";
+ if (class == -1 || class == CLIENT_TYPE_MASTER) {
+ err = "Unrecognized client limit class: the user specified "
+ "an invalid one, or 'master' which has no buffer limits.";
goto loaderr;
}
hard = memtoll(argv[2],NULL);
@@ -489,6 +676,16 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"slave-priority") && argc == 2) {
server.slave_priority = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"slave-announce-ip") && argc == 2) {
+ zfree(server.slave_announce_ip);
+ server.slave_announce_ip = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"slave-announce-port") && argc == 2) {
+ server.slave_announce_port = atoi(argv[1]);
+ if (server.slave_announce_port < 0 ||
+ server.slave_announce_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"min-slaves-to-write") && argc == 2) {
server.repl_min_slaves_to_write = atoi(argv[1]);
if (server.repl_min_slaves_to_write < 0) {
@@ -507,6 +704,17 @@ void loadServerConfigFromString(char *config) {
goto loaderr;
}
server.notify_keyspace_events = flags;
+ } else if (!strcasecmp(argv[0],"supervised") && argc == 2) {
+ server.supervised_mode =
+ configEnumGetValue(supervised_mode_enum,argv[1]);
+
+ if (server.supervised_mode == INT_MIN) {
+ err = "Invalid option for 'supervised'. "
+ "Allowed values: 'upstart', 'systemd', 'auto', or 'no'";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) {
+ queueLoadModule(argv[1],&argv[2],argc-2);
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@@ -552,7 +760,7 @@ loaderr:
* just load a string. */
void loadServerConfig(char *filename, char *options) {
sds config = sdsempty();
- char buf[REDIS_CONFIGLINE_MAX+1];
+ char buf[CONFIG_MAX_LINE+1];
/* Load the file content */
if (filename) {
@@ -562,12 +770,12 @@ void loadServerConfig(char *filename, char *options) {
fp = stdin;
} else {
if ((fp = fopen(filename,"r")) == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Fatal error, can't open config file '%s'", filename);
exit(1);
}
}
- while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL)
+ while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL)
config = sdscat(config,buf);
if (fp != stdin) fclose(fp);
}
@@ -584,41 +792,68 @@ void loadServerConfig(char *filename, char *options) {
* CONFIG SET implementation
*----------------------------------------------------------------------------*/
-void configSetCommand(redisClient *c) {
+#define config_set_bool_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int yn = yesnotoi(o->ptr); \
+ if (yn == -1) goto badfmt; \
+ _var = yn;
+
+#define config_set_numerical_field(_name,_var,min,max) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ if (getLongLongFromObject(o,&ll) == C_ERR) goto badfmt; \
+ if (min != LLONG_MIN && ll < min) goto badfmt; \
+ if (max != LLONG_MAX && ll > max) goto badfmt; \
+ _var = ll;
+
+#define config_set_memory_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ ll = memtoll(o->ptr,&err); \
+ if (err || ll < 0) goto badfmt; \
+ _var = ll;
+
+#define config_set_enum_field(_name,_var,_enumvar) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int enumval = configEnumGetValue(_enumvar,o->ptr); \
+ if (enumval == INT_MIN) goto badfmt; \
+ _var = enumval;
+
+#define config_set_special_field(_name) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) {
+
+#define config_set_else } else
+
+void configSetCommand(client *c) {
robj *o;
long long ll;
- redisAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
- redisAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
+ int err;
+ serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
+ serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
- if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
+ if (0) { /* this starts the config_set macros else-if chain. */
+
+ /* Special fields that can't be handled with general macros. */
+ config_set_special_field("dbfilename") {
if (!pathIsBaseName(o->ptr)) {
addReplyError(c, "dbfilename can't be a path, just a filename");
return;
}
zfree(server.rdb_filename);
server.rdb_filename = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
- if (sdslen(o->ptr) > REDIS_AUTHPASS_MAX_LEN) goto badfmt;
+ } config_set_special_field("requirepass") {
+ if (sdslen(o->ptr) > CONFIG_AUTHPASS_MAX_LEN) goto badfmt;
zfree(server.requirepass);
server.requirepass = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
+ } config_set_special_field("masterauth") {
zfree(server.masterauth);
server.masterauth = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.maxmemory = ll;
- if (server.maxmemory) {
- if (server.maxmemory < zmalloc_used_memory()) {
- redisLog(REDIS_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
- }
- freeMemoryIfNeeded();
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxclients")) {
+ } config_set_special_field("cluster-announce-ip") {
+ zfree(server.cluster_announce_ip);
+ server.cluster_announce_ip = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
+ } config_set_special_field("maxclients") {
int orig_value = server.maxclients;
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 1) goto badfmt;
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll < 1) goto badfmt;
/* Try to check if the OS is capable of supporting so many FDs. */
server.maxclients = ll;
@@ -629,11 +864,11 @@ void configSetCommand(redisClient *c) {
server.maxclients = orig_value;
return;
}
- if (aeGetSetSize(server.el) <
- server.maxclients + REDIS_EVENTLOOP_FDSET_INCR)
+ if ((unsigned int) aeGetSetSize(server.el) <
+ server.maxclients + CONFIG_FDSET_INCR)
{
if (aeResizeSetSize(server.el,
- server.maxclients + REDIS_EVENTLOOP_FDSET_INCR) == AE_ERR)
+ server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
{
addReplyError(c,"The event loop API used by Redis is not able to handle the specified number of clients");
server.maxclients = orig_value;
@@ -641,79 +876,20 @@ void configSetCommand(redisClient *c) {
}
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hz")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hz = ll;
- if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
- if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-policy")) {
- if (!strcasecmp(o->ptr,"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(o->ptr,"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(o->ptr,"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(o->ptr,"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(o->ptr,"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(o->ptr,"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-samples")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.maxmemory_samples = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > LONG_MAX) goto badfmt;
- server.maxidletime = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"tcp-keepalive")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > INT_MAX) goto badfmt;
- server.tcpkeepalive = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
- if (!strcasecmp(o->ptr,"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(o->ptr,"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else if (!strcasecmp(o->ptr,"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_no_fsync_on_rewrite = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
+ } config_set_special_field("appendonly") {
int enable = yesnotoi(o->ptr);
if (enable == -1) goto badfmt;
- if (enable == 0 && server.aof_state != REDIS_AOF_OFF) {
+ if (enable == 0 && server.aof_state != AOF_OFF) {
stopAppendOnly();
- } else if (enable && server.aof_state == REDIS_AOF_OFF) {
- if (startAppendOnly() == REDIS_ERR) {
+ } else if (enable && server.aof_state == AOF_OFF) {
+ if (startAppendOnly() == C_ERR) {
addReplyError(c,
"Unable to turn on AOF. Check server logs.");
return;
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-percentage")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_perc = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-min-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_min_size = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"aof-rewrite-incremental-fsync")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_rewrite_incremental_fsync = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
+ } config_set_special_field("save") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -747,70 +923,12 @@ void configSetCommand(redisClient *c) {
appendServerSaveParams(seconds, changes);
}
sdsfreesplitres(v,vlen);
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-serve-stale-data")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_serve_stale_data = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-read-only")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_slave_ro = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"dir")) {
+ } config_set_special_field("dir") {
if (chdir((char*)o->ptr) == -1) {
addReplyErrorFormat(c,"Changing directory: %s", strerror(errno));
return;
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"set-max-intset-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.set_max_intset_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hll_sparse_max_bytes = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.lua_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-log-slower-than")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR) goto badfmt;
- server.slowlog_log_slower_than = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-max-len")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.slowlog_max_len = (unsigned)ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"latency-monitor-threshold")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.latency_monitor_threshold = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"loglevel")) {
- if (!strcasecmp(o->ptr,"warning")) {
- server.verbosity = REDIS_WARNING;
- } else if (!strcasecmp(o->ptr,"notice")) {
- server.verbosity = REDIS_NOTICE;
- } else if (!strcasecmp(o->ptr,"verbose")) {
- server.verbosity = REDIS_VERBOSE;
- } else if (!strcasecmp(o->ptr,"debug")) {
- server.verbosity = REDIS_DEBUG;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"client-output-buffer-limit")) {
+ } config_set_special_field("client-output-buffer-limit") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -824,17 +942,17 @@ void configSetCommand(redisClient *c) {
* whole configuration string or accept it all, even if a single
* error in a single client class is present. */
for (j = 0; j < vlen; j++) {
- char *eptr;
long val;
if ((j % 4) == 0) {
- if (getClientTypeByName(v[j]) == -1) {
+ int class = getClientTypeByName(v[j]);
+ if (class == -1 || class == CLIENT_TYPE_MASTER) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
} else {
- val = strtoll(v[j], &eptr, 10);
- if (eptr[0] != '\0' || val < 0) {
+ val = memtoll(v[j], &err);
+ if (err || val < 0) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
@@ -856,75 +974,186 @@ void configSetCommand(redisClient *c) {
server.client_obuf_limits[class].soft_limit_seconds = soft_seconds;
}
sdsfreesplitres(v,vlen);
- } else if (!strcasecmp(c->argv[2]->ptr,"stop-writes-on-bgsave-error")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.stop_writes_on_bgsave_err = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-ping-slave-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_ping_slave_period = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- resizeReplicationBacklog(ll);
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-ttl")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.repl_backlog_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- if (ll)
- enableWatchdog(ll);
- else
- disableWatchdog();
- } else if (!strcasecmp(c->argv[2]->ptr,"rdbcompression")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.rdb_compression = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"notify-keyspace-events")) {
+ } config_set_special_field("notify-keyspace-events") {
int flags = keyspaceEventsStringToFlags(o->ptr);
if (flags == -1) goto badfmt;
server.notify_keyspace_events = flags;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-disable-tcp-nodelay")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_disable_tcp_nodelay = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-priority")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.slave_priority = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-to-write")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_to_write = ll;
+ } config_set_special_field("slave-announce-ip") {
+ zfree(server.slave_announce_ip);
+ server.slave_announce_ip = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
+
+ /* Boolean fields.
+ * config_set_bool_field(name,var). */
+ } config_set_bool_field(
+ "rdbcompression", server.rdb_compression) {
+ } config_set_bool_field(
+ "repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay) {
+ } config_set_bool_field(
+ "repl-diskless-sync",server.repl_diskless_sync) {
+ } config_set_bool_field(
+ "cluster-require-full-coverage",server.cluster_require_full_coverage) {
+ } config_set_bool_field(
+ "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
+ } config_set_bool_field(
+ "aof-load-truncated",server.aof_load_truncated) {
+ } config_set_bool_field(
+ "aof-use-rdb-preamble",server.aof_use_rdb_preamble) {
+ } config_set_bool_field(
+ "slave-serve-stale-data",server.repl_serve_stale_data) {
+ } config_set_bool_field(
+ "slave-read-only",server.repl_slave_ro) {
+ } config_set_bool_field(
+ "activerehashing",server.activerehashing) {
+ } config_set_bool_field(
+ "activedefrag",server.active_defrag_enabled) {
+#ifndef HAVE_DEFRAG
+ if (server.active_defrag_enabled) {
+ server.active_defrag_enabled = 0;
+ addReplyError(c,
+ "Active defragmentation cannot be enabled: it requires a "
+ "Redis server compiled with a modified Jemalloc like the "
+ "one shipped by default with the Redis source distribution");
+ return;
+ }
+#endif
+ } config_set_bool_field(
+ "protected-mode",server.protected_mode) {
+ } config_set_bool_field(
+ "stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err) {
+ } config_set_bool_field(
+ "lazyfree-lazy-eviction",server.lazyfree_lazy_eviction) {
+ } config_set_bool_field(
+ "lazyfree-lazy-expire",server.lazyfree_lazy_expire) {
+ } config_set_bool_field(
+ "lazyfree-lazy-server-del",server.lazyfree_lazy_server_del) {
+ } config_set_bool_field(
+ "slave-lazy-flush",server.repl_slave_lazy_flush) {
+ } config_set_bool_field(
+ "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) {
+
+ /* Numerical fields.
+ * config_set_numerical_field(name,var,min,max) */
+ } config_set_numerical_field(
+ "tcp-keepalive",server.tcpkeepalive,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "maxmemory-samples",server.maxmemory_samples,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "lfu-log-factor",server.lfu_log_factor,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "lfu-decay-time",server.lfu_decay_time,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "timeout",server.maxidletime,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "active-defrag-threshold-lower",server.active_defrag_threshold_lower,0,1000) {
+ } config_set_numerical_field(
+ "active-defrag-threshold-upper",server.active_defrag_threshold_upper,0,1000) {
+ } config_set_memory_field(
+ "active-defrag-ignore-bytes",server.active_defrag_ignore_bytes) {
+ } config_set_numerical_field(
+ "active-defrag-cycle-min",server.active_defrag_cycle_min,1,99) {
+ } config_set_numerical_field(
+ "active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) {
+ } config_set_numerical_field(
+ "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hash-max-ziplist-value",server.hash_max_ziplist_value,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "list-max-ziplist-size",server.list_max_ziplist_size,INT_MIN,INT_MAX) {
+ } config_set_numerical_field(
+ "list-compress-depth",server.list_compress_depth,0,INT_MAX) {
+ } config_set_numerical_field(
+ "set-max-intset-entries",server.set_max_intset_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-value",server.zset_max_ziplist_value,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "lua-time-limit",server.lua_time_limit,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-log-slower-than",server.slowlog_log_slower_than,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-max-len",ll,0,LLONG_MAX) {
+ /* Cast to unsigned. */
+ server.slowlog_max_len = (unsigned)ll;
+ } config_set_numerical_field(
+ "latency-monitor-threshold",server.latency_monitor_threshold,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "repl-ping-slave-period",server.repl_ping_slave_period,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-timeout",server.repl_timeout,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-backlog-ttl",server.repl_backlog_time_limit,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slave-priority",server.slave_priority,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slave-announce-port",server.slave_announce_port,0,65535) {
+ } config_set_numerical_field(
+ "min-slaves-to-write",server.repl_min_slaves_to_write,0,LLONG_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-max-lag")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_max_lag = ll;
+ } config_set_numerical_field(
+ "min-slaves-max-lag",server.repl_min_slaves_max_lag,0,LLONG_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-node-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.cluster_node_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-migration-barrier")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_migration_barrier = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-slave-validity-factor")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_slave_validity_factor = ll;
- } else {
+ } config_set_numerical_field(
+ "cluster-node-timeout",server.cluster_node_timeout,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "cluster-announce-port",server.cluster_announce_port,0,65535) {
+ } config_set_numerical_field(
+ "cluster-announce-bus-port",server.cluster_announce_bus_port,0,65535) {
+ } config_set_numerical_field(
+ "cluster-migration-barrier",server.cluster_migration_barrier,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hz",server.hz,0,LLONG_MAX) {
+ /* Hz is more of a hint from the user, so we accept values out of range
+ * but cap them to reasonable values. */
+ if (server.hz < CONFIG_MIN_HZ) server.hz = CONFIG_MIN_HZ;
+ if (server.hz > CONFIG_MAX_HZ) server.hz = CONFIG_MAX_HZ;
+ } config_set_numerical_field(
+ "watchdog-period",ll,0,LLONG_MAX) {
+ if (ll)
+ enableWatchdog(ll);
+ else
+ disableWatchdog();
+
+ /* Memory fields.
+ * config_set_memory_field(name,var) */
+ } config_set_memory_field("maxmemory",server.maxmemory) {
+ if (server.maxmemory) {
+ if (server.maxmemory < zmalloc_used_memory()) {
+ serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
+ }
+ freeMemoryIfNeeded();
+ }
+ } config_set_memory_field("repl-backlog-size",ll) {
+ resizeReplicationBacklog(ll);
+ } config_set_memory_field("auto-aof-rewrite-min-size",ll) {
+ server.aof_rewrite_min_size = ll;
+
+ /* Enumeration fields.
+ * config_set_enum_field(name,var,enum_var) */
+ } config_set_enum_field(
+ "loglevel",server.verbosity,loglevel_enum) {
+ } config_set_enum_field(
+ "maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum) {
+ } config_set_enum_field(
+ "appendfsync",server.aof_fsync,aof_fsync_enum) {
+
+ /* Everything else is an error... */
+ } config_set_else {
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
(char*)c->argv[2]->ptr);
return;
}
+
+ /* On success we just return a generic OK for all the options. */
addReply(c,shared.ok);
return;
@@ -939,7 +1168,7 @@ badfmt: /* Bad format errors */
*----------------------------------------------------------------------------*/
#define config_get_string_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,_var ? _var : ""); \
matches++; \
@@ -947,7 +1176,7 @@ badfmt: /* Bad format errors */
} while(0);
#define config_get_bool_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,_var ? "yes" : "no"); \
matches++; \
@@ -955,7 +1184,7 @@ badfmt: /* Bad format errors */
} while(0);
#define config_get_numerical_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
ll2string(buf,sizeof(buf),_var); \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,buf); \
@@ -963,27 +1192,41 @@ badfmt: /* Bad format errors */
} \
} while(0);
-void configGetCommand(redisClient *c) {
+#define config_get_enum_field(_name,_var,_enumvar) do { \
+ if (stringmatch(pattern,_name,1)) { \
+ addReplyBulkCString(c,_name); \
+ addReplyBulkCString(c,configEnumGetNameOrUnknown(_enumvar,_var)); \
+ matches++; \
+ } \
+} while(0);
+
+void configGetCommand(client *c) {
robj *o = c->argv[2];
void *replylen = addDeferredMultiBulkLength(c);
char *pattern = o->ptr;
char buf[128];
int matches = 0;
- redisAssertWithInfo(c,o,sdsEncodedObject(o));
+ serverAssertWithInfo(c,o,sdsEncodedObject(o));
/* String values */
config_get_string_field("dbfilename",server.rdb_filename);
config_get_string_field("requirepass",server.requirepass);
config_get_string_field("masterauth",server.masterauth);
+ config_get_string_field("cluster-announce-ip",server.cluster_announce_ip);
config_get_string_field("unixsocket",server.unixsocket);
config_get_string_field("logfile",server.logfile);
config_get_string_field("pidfile",server.pidfile);
+ config_get_string_field("slave-announce-ip",server.slave_announce_ip);
/* Numerical values */
config_get_numerical_field("maxmemory",server.maxmemory);
config_get_numerical_field("maxmemory-samples",server.maxmemory_samples);
config_get_numerical_field("timeout",server.maxidletime);
- config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
+ config_get_numerical_field("active-defrag-threshold-lower",server.active_defrag_threshold_lower);
+ config_get_numerical_field("active-defrag-threshold-upper",server.active_defrag_threshold_upper);
+ config_get_numerical_field("active-defrag-ignore-bytes",server.active_defrag_ignore_bytes);
+ config_get_numerical_field("active-defrag-cycle-min",server.active_defrag_cycle_min);
+ config_get_numerical_field("active-defrag-cycle-max",server.active_defrag_cycle_max);
config_get_numerical_field("auto-aof-rewrite-percentage",
server.aof_rewrite_perc);
config_get_numerical_field("auto-aof-rewrite-min-size",
@@ -992,10 +1235,10 @@ void configGetCommand(redisClient *c) {
server.hash_max_ziplist_entries);
config_get_numerical_field("hash-max-ziplist-value",
server.hash_max_ziplist_value);
- config_get_numerical_field("list-max-ziplist-entries",
- server.list_max_ziplist_entries);
- config_get_numerical_field("list-max-ziplist-value",
- server.list_max_ziplist_value);
+ config_get_numerical_field("list-max-ziplist-size",
+ server.list_max_ziplist_size);
+ config_get_numerical_field("list-compress-depth",
+ server.list_compress_depth);
config_get_numerical_field("set-max-intset-entries",
server.set_max_intset_entries);
config_get_numerical_field("zset-max-ziplist-entries",
@@ -1012,6 +1255,8 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("slowlog-max-len",
server.slowlog_max_len);
config_get_numerical_field("port",server.port);
+ config_get_numerical_field("cluster-announce-port",server.cluster_announce_port);
+ config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port);
config_get_numerical_field("tcp-backlog",server.tcp_backlog);
config_get_numerical_field("databases",server.dbnum);
config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period);
@@ -1021,14 +1266,19 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("maxclients",server.maxclients);
config_get_numerical_field("watchdog-period",server.watchdog_period);
config_get_numerical_field("slave-priority",server.slave_priority);
+ config_get_numerical_field("slave-announce-port",server.slave_announce_port);
config_get_numerical_field("min-slaves-to-write",server.repl_min_slaves_to_write);
config_get_numerical_field("min-slaves-max-lag",server.repl_min_slaves_max_lag);
config_get_numerical_field("hz",server.hz);
config_get_numerical_field("cluster-node-timeout",server.cluster_node_timeout);
config_get_numerical_field("cluster-migration-barrier",server.cluster_migration_barrier);
config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor);
+ config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay);
+ config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
/* Bool (yes/no) values */
+ config_get_bool_field("cluster-require-full-coverage",
+ server.cluster_require_full_coverage);
config_get_bool_field("no-appendfsync-on-rewrite",
server.aof_no_fsync_on_rewrite);
config_get_bool_field("slave-serve-stale-data",
@@ -1041,19 +1291,47 @@ void configGetCommand(redisClient *c) {
config_get_bool_field("rdbcompression", server.rdb_compression);
config_get_bool_field("rdbchecksum", server.rdb_checksum);
config_get_bool_field("activerehashing", server.activerehashing);
+ config_get_bool_field("activedefrag", server.active_defrag_enabled);
+ config_get_bool_field("protected-mode", server.protected_mode);
config_get_bool_field("repl-disable-tcp-nodelay",
server.repl_disable_tcp_nodelay);
+ config_get_bool_field("repl-diskless-sync",
+ server.repl_diskless_sync);
config_get_bool_field("aof-rewrite-incremental-fsync",
server.aof_rewrite_incremental_fsync);
+ config_get_bool_field("aof-load-truncated",
+ server.aof_load_truncated);
+ config_get_bool_field("aof-use-rdb-preamble",
+ server.aof_use_rdb_preamble);
+ config_get_bool_field("lazyfree-lazy-eviction",
+ server.lazyfree_lazy_eviction);
+ config_get_bool_field("lazyfree-lazy-expire",
+ server.lazyfree_lazy_expire);
+ config_get_bool_field("lazyfree-lazy-server-del",
+ server.lazyfree_lazy_server_del);
+ config_get_bool_field("slave-lazy-flush",
+ server.repl_slave_lazy_flush);
+
+ /* Enum values */
+ config_get_enum_field("maxmemory-policy",
+ server.maxmemory_policy,maxmemory_policy_enum);
+ config_get_enum_field("loglevel",
+ server.verbosity,loglevel_enum);
+ config_get_enum_field("supervised",
+ server.supervised_mode,supervised_mode_enum);
+ config_get_enum_field("appendfsync",
+ server.aof_fsync,aof_fsync_enum);
+ config_get_enum_field("syslog-facility",
+ server.syslog_facility,syslog_facility_enum);
/* Everything we can't handle with macros follows. */
- if (stringmatch(pattern,"appendonly",0)) {
+ if (stringmatch(pattern,"appendonly",1)) {
addReplyBulkCString(c,"appendonly");
- addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes");
+ addReplyBulkCString(c,server.aof_state == AOF_OFF ? "no" : "yes");
matches++;
}
- if (stringmatch(pattern,"dir",0)) {
+ if (stringmatch(pattern,"dir",1)) {
char buf[1024];
if (getcwd(buf,sizeof(buf)) == NULL)
@@ -1063,36 +1341,7 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"maxmemory-policy",0)) {
- char *s;
-
- switch(server.maxmemory_policy) {
- case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
- case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
- case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
- case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
- case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
- case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"maxmemory-policy");
- addReplyBulkCString(c,s);
- matches++;
- }
- if (stringmatch(pattern,"appendfsync",0)) {
- char *policy;
-
- switch(server.aof_fsync) {
- case AOF_FSYNC_NO: policy = "no"; break;
- case AOF_FSYNC_EVERYSEC: policy = "everysec"; break;
- case AOF_FSYNC_ALWAYS: policy = "always"; break;
- default: policy = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"appendfsync");
- addReplyBulkCString(c,policy);
- matches++;
- }
- if (stringmatch(pattern,"save",0)) {
+ if (stringmatch(pattern,"save",1)) {
sds buf = sdsempty();
int j;
@@ -1108,31 +1357,17 @@ void configGetCommand(redisClient *c) {
sdsfree(buf);
matches++;
}
- if (stringmatch(pattern,"loglevel",0)) {
- char *s;
-
- switch(server.verbosity) {
- case REDIS_WARNING: s = "warning"; break;
- case REDIS_VERBOSE: s = "verbose"; break;
- case REDIS_NOTICE: s = "notice"; break;
- case REDIS_DEBUG: s = "debug"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"loglevel");
- addReplyBulkCString(c,s);
- matches++;
- }
- if (stringmatch(pattern,"client-output-buffer-limit",0)) {
+ if (stringmatch(pattern,"client-output-buffer-limit",1)) {
sds buf = sdsempty();
int j;
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++) {
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
buf = sdscatprintf(buf,"%s %llu %llu %ld",
getClientTypeName(j),
server.client_obuf_limits[j].hard_limit_bytes,
server.client_obuf_limits[j].soft_limit_bytes,
(long) server.client_obuf_limits[j].soft_limit_seconds);
- if (j != REDIS_CLIENT_TYPE_COUNT-1)
+ if (j != CLIENT_TYPE_OBUF_COUNT-1)
buf = sdscatlen(buf," ",1);
}
addReplyBulkCString(c,"client-output-buffer-limit");
@@ -1140,14 +1375,14 @@ void configGetCommand(redisClient *c) {
sdsfree(buf);
matches++;
}
- if (stringmatch(pattern,"unixsocketperm",0)) {
+ if (stringmatch(pattern,"unixsocketperm",1)) {
char buf[32];
snprintf(buf,sizeof(buf),"%o",server.unixsocketperm);
addReplyBulkCString(c,"unixsocketperm");
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"slaveof",0)) {
+ if (stringmatch(pattern,"slaveof",1)) {
char buf[256];
addReplyBulkCString(c,"slaveof");
@@ -1159,8 +1394,8 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"notify-keyspace-events",0)) {
- robj *flagsobj = createObject(REDIS_STRING,
+ if (stringmatch(pattern,"notify-keyspace-events",1)) {
+ robj *flagsobj = createObject(OBJ_STRING,
keyspaceEventsFlagsToString(server.notify_keyspace_events));
addReplyBulkCString(c,"notify-keyspace-events");
@@ -1168,7 +1403,7 @@ void configGetCommand(redisClient *c) {
decrRefCount(flagsobj);
matches++;
}
- if (stringmatch(pattern,"bind",0)) {
+ if (stringmatch(pattern,"bind",1)) {
sds aux = sdsjoin(server.bindaddr,server.bindaddr_count," ");
addReplyBulkCString(c,"bind");
@@ -1188,7 +1423,7 @@ void configGetCommand(redisClient *c) {
/* We use the following dictionary type to store where a configuration
* option is mentioned in the old configuration file, so it's
* like "maxmemory" -> list of line numbers (first line is zero). */
-unsigned int dictSdsCaseHash(const void *key);
+uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void dictSdsDestructor(void *privdata, void *val);
void dictListDestructor(void *privdata, void *val);
@@ -1246,7 +1481,7 @@ void rewriteConfigAddLineNumberToOption(struct rewriteConfigState *state, sds op
* This is useful as only unused lines of processed options will be blanked
* in the config file, while options the rewrite process does not understand
* remain untouched. */
-void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, char *option) {
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option) {
sds opt = sdsnew(option);
if (dictAdd(state->rewritten,opt,NULL) != DICT_OK) sdsfree(opt);
@@ -1260,7 +1495,7 @@ void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, char *option
struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
FILE *fp = fopen(path,"r");
struct rewriteConfigState *state = zmalloc(sizeof(*state));
- char buf[REDIS_CONFIGLINE_MAX+1];
+ char buf[CONFIG_MAX_LINE+1];
int linenum = -1;
if (fp == NULL && errno != ENOENT) return NULL;
@@ -1273,7 +1508,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
if (fp == NULL) return state;
/* Read the old file line by line, populate the state. */
- while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
+ while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL) {
int argc;
sds *argv;
sds line = sdstrim(sdsnew(buf),"\r\n\t ");
@@ -1330,7 +1565,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
*
* "line" is either used, or freed, so the caller does not need to free it
* in any way. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force) {
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force) {
sds o = sdsnew(option);
list *l = dictFetchValue(state->option_to_line,o);
@@ -1415,7 +1650,7 @@ void rewriteConfigStringOption(struct rewriteConfigState *state, char *option, c
return;
}
- /* Compare the strings as sds strings to have a binary safe comparison. */
+ /* Set force to zero if the value is set to its default. */
if (defvalue && strcmp(value,defvalue) == 0) force = 0;
line = sdsnew(option);
@@ -1441,45 +1676,26 @@ void rewriteConfigOctalOption(struct rewriteConfigState *state, char *option, in
rewriteConfigRewriteLine(state,option,line,force);
}
-/* Rewrite an enumeration option, after the "value" every enum/value pair
- * is specified, terminated by NULL. After NULL the default value is
- * specified. See how the function is used for more information. */
-void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, ...) {
- va_list ap;
- char *enum_name, *matching_name = NULL;
- int enum_val, def_val, force;
+/* Rewrite an enumeration option. It takes as usually state and option name,
+ * and in addition the enumeration array and the default value for the
+ * option. */
+void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, configEnum *ce, int defval) {
sds line;
+ const char *name = configEnumGetNameOrUnknown(ce,value);
+ int force = value != defval;
- va_start(ap, value);
- while(1) {
- enum_name = va_arg(ap,char*);
- enum_val = va_arg(ap,int);
- if (enum_name == NULL) {
- def_val = enum_val;
- break;
- }
- if (value == enum_val) matching_name = enum_name;
- }
- va_end(ap);
-
- force = value != def_val;
- line = sdscatprintf(sdsempty(),"%s %s",option,matching_name);
+ line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
-/* Rewrite the syslog-fability option. */
+/* Rewrite the syslog-facility option. */
void rewriteConfigSyslogfacilityOption(struct rewriteConfigState *state) {
- int value = server.syslog_facility, j;
+ int value = server.syslog_facility;
int force = value != LOG_LOCAL0;
- char *name = NULL, *option = "syslog-facility";
+ const char *name = NULL, *option = "syslog-facility";
sds line;
- for (j = 0; validSyslogFacilities[j].name; j++) {
- if (validSyslogFacilities[j].value == value) {
- name = (char*) validSyslogFacilities[j].name;
- break;
- }
- }
+ name = configEnumGetNameOrUnknown(syslog_facility_enum,value);
line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
@@ -1548,7 +1764,7 @@ void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state
int j;
char *option = "client-output-buffer-limit";
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++) {
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
int force = (server.client_obuf_limits[j].hard_limit_bytes !=
clientBufferLimitsDefaults[j].hard_limit_bytes) ||
(server.client_obuf_limits[j].soft_limit_bytes !=
@@ -1639,7 +1855,7 @@ void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) {
/* Don't blank lines about options the rewrite process
* don't understand. */
if (dictFind(state->rewritten,option) == NULL) {
- redisLog(REDIS_DEBUG,"Not rewritten option: %s", option);
+ serverLog(LL_DEBUG,"Not rewritten option: %s", option);
continue;
}
@@ -1732,87 +1948,95 @@ int rewriteConfig(char *path) {
* the rewrite state. */
rewriteConfigYesNoOption(state,"daemonize",server.daemonize,0);
- rewriteConfigStringOption(state,"pidfile",server.pidfile,REDIS_DEFAULT_PID_FILE);
- rewriteConfigNumericalOption(state,"port",server.port,REDIS_SERVERPORT);
- rewriteConfigNumericalOption(state,"tcp-backlog",server.tcp_backlog,REDIS_TCP_BACKLOG);
+ rewriteConfigStringOption(state,"pidfile",server.pidfile,CONFIG_DEFAULT_PID_FILE);
+ rewriteConfigNumericalOption(state,"port",server.port,CONFIG_DEFAULT_SERVER_PORT);
+ rewriteConfigNumericalOption(state,"cluster-announce-port",server.cluster_announce_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT);
+ rewriteConfigNumericalOption(state,"cluster-announce-bus-port",server.cluster_announce_bus_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT);
+ rewriteConfigNumericalOption(state,"tcp-backlog",server.tcp_backlog,CONFIG_DEFAULT_TCP_BACKLOG);
rewriteConfigBindOption(state);
rewriteConfigStringOption(state,"unixsocket",server.unixsocket,NULL);
- rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,REDIS_DEFAULT_UNIX_SOCKET_PERM);
- rewriteConfigNumericalOption(state,"timeout",server.maxidletime,REDIS_MAXIDLETIME);
- rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,REDIS_DEFAULT_TCP_KEEPALIVE);
- rewriteConfigEnumOption(state,"loglevel",server.verbosity,
- "debug", REDIS_DEBUG,
- "verbose", REDIS_VERBOSE,
- "notice", REDIS_NOTICE,
- "warning", REDIS_WARNING,
- NULL, REDIS_DEFAULT_VERBOSITY);
- rewriteConfigStringOption(state,"logfile",server.logfile,REDIS_DEFAULT_LOGFILE);
- rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,REDIS_DEFAULT_SYSLOG_ENABLED);
- rewriteConfigStringOption(state,"syslog-ident",server.syslog_ident,REDIS_DEFAULT_SYSLOG_IDENT);
+ rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,CONFIG_DEFAULT_UNIX_SOCKET_PERM);
+ rewriteConfigNumericalOption(state,"timeout",server.maxidletime,CONFIG_DEFAULT_CLIENT_TIMEOUT);
+ rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,CONFIG_DEFAULT_TCP_KEEPALIVE);
+ rewriteConfigNumericalOption(state,"slave-announce-port",server.slave_announce_port,CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT);
+ rewriteConfigEnumOption(state,"loglevel",server.verbosity,loglevel_enum,CONFIG_DEFAULT_VERBOSITY);
+ rewriteConfigStringOption(state,"logfile",server.logfile,CONFIG_DEFAULT_LOGFILE);
+ rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,CONFIG_DEFAULT_SYSLOG_ENABLED);
+ rewriteConfigStringOption(state,"syslog-ident",server.syslog_ident,CONFIG_DEFAULT_SYSLOG_IDENT);
rewriteConfigSyslogfacilityOption(state);
rewriteConfigSaveOption(state);
- rewriteConfigNumericalOption(state,"databases",server.dbnum,REDIS_DEFAULT_DBNUM);
- rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR);
- rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,REDIS_DEFAULT_RDB_COMPRESSION);
- rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,REDIS_DEFAULT_RDB_CHECKSUM);
- rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,REDIS_DEFAULT_RDB_FILENAME);
+ rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM);
+ rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR);
+ rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION);
+ rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM);
+ rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,CONFIG_DEFAULT_RDB_FILENAME);
rewriteConfigDirOption(state);
rewriteConfigSlaveofOption(state);
+ rewriteConfigStringOption(state,"slave-announce-ip",server.slave_announce_ip,CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP);
rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL);
- rewriteConfigYesNoOption(state,"slave-serve-stale-data",server.repl_serve_stale_data,REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA);
- rewriteConfigYesNoOption(state,"slave-read-only",server.repl_slave_ro,REDIS_DEFAULT_SLAVE_READ_ONLY);
- rewriteConfigNumericalOption(state,"repl-ping-slave-period",server.repl_ping_slave_period,REDIS_REPL_PING_SLAVE_PERIOD);
- rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,REDIS_REPL_TIMEOUT);
- rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,REDIS_DEFAULT_REPL_BACKLOG_SIZE);
- rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT);
- rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY);
- rewriteConfigNumericalOption(state,"slave-priority",server.slave_priority,REDIS_DEFAULT_SLAVE_PRIORITY);
- rewriteConfigNumericalOption(state,"min-slaves-to-write",server.repl_min_slaves_to_write,REDIS_DEFAULT_MIN_SLAVES_TO_WRITE);
- rewriteConfigNumericalOption(state,"min-slaves-max-lag",server.repl_min_slaves_max_lag,REDIS_DEFAULT_MIN_SLAVES_MAX_LAG);
+ rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL);
+ rewriteConfigYesNoOption(state,"slave-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA);
+ rewriteConfigYesNoOption(state,"slave-read-only",server.repl_slave_ro,CONFIG_DEFAULT_SLAVE_READ_ONLY);
+ rewriteConfigNumericalOption(state,"repl-ping-slave-period",server.repl_ping_slave_period,CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD);
+ rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT);
+ rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE);
+ rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT);
+ rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY);
+ rewriteConfigYesNoOption(state,"repl-diskless-sync",server.repl_diskless_sync,CONFIG_DEFAULT_REPL_DISKLESS_SYNC);
+ rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY);
+ rewriteConfigNumericalOption(state,"slave-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY);
+ rewriteConfigNumericalOption(state,"min-slaves-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE);
+ rewriteConfigNumericalOption(state,"min-slaves-max-lag",server.repl_min_slaves_max_lag,CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG);
rewriteConfigStringOption(state,"requirepass",server.requirepass,NULL);
- rewriteConfigNumericalOption(state,"maxclients",server.maxclients,REDIS_MAX_CLIENTS);
- rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,REDIS_DEFAULT_MAXMEMORY);
- rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,
- "volatile-lru", REDIS_MAXMEMORY_VOLATILE_LRU,
- "allkeys-lru", REDIS_MAXMEMORY_ALLKEYS_LRU,
- "volatile-random", REDIS_MAXMEMORY_VOLATILE_RANDOM,
- "allkeys-random", REDIS_MAXMEMORY_ALLKEYS_RANDOM,
- "volatile-ttl", REDIS_MAXMEMORY_VOLATILE_TTL,
- "noeviction", REDIS_MAXMEMORY_NO_EVICTION,
- NULL, REDIS_DEFAULT_MAXMEMORY_POLICY);
- rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,REDIS_DEFAULT_MAXMEMORY_SAMPLES);
- rewriteConfigYesNoOption(state,"appendonly",server.aof_state != REDIS_AOF_OFF,0);
- rewriteConfigStringOption(state,"appendfilename",server.aof_filename,REDIS_DEFAULT_AOF_FILENAME);
- rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,
- "everysec", AOF_FSYNC_EVERYSEC,
- "always", AOF_FSYNC_ALWAYS,
- "no", AOF_FSYNC_NO,
- NULL, REDIS_DEFAULT_AOF_FSYNC);
- rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE);
- rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,REDIS_AOF_REWRITE_PERC);
- rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,REDIS_AOF_REWRITE_MIN_SIZE);
- rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,REDIS_LUA_TIME_LIMIT);
+ rewriteConfigNumericalOption(state,"maxclients",server.maxclients,CONFIG_DEFAULT_MAX_CLIENTS);
+ rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,CONFIG_DEFAULT_MAXMEMORY);
+ rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum,CONFIG_DEFAULT_MAXMEMORY_POLICY);
+ rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,CONFIG_DEFAULT_MAXMEMORY_SAMPLES);
+ rewriteConfigNumericalOption(state,"active-defrag-threshold-lower",server.active_defrag_threshold_lower,CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER);
+ rewriteConfigNumericalOption(state,"active-defrag-threshold-upper",server.active_defrag_threshold_upper,CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER);
+ rewriteConfigBytesOption(state,"active-defrag-ignore-bytes",server.active_defrag_ignore_bytes,CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES);
+ rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN);
+ rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX);
+ rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0);
+ rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME);
+ rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC);
+ rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE);
+ rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC);
+ rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,AOF_REWRITE_MIN_SIZE);
+ rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,LUA_SCRIPT_TIME_LIMIT);
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
- rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
- rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT);
- rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER);
- rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY);
- rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,REDIS_SLOWLOG_LOG_SLOWER_THAN);
- rewriteConfigNumericalOption(state,"latency-monitor-threshold",server.latency_monitor_threshold,REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD);
- rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,REDIS_SLOWLOG_MAX_LEN);
+ rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
+ rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE);
+ rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT);
+ rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER);
+ rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY);
+ rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN);
+ rewriteConfigNumericalOption(state,"latency-monitor-threshold",server.latency_monitor_threshold,CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD);
+ rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,CONFIG_DEFAULT_SLOWLOG_MAX_LEN);
rewriteConfigNotifykeyspaceeventsOption(state);
- rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,REDIS_HASH_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,REDIS_HASH_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"list-max-ziplist-entries",server.list_max_ziplist_entries,REDIS_LIST_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"list-max-ziplist-value",server.list_max_ziplist_value,REDIS_LIST_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
- rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES);
- rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING);
+ rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,OBJ_HASH_MAX_ZIPLIST_ENTRIES);
+ rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,OBJ_HASH_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,OBJ_LIST_MAX_ZIPLIST_SIZE);
+ rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,OBJ_LIST_COMPRESS_DEPTH);
+ rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,OBJ_SET_MAX_INTSET_ENTRIES);
+ rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,OBJ_ZSET_MAX_ZIPLIST_ENTRIES);
+ rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,OBJ_ZSET_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES);
+ rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING);
+ rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG);
+ rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE);
rewriteConfigClientoutputbufferlimitOption(state);
- rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
- rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
+ rewriteConfigNumericalOption(state,"hz",server.hz,CONFIG_DEFAULT_HZ);
+ rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
+ rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED);
+ rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE);
+ rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-eviction",server.lazyfree_lazy_eviction,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL);
+ rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH);
+
+ /* Rewrite Sentinel config if in Sentinel mode. */
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
/* Step 3: remove all the orphaned lines in the old file, that is, lines
@@ -1834,7 +2058,13 @@ int rewriteConfig(char *path) {
* CONFIG command entry point
*----------------------------------------------------------------------------*/
-void configCommand(redisClient *c) {
+void configCommand(client *c) {
+ /* Only allow CONFIG GET while loading. */
+ if (server.loading && strcasecmp(c->argv[1]->ptr,"get")) {
+ addReplyError(c,"Only CONFIG GET is allowed during loading");
+ return;
+ }
+
if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc != 4) goto badarity;
configSetCommand(c);
@@ -1853,10 +2083,10 @@ void configCommand(redisClient *c) {
return;
}
if (rewriteConfig(server.configfile) == -1) {
- redisLog(REDIS_WARNING,"CONFIG REWRITE failed: %s", strerror(errno));
+ serverLog(LL_WARNING,"CONFIG REWRITE failed: %s", strerror(errno));
addReplyErrorFormat(c,"Rewriting config file: %s", strerror(errno));
} else {
- redisLog(REDIS_WARNING,"CONFIG REWRITE executed with success.");
+ serverLog(LL_WARNING,"CONFIG REWRITE executed with success.");
addReply(c,shared.ok);
}
} else {
diff --git a/src/config.h b/src/config.h
index 1bc70a13e..c23f1c789 100644
--- a/src/config.h
+++ b/src/config.h
@@ -34,6 +34,11 @@
#include <AvailabilityMacros.h>
#endif
+#ifdef __linux__
+#include <linux/version.h>
+#include <features.h>
+#endif
+
/* Define redis_fstat to fstat or fstat64() */
#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
#define redis_fstat fstat64
@@ -48,6 +53,7 @@
#define HAVE_PROC_STAT 1
#define HAVE_PROC_MAPS 1
#define HAVE_PROC_SMAPS 1
+#define HAVE_PROC_SOMAXCONN 1
#endif
/* Test for task_info() */
@@ -56,10 +62,15 @@
#endif
/* Test for backtrace() */
-#if defined(__APPLE__) || defined(__linux__)
+#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__))
#define HAVE_BACKTRACE 1
#endif
+/* MSG_NOSIGNAL. */
+#ifdef __linux__
+#define HAVE_MSG_NOSIGNAL 1
+#endif
+
/* Test for polling API */
#ifdef __linux__
#define HAVE_EPOLL 1
@@ -86,8 +97,6 @@
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
* the plain fsync() call. */
#ifdef __linux__
-#include <linux/version.h>
-#include <features.h>
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if (LINUX_VERSION_CODE >= 0x020611 && __GLIBC_PREREQ(2, 6))
#define HAVE_SYNC_FILE_RANGE 1
@@ -112,7 +121,7 @@
#define USE_SETPROCTITLE
#endif
-#if (defined __linux || defined __APPLE__)
+#if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__)
#define USE_SETPROCTITLE
#define INIT_SETPROCTITLE_REPLACEMENT
void spt_init(int argc, char *argv[]);
@@ -187,9 +196,32 @@ void setproctitle(const char *fmt, ...);
#if (__i386 || __amd64 || __powerpc__) && __GNUC__
#define GNUC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-#if (GNUC_VERSION >= 40100) || defined(__clang__)
+#if defined(__clang__)
#define HAVE_ATOMIC
#endif
+#if (defined(__GLIBC__) && defined(__GLIBC_PREREQ))
+#if (GNUC_VERSION >= 40100 && __GLIBC_PREREQ(2, 6))
+#define HAVE_ATOMIC
+#endif
+#endif
+#endif
+
+/* Make sure we can test for ARM just checking for __arm__, since sometimes
+ * __arm is defined but __arm__ is not. */
+#if defined(__arm) && !defined(__arm__)
+#define __arm__
+#endif
+#if defined (__aarch64__) && !defined(__arm64__)
+#define __arm64__
+#endif
+
+/* Make sure we can test for SPARC just checking for __sparc__. */
+#if defined(__sparc) && !defined(__sparc__)
+#define __sparc__
+#endif
+
+#if defined(__sparc__) || defined(__arm__)
+#define USE_ALIGNED_ACCESS
#endif
#endif
diff --git a/src/crc16.c b/src/crc16.c
index 1ec9161c9..7b8c1dad0 100644
--- a/src/crc16.c
+++ b/src/crc16.c
@@ -1,4 +1,4 @@
-#include "redis.h"
+#include "server.h"
/*
* Copyright 2001-2010 Georges Menie (www.menie.org)
diff --git a/src/crc64.c b/src/crc64.c
index ecdba90e0..f1f764922 100644
--- a/src/crc64.c
+++ b/src/crc64.c
@@ -181,9 +181,13 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
}
/* Test main */
-#ifdef TEST_MAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+
+#define UNUSED(x) (void)(x)
+int crc64Test(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
printf("e9c6d914c4b8d9ca == %016llx\n",
(unsigned long long) crc64(0,(unsigned char*)"123456789",9));
return 0;
diff --git a/src/crc64.h b/src/crc64.h
index ab375d3f4..c9fca519d 100644
--- a/src/crc64.h
+++ b/src/crc64.h
@@ -5,4 +5,8 @@
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+#ifdef REDIS_TEST
+int crc64Test(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/db.c b/src/db.c
index c83ab2ee6..7d1504d30 100644
--- a/src/db.c
+++ b/src/db.c
@@ -27,21 +27,21 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
+#include "atomicvar.h"
#include <signal.h>
#include <ctype.h>
-void slotToKeyAdd(robj *key);
-void slotToKeyDel(robj *key);
-void slotToKeyFlush(void);
-
/*-----------------------------------------------------------------------------
* C-level DB API
*----------------------------------------------------------------------------*/
-robj *lookupKey(redisDb *db, robj *key) {
+/* Low level key lookup API, not actually called directly from commands
+ * implementations that should instead rely on lookupKeyRead(),
+ * lookupKeyWrite() and lookupKeyReadWithFlags(). */
+robj *lookupKey(redisDb *db, robj *key, int flags) {
dictEntry *de = dictFind(db->dict,key->ptr);
if (de) {
robj *val = dictGetVal(de);
@@ -49,19 +49,75 @@ robj *lookupKey(redisDb *db, robj *key) {
/* Update the access time for the ageing algorithm.
* Don't do it if we have a saving child, as this will trigger
* a copy on write madness. */
- if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
- val->lru = LRU_CLOCK();
+ if (server.rdb_child_pid == -1 &&
+ server.aof_child_pid == -1 &&
+ !(flags & LOOKUP_NOTOUCH))
+ {
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ unsigned long ldt = val->lru >> 8;
+ unsigned long counter = LFULogIncr(val->lru & 255);
+ val->lru = (ldt << 8) | counter;
+ } else {
+ val->lru = LRU_CLOCK();
+ }
+ }
return val;
} else {
return NULL;
}
}
-robj *lookupKeyRead(redisDb *db, robj *key) {
+/* Lookup a key for read operations, or return NULL if the key is not found
+ * in the specified DB.
+ *
+ * As a side effect of calling this function:
+ * 1. A key gets expired if it reached its TTL.
+ * 2. The key last access time is updated.
+ * 3. The global keys hits/misses stats are updated (reported in INFO).
+ *
+ * This API should not be used when we write to the key after obtaining
+ * the object linked to the key, but only for read only operations.
+ *
+ * Flags change the behavior of this command:
+ *
+ * LOOKUP_NONE (or zero): no special flags are passed.
+ * LOOKUP_NOTOUCH: don't alter the last access time of the key.
+ *
+ * Note: this function also returns NULL if the key is logically expired
+ * but still existing, in case this is a slave, since this API is called only
+ * for read operations. Even if the key expiry is master-driven, we can
+ * correctly report a key is expired on slaves even if the master is lagging
+ * expiring our key via DELs in the replication link. */
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
robj *val;
- expireIfNeeded(db,key);
- val = lookupKey(db,key);
+ if (expireIfNeeded(db,key) == 1) {
+ /* Key expired. If we are in the context of a master, expireIfNeeded()
+ * returns 0 only when the key does not exist at all, so it's safe
+ * to return NULL ASAP. */
+ if (server.masterhost == NULL) return NULL;
+
+ /* However if we are in the context of a slave, expireIfNeeded() will
+ * not really try to expire the key, it only returns information
+ * about the "logical" status of the key: key expiring is up to the
+ * master in order to have a consistent view of master's data set.
+ *
+ * However, if the command caller is not the master, and as an additional
+ * safety measure, the command invoked is a read-only command, we can
+ * safely return NULL here, and provide a more consistent behavior
+ * to clients accessign expired values in a read-only fashion, that
+ * will say the key as non exisitng.
+ *
+ * Notably this covers GETs when slaves are used to scale reads. */
+ if (server.current_client &&
+ server.current_client != server.master &&
+ server.current_client->cmd &&
+ server.current_client->cmd->flags & CMD_READONLY)
+ {
+ return NULL;
+ }
+ }
+ val = lookupKey(db,key,flags);
if (val == NULL)
server.stat_keyspace_misses++;
else
@@ -69,18 +125,29 @@ robj *lookupKeyRead(redisDb *db, robj *key) {
return val;
}
+/* Like lookupKeyReadWithFlags(), but does not use any flag, which is the
+ * common case. */
+robj *lookupKeyRead(redisDb *db, robj *key) {
+ return lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
+}
+
+/* Lookup a key for write operations, and as a side effect, if needed, expires
+ * the key if its TTL is reached.
+ *
+ * Returns the linked value object if the key exists or NULL if the key
+ * does not exist in the specified DB. */
robj *lookupKeyWrite(redisDb *db, robj *key) {
expireIfNeeded(db,key);
- return lookupKey(db,key);
+ return lookupKey(db,key,LOOKUP_NONE);
}
-robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
robj *o = lookupKeyRead(c->db, key);
if (!o) addReply(c,reply);
return o;
}
-robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
robj *o = lookupKeyWrite(c->db, key);
if (!o) addReply(c,reply);
return o;
@@ -94,8 +161,8 @@ void dbAdd(redisDb *db, robj *key, robj *val) {
sds copy = sdsdup(key->ptr);
int retval = dictAdd(db->dict, copy, val);
- redisAssertWithInfo(NULL,key,retval == REDIS_OK);
- if (val->type == REDIS_LIST) signalListAsReady(db, key);
+ serverAssertWithInfo(NULL,key,retval == DICT_OK);
+ if (val->type == OBJ_LIST) signalListAsReady(db, key);
if (server.cluster_enabled) slotToKeyAdd(key);
}
@@ -107,8 +174,15 @@ void dbAdd(redisDb *db, robj *key, robj *val) {
void dbOverwrite(redisDb *db, robj *key, robj *val) {
dictEntry *de = dictFind(db->dict,key->ptr);
- redisAssertWithInfo(NULL,key,de != NULL);
- dictReplace(db->dict, key->ptr, val);
+ serverAssertWithInfo(NULL,key,de != NULL);
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ robj *old = dictGetVal(de);
+ int saved_lru = old->lru;
+ dictReplace(db->dict, key->ptr, val);
+ val->lru = saved_lru;
+ } else {
+ dictReplace(db->dict, key->ptr, val);
+ }
}
/* High level Set operation. This function can be used in order to set
@@ -116,7 +190,9 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) {
*
* 1) The ref count of the value object is incremented.
* 2) clients WATCHing for the destination key notified.
- * 3) The expire time of the key is reset (the key is made persistent). */
+ * 3) The expire time of the key is reset (the key is made persistent).
+ *
+ * All the new keys in the database should be created via this interface. */
void setKey(redisDb *db, robj *key, robj *val) {
if (lookupKeyWrite(db,key) == NULL) {
dbAdd(db,key,val);
@@ -159,7 +235,7 @@ robj *dbRandomKey(redisDb *db) {
}
/* Delete a key, value, and associated expiration entry if any, from the DB */
-int dbDelete(redisDb *db, robj *key) {
+int dbSyncDelete(redisDb *db, robj *key) {
/* Deleting an entry from the expires dict will not free the sds of
* the key, because it is shared with the main dictionary. */
if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
@@ -171,6 +247,13 @@ int dbDelete(redisDb *db, robj *key) {
}
}
+/* This is a wrapper whose behavior depends on the Redis lazy free
+ * configuration. Deletes the key synchronously or asynchronously. */
+int dbDelete(redisDb *db, robj *key) {
+ return server.lazyfree_lazy_server_del ? dbAsyncDelete(db,key) :
+ dbSyncDelete(db,key);
+}
+
/* Prepare the string object stored at 'key' to be modified destructively
* to implement commands like SETBIT or APPEND.
*
@@ -192,15 +275,15 @@ int dbDelete(redisDb *db, robj *key) {
* in 'db', the usage pattern looks like this:
*
* o = lookupKeyWrite(db,key);
- * if (checkType(c,o,REDIS_STRING)) return;
+ * if (checkType(c,o,OBJ_STRING)) return;
* o = dbUnshareStringValue(db,key,o);
*
* At this point the caller is ready to modify the object, for example
* using an sdscat() call to append some data, or anything else.
*/
robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
- redisAssert(o->type == REDIS_STRING);
- if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
+ serverAssert(o->type == OBJ_STRING);
+ if (o->refcount != 1 || o->encoding != OBJ_ENCODING_RAW) {
robj *decoded = getDecodedObject(o);
o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
decrRefCount(decoded);
@@ -209,24 +292,55 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
return o;
}
-long long emptyDb(void(callback)(void*)) {
- int j;
+/* Remove all keys from all the databases in a Redis server.
+ * If callback is given the function is called from time to time to
+ * signal that work is in progress.
+ *
+ * The dbnum can be -1 if all the DBs should be flushed, or the specified
+ * DB number if we want to flush only a single Redis database number.
+ *
+ * Flags may be EMPTYDB_NO_FLAGS if no special flags are specified or
+ * EMPTYDB_ASYNC if we want the memory to be freed in a different thread
+ * and the function to return ASAP.
+ *
+ * On success the function returns the number of keys removed from the
+ * database(s). Otherwise -1 is returned in the specific case the
+ * DB number is out of range, and errno is set to EINVAL. */
+long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
+ int j, async = (flags & EMPTYDB_ASYNC);
long long removed = 0;
+ if (dbnum < -1 || dbnum >= server.dbnum) {
+ errno = EINVAL;
+ return -1;
+ }
+
for (j = 0; j < server.dbnum; j++) {
+ if (dbnum != -1 && dbnum != j) continue;
removed += dictSize(server.db[j].dict);
- dictEmpty(server.db[j].dict,callback);
- dictEmpty(server.db[j].expires,callback);
+ if (async) {
+ emptyDbAsync(&server.db[j]);
+ } else {
+ dictEmpty(server.db[j].dict,callback);
+ dictEmpty(server.db[j].expires,callback);
+ }
+ }
+ if (server.cluster_enabled) {
+ if (async) {
+ slotToKeyFlushAsync();
+ } else {
+ slotToKeyFlush();
+ }
}
- if (server.cluster_enabled) slotToKeyFlush();
+ if (dbnum == -1) flushSlaveKeysWithExpireList();
return removed;
}
-int selectDb(redisClient *c, int id) {
+int selectDb(client *c, int id) {
if (id < 0 || id >= server.dbnum)
- return REDIS_ERR;
+ return C_ERR;
c->db = &server.db[id];
- return REDIS_OK;
+ return C_OK;
}
/*-----------------------------------------------------------------------------
@@ -250,18 +364,49 @@ void signalFlushedDb(int dbid) {
* Type agnostic commands operating on the key space
*----------------------------------------------------------------------------*/
-void flushdbCommand(redisClient *c) {
- server.dirty += dictSize(c->db->dict);
+/* Return the set of flags to use for the emptyDb() call for FLUSHALL
+ * and FLUSHDB commands.
+ *
+ * Currently the command just attempts to parse the "ASYNC" option. It
+ * also checks if the command arity is wrong.
+ *
+ * On success C_OK is returned and the flags are stored in *flags, otherwise
+ * C_ERR is returned and the function sends an error to the client. */
+int getFlushCommandFlags(client *c, int *flags) {
+ /* Parse the optional ASYNC option. */
+ if (c->argc > 1) {
+ if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
+ addReply(c,shared.syntaxerr);
+ return C_ERR;
+ }
+ *flags = EMPTYDB_ASYNC;
+ } else {
+ *flags = EMPTYDB_NO_FLAGS;
+ }
+ return C_OK;
+}
+
+/* FLUSHDB [ASYNC]
+ *
+ * Flushes the currently SELECTed Redis DB. */
+void flushdbCommand(client *c) {
+ int flags;
+
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
signalFlushedDb(c->db->id);
- dictEmpty(c->db->dict,NULL);
- dictEmpty(c->db->expires,NULL);
- if (server.cluster_enabled) slotToKeyFlush();
+ server.dirty += emptyDb(c->db->id,flags,NULL);
addReply(c,shared.ok);
}
-void flushallCommand(redisClient *c) {
+/* FLUSHALL [ASYNC]
+ *
+ * Flushes the whole server data set. */
+void flushallCommand(client *c) {
+ int flags;
+
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
signalFlushedDb(-1);
- server.dirty += emptyDb(NULL);
+ server.dirty += emptyDb(-1,flags,NULL);
addReply(c,shared.ok);
if (server.rdb_child_pid != -1) {
kill(server.rdb_child_pid,SIGUSR1);
@@ -271,56 +416,71 @@ void flushallCommand(redisClient *c) {
/* Normally rdbSave() will reset dirty, but we don't want this here
* as otherwise FLUSHALL will not be replicated nor put into the AOF. */
int saved_dirty = server.dirty;
- rdbSave(server.rdb_filename);
+ rdbSave(server.rdb_filename,NULL);
server.dirty = saved_dirty;
}
server.dirty++;
}
-void delCommand(redisClient *c) {
- int deleted = 0, j;
+/* This command implements DEL and UNLINK. */
+void delGenericCommand(client *c, int lazy) {
+ int numdel = 0, j;
for (j = 1; j < c->argc; j++) {
expireIfNeeded(c->db,c->argv[j]);
- if (dbDelete(c->db,c->argv[j])) {
+ int deleted = lazy ? dbAsyncDelete(c->db,c->argv[j]) :
+ dbSyncDelete(c->db,c->argv[j]);
+ if (deleted) {
signalModifiedKey(c->db,c->argv[j]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,
+ notifyKeyspaceEvent(NOTIFY_GENERIC,
"del",c->argv[j],c->db->id);
server.dirty++;
- deleted++;
+ numdel++;
}
}
- addReplyLongLong(c,deleted);
+ addReplyLongLong(c,numdel);
}
-void existsCommand(redisClient *c) {
- expireIfNeeded(c->db,c->argv[1]);
- if (dbExists(c->db,c->argv[1])) {
- addReply(c, shared.cone);
- } else {
- addReply(c, shared.czero);
+void delCommand(client *c) {
+ delGenericCommand(c,0);
+}
+
+void unlinkCommand(client *c) {
+ delGenericCommand(c,1);
+}
+
+/* EXISTS key1 key2 ... key_N.
+ * Return value is the number of keys existing. */
+void existsCommand(client *c) {
+ long long count = 0;
+ int j;
+
+ for (j = 1; j < c->argc; j++) {
+ expireIfNeeded(c->db,c->argv[j]);
+ if (dbExists(c->db,c->argv[j])) count++;
}
+ addReplyLongLong(c,count);
}
-void selectCommand(redisClient *c) {
+void selectCommand(client *c) {
long id;
if (getLongFromObjectOrReply(c, c->argv[1], &id,
- "invalid DB index") != REDIS_OK)
+ "invalid DB index") != C_OK)
return;
if (server.cluster_enabled && id != 0) {
addReplyError(c,"SELECT is not allowed in cluster mode");
return;
}
- if (selectDb(c,id) == REDIS_ERR) {
- addReplyError(c,"invalid DB index");
+ if (selectDb(c,id) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
} else {
addReply(c,shared.ok);
}
}
-void randomkeyCommand(redisClient *c) {
+void randomkeyCommand(client *c) {
robj *key;
if ((key = dbRandomKey(c->db)) == NULL) {
@@ -332,7 +492,7 @@ void randomkeyCommand(redisClient *c) {
decrRefCount(key);
}
-void keysCommand(redisClient *c) {
+void keysCommand(client *c) {
dictIterator *di;
dictEntry *de;
sds pattern = c->argv[1]->ptr;
@@ -370,20 +530,20 @@ void scanCallback(void *privdata, const dictEntry *de) {
if (o == NULL) {
sds sdskey = dictGetKey(de);
key = createStringObject(sdskey, sdslen(sdskey));
- } else if (o->type == REDIS_SET) {
- key = dictGetKey(de);
- incrRefCount(key);
- } else if (o->type == REDIS_HASH) {
- key = dictGetKey(de);
- incrRefCount(key);
- val = dictGetVal(de);
- incrRefCount(val);
- } else if (o->type == REDIS_ZSET) {
- key = dictGetKey(de);
- incrRefCount(key);
- val = createStringObjectFromLongDouble(*(double*)dictGetVal(de));
+ } else if (o->type == OBJ_SET) {
+ sds keysds = dictGetKey(de);
+ key = createStringObject(keysds,sdslen(keysds));
+ } else if (o->type == OBJ_HASH) {
+ sds sdskey = dictGetKey(de);
+ sds sdsval = dictGetVal(de);
+ key = createStringObject(sdskey,sdslen(sdskey));
+ val = createStringObject(sdsval,sdslen(sdsval));
+ } else if (o->type == OBJ_ZSET) {
+ sds sdskey = dictGetKey(de);
+ key = createStringObject(sdskey,sdslen(sdskey));
+ val = createStringObjectFromLongDouble(*(double*)dictGetVal(de),0);
} else {
- redisPanic("Type not handled in SCAN callback.");
+ serverPanic("Type not handled in SCAN callback.");
}
listAddNodeTail(keys, key);
@@ -392,9 +552,9 @@ void scanCallback(void *privdata, const dictEntry *de) {
/* Try to parse a SCAN cursor stored at object 'o':
* if the cursor is valid, store it as unsigned integer into *cursor and
- * returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the
+ * returns C_OK. Otherwise return C_ERR and send an error to the
* client. */
-int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) {
char *eptr;
/* Use strtoul() because we need an *unsigned* long, so
@@ -404,9 +564,9 @@ int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || errno == ERANGE)
{
addReplyError(c, "invalid cursor");
- return REDIS_ERR;
+ return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* This command implements SCAN, HSCAN and SSCAN commands.
@@ -420,21 +580,19 @@ int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
*
* In the case of a Hash object the function returns both the field and value
* of every element on the Hash. */
-void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
- int rv;
+void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
int i, j;
- char buf[REDIS_LONGSTR_SIZE];
list *keys = listCreate();
listNode *node, *nextnode;
long count = 10;
- sds pat;
- int patlen, use_pattern = 0;
+ sds pat = NULL;
+ int patlen = 0, use_pattern = 0;
dict *ht;
/* Object must be NULL (to iterate keys names), or the type of the object
* must be Set, Sorted Set, or Hash. */
- redisAssert(o == NULL || o->type == REDIS_SET || o->type == REDIS_HASH ||
- o->type == REDIS_ZSET);
+ serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH ||
+ o->type == OBJ_ZSET);
/* Set i to the first option argument. The previous one is the cursor. */
i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */
@@ -444,7 +602,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
j = c->argc - i;
if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) {
if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL)
- != REDIS_OK)
+ != C_OK)
{
goto cleanup;
}
@@ -482,12 +640,12 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
ht = NULL;
if (o == NULL) {
ht = c->db->dict;
- } else if (o->type == REDIS_SET && o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
- } else if (o->type == REDIS_HASH && o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
count *= 2; /* We return key / value for this type. */
- } else if (o->type == REDIS_ZSET && o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
ht = zs->dict;
count *= 2; /* We return key / value for this type. */
@@ -495,6 +653,11 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
if (ht) {
void *privdata[2];
+ /* We set the max number of iterations to ten times the specified
+ * COUNT, so if the hash table is in a pathological state (very
+ * sparsely populated) we avoid to block too much time at the cost
+ * of returning no or very few elements. */
+ long maxiterations = count*10;
/* We pass two pointers to the callback: the list to which it will
* add new elements, and the object containing the dictionary so that
@@ -502,16 +665,18 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
privdata[0] = keys;
privdata[1] = o;
do {
- cursor = dictScan(ht, cursor, scanCallback, privdata);
- } while (cursor && listLength(keys) < count);
- } else if (o->type == REDIS_SET) {
+ cursor = dictScan(ht, cursor, scanCallback, NULL, privdata);
+ } while (cursor &&
+ maxiterations-- &&
+ listLength(keys) < (unsigned long)count);
+ } else if (o->type == OBJ_SET) {
int pos = 0;
int64_t ll;
while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
- } else if (o->type == REDIS_HASH || o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
unsigned char *p = ziplistIndex(o->ptr,0);
unsigned char *vstr;
unsigned int vlen;
@@ -526,7 +691,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
}
cursor = 0;
} else {
- redisPanic("Not handled encoding in SCAN.");
+ serverPanic("Not handled encoding in SCAN.");
}
/* Step 3: Filter elements. */
@@ -542,10 +707,10 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
filter = 1;
} else {
- char buf[REDIS_LONGSTR_SIZE];
+ char buf[LONG_STR_SIZE];
int len;
- redisAssert(kobj->encoding == REDIS_ENCODING_INT);
+ serverAssert(kobj->encoding == OBJ_ENCODING_INT);
len = ll2string(buf,sizeof(buf),(long)kobj->ptr);
if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1;
}
@@ -563,7 +728,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
/* If this is a hash or a sorted set, we have a flat list of
* key-value elements, so if this element was filtered, remove the
* value, or skip it if it was not filtered: we only match keys. */
- if (o && (o->type == REDIS_ZSET || o->type == REDIS_HASH)) {
+ if (o && (o->type == OBJ_ZSET || o->type == OBJ_HASH)) {
node = nextnode;
nextnode = listNextNode(node);
if (filter) {
@@ -577,9 +742,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
/* Step 4: Reply to the client. */
addReplyMultiBulkLen(c, 2);
- rv = snprintf(buf, sizeof(buf), "%lu", cursor);
- redisAssert(rv < sizeof(buf));
- addReplyBulkCBuffer(c, buf, rv);
+ addReplyBulkLongLong(c,cursor);
addReplyMultiBulkLen(c, listLength(keys));
while ((node = listFirst(keys)) != NULL) {
@@ -595,41 +758,45 @@ cleanup:
}
/* The SCAN command completely relies on scanGenericCommand. */
-void scanCommand(redisClient *c) {
+void scanCommand(client *c) {
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[1],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[1],&cursor) == C_ERR) return;
scanGenericCommand(c,NULL,cursor);
}
-void dbsizeCommand(redisClient *c) {
+void dbsizeCommand(client *c) {
addReplyLongLong(c,dictSize(c->db->dict));
}
-void lastsaveCommand(redisClient *c) {
+void lastsaveCommand(client *c) {
addReplyLongLong(c,server.lastsave);
}
-void typeCommand(redisClient *c) {
+void typeCommand(client *c) {
robj *o;
char *type;
- o = lookupKeyRead(c->db,c->argv[1]);
+ o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);
if (o == NULL) {
type = "none";
} else {
switch(o->type) {
- case REDIS_STRING: type = "string"; break;
- case REDIS_LIST: type = "list"; break;
- case REDIS_SET: type = "set"; break;
- case REDIS_ZSET: type = "zset"; break;
- case REDIS_HASH: type = "hash"; break;
+ case OBJ_STRING: type = "string"; break;
+ case OBJ_LIST: type = "list"; break;
+ case OBJ_SET: type = "set"; break;
+ case OBJ_ZSET: type = "zset"; break;
+ case OBJ_HASH: type = "hash"; break;
+ case OBJ_MODULE: {
+ moduleValue *mv = o->ptr;
+ type = mv->type->name;
+ }; break;
default: type = "unknown"; break;
}
}
addReplyStatus(c,type);
}
-void shutdownCommand(redisClient *c) {
+void shutdownCommand(client *c) {
int flags = 0;
if (c->argc > 2) {
@@ -637,9 +804,9 @@ void shutdownCommand(redisClient *c) {
return;
} else if (c->argc == 2) {
if (!strcasecmp(c->argv[1]->ptr,"nosave")) {
- flags |= REDIS_SHUTDOWN_NOSAVE;
+ flags |= SHUTDOWN_NOSAVE;
} else if (!strcasecmp(c->argv[1]->ptr,"save")) {
- flags |= REDIS_SHUTDOWN_SAVE;
+ flags |= SHUTDOWN_SAVE;
} else {
addReply(c,shared.syntaxerr);
return;
@@ -652,24 +819,28 @@ void shutdownCommand(redisClient *c) {
*
* Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
if (server.loading || server.sentinel_mode)
- flags = (flags & ~REDIS_SHUTDOWN_SAVE) | REDIS_SHUTDOWN_NOSAVE;
- if (prepareForShutdown(flags) == REDIS_OK) exit(0);
+ flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
+ if (prepareForShutdown(flags) == C_OK) exit(0);
addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
}
-void renameGenericCommand(redisClient *c, int nx) {
+void renameGenericCommand(client *c, int nx) {
robj *o;
long long expire;
+ int samekey = 0;
- /* To use the same key as src and dst is probably an error */
- if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
- addReply(c,shared.sameobjecterr);
- return;
- }
+    /* When the source and destination keys are the same, no operation is
+     * performed if the key exists; however we still return an error on a nonexistent key. */
+ if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) samekey = 1;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
return;
+ if (samekey) {
+ addReply(c,nx ? shared.czero : shared.ok);
+ return;
+ }
+
incrRefCount(o);
expire = getExpire(c->db,c->argv[1]);
if (lookupKeyWrite(c->db,c->argv[2]) != NULL) {
@@ -683,30 +854,31 @@ void renameGenericCommand(redisClient *c, int nx) {
dbDelete(c->db,c->argv[2]);
}
dbAdd(c->db,c->argv[2],o);
- if (expire != -1) setExpire(c->db,c->argv[2],expire);
+ if (expire != -1) setExpire(c,c->db,c->argv[2],expire);
dbDelete(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[2]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"rename_from",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",
c->argv[1],c->db->id);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"rename_to",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to",
c->argv[2],c->db->id);
server.dirty++;
addReply(c,nx ? shared.cone : shared.ok);
}
-void renameCommand(redisClient *c) {
+void renameCommand(client *c) {
renameGenericCommand(c,0);
}
-void renamenxCommand(redisClient *c) {
+void renamenxCommand(client *c) {
renameGenericCommand(c,1);
}
-void moveCommand(redisClient *c) {
+void moveCommand(client *c) {
robj *o;
redisDb *src, *dst;
int srcid;
+ long long dbid, expire;
if (server.cluster_enabled) {
addReplyError(c,"MOVE is not allowed in cluster mode");
@@ -716,7 +888,11 @@ void moveCommand(redisClient *c) {
/* Obtain source and target DB pointers */
src = c->db;
srcid = c->db->id;
- if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
+
+ if (getLongLongFromObject(c->argv[2],&dbid) == C_ERR ||
+ dbid < INT_MIN || dbid > INT_MAX ||
+ selectDb(c,dbid) == C_ERR)
+ {
addReply(c,shared.outofrangeerr);
return;
}
@@ -736,6 +912,7 @@ void moveCommand(redisClient *c) {
addReply(c,shared.czero);
return;
}
+ expire = getExpire(c->db,c->argv[1]);
/* Return zero if the key already exists in the target DB */
if (lookupKeyWrite(dst,c->argv[1]) != NULL) {
@@ -743,6 +920,7 @@ void moveCommand(redisClient *c) {
return;
}
dbAdd(dst,c->argv[1],o);
+ if (expire != -1) setExpire(c,dst,c->argv[1],expire);
incrRefCount(o);
/* OK! key moved, free the entry in the source DB */
@@ -751,6 +929,91 @@ void moveCommand(redisClient *c) {
addReply(c,shared.cone);
}
+/* Helper function for dbSwapDatabases(): scans the list of keys that have
+ * one or more blocked clients for B[LR]POP or other list blocking commands
+ * and signal the keys are ready if they are lists. See the comment where
+ * the function is used for more info. */
+void scanDatabaseForReadyLists(redisDb *db) {
+ dictEntry *de;
+ dictIterator *di = dictGetSafeIterator(db->blocking_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ robj *value = lookupKey(db,key,LOOKUP_NOTOUCH);
+ if (value && value->type == OBJ_LIST)
+ signalListAsReady(db, key);
+ }
+ dictReleaseIterator(di);
+}
+
+/* Swap two databases at runtime so that all clients will magically see
+ * the new database even if already connected. Note that the client
+ * structure c->db points to a given DB, so we need to be smarter and
+ * swap the underlying referenced structures, otherwise we would need
+ * to fix all the references to the Redis DB structure.
+ *
+ * Returns C_ERR if at least one of the DB ids are out of range, otherwise
+ * C_OK is returned. */
+int dbSwapDatabases(int id1, int id2) {
+ if (id1 < 0 || id1 >= server.dbnum ||
+ id2 < 0 || id2 >= server.dbnum) return C_ERR;
+ if (id1 == id2) return C_OK;
+ redisDb aux = server.db[id1];
+ redisDb *db1 = &server.db[id1], *db2 = &server.db[id2];
+
+ /* Swap hash tables. Note that we don't swap blocking_keys,
+ * ready_keys and watched_keys, since we want clients to
+ * remain in the same DB they were. */
+ db1->dict = db2->dict;
+ db1->expires = db2->expires;
+ db1->avg_ttl = db2->avg_ttl;
+
+ db2->dict = aux.dict;
+ db2->expires = aux.expires;
+ db2->avg_ttl = aux.avg_ttl;
+
+ /* Now we need to handle clients blocked on lists: as an effect
+ * of swapping the two DBs, a client that was waiting for list
+ * X in a given DB, may now actually be unblocked if X happens
+ * to exist in the new version of the DB, after the swap.
+ *
+ * However normally we only do this check for efficiency reasons
+ * in dbAdd() when a list is created. So here we need to rescan
+ * the list of clients blocked on lists and signal lists as ready
+ * if needed. */
+ scanDatabaseForReadyLists(db1);
+ scanDatabaseForReadyLists(db2);
+ return C_OK;
+}
+
+/* SWAPDB db1 db2 */
+void swapdbCommand(client *c) {
+ long id1, id2;
+
+ /* Not allowed in cluster mode: we have just DB 0 there. */
+ if (server.cluster_enabled) {
+ addReplyError(c,"SWAPDB is not allowed in cluster mode");
+ return;
+ }
+
+ /* Get the two DBs indexes. */
+ if (getLongFromObjectOrReply(c, c->argv[1], &id1,
+ "invalid first DB index") != C_OK)
+ return;
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &id2,
+ "invalid second DB index") != C_OK)
+ return;
+
+ /* Swap... */
+ if (dbSwapDatabases(id1,id2) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
+ return;
+ } else {
+ server.dirty++;
+ addReply(c,shared.ok);
+ }
+}
+
/*-----------------------------------------------------------------------------
* Expires API
*----------------------------------------------------------------------------*/
@@ -758,18 +1021,26 @@ void moveCommand(redisClient *c) {
int removeExpire(redisDb *db, robj *key) {
/* An expire may only be removed if there is a corresponding entry in the
* main dict. Otherwise, the key will never be freed. */
- redisAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
+ serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
return dictDelete(db->expires,key->ptr) == DICT_OK;
}
-void setExpire(redisDb *db, robj *key, long long when) {
+/* Set an expire to the specified key. If the expire is set in the context
+ * of a user calling a command, 'c' is the client; otherwise 'c' is set
+ * to NULL. The 'when' parameter is the absolute unix time in milliseconds
+ * after which the key will no longer be considered valid. */
+void setExpire(client *c, redisDb *db, robj *key, long long when) {
dictEntry *kde, *de;
/* Reuse the sds from the main dict in the expire dict */
kde = dictFind(db->dict,key->ptr);
- redisAssertWithInfo(NULL,key,kde != NULL);
- de = dictReplaceRaw(db->expires,dictGetKey(kde));
+ serverAssertWithInfo(NULL,key,kde != NULL);
+ de = dictAddOrFind(db->expires,dictGetKey(kde));
dictSetSignedIntegerVal(de,when);
+
+ int writable_slave = server.masterhost && server.repl_slave_ro == 0;
+ if (c && writable_slave && !(c->flags & CLIENT_MASTER))
+ rememberSlaveKeyWithExpire(db,key);
}
/* Return the expire time of the specified key, or -1 if no expire
@@ -783,7 +1054,7 @@ long long getExpire(redisDb *db, robj *key) {
/* The entry was found in the expire dict, this means it should also
* be present in the main dict (safety check). */
- redisAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
+ serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
return dictGetSignedIntegerVal(de);
}
@@ -795,15 +1066,15 @@ long long getExpire(redisDb *db, robj *key) {
* AOF and the master->slave link guarantee operation ordering, everything
* will be consistent even if we allow write operations against expiring
* keys. */
-void propagateExpire(redisDb *db, robj *key) {
+void propagateExpire(redisDb *db, robj *key, int lazy) {
robj *argv[2];
- argv[0] = shared.del;
+ argv[0] = lazy ? shared.unlink : shared.del;
argv[1] = key;
incrRefCount(argv[0]);
incrRefCount(argv[1]);
- if (server.aof_state != REDIS_AOF_OFF)
+ if (server.aof_state != AOF_OFF)
feedAppendOnlyFile(server.delCommand,db->id,argv,2);
replicationFeedSlaves(server.slaves,db->id,argv,2);
@@ -841,129 +1112,11 @@ int expireIfNeeded(redisDb *db, robj *key) {
/* Delete the key */
server.stat_expiredkeys++;
- propagateExpire(db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_EXPIRED,
+ propagateExpire(db,key,server.lazyfree_lazy_expire);
+ notifyKeyspaceEvent(NOTIFY_EXPIRED,
"expired",key,db->id);
- return dbDelete(db,key);
-}
-
-/*-----------------------------------------------------------------------------
- * Expires Commands
- *----------------------------------------------------------------------------*/
-
-/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
- * and PEXPIREAT. Because the commad second argument may be relative or absolute
- * the "basetime" argument is used to signal what the base time is (either 0
- * for *AT variants of the command, or the current time for relative expires).
- *
- * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
- * the argv[2] parameter. The basetime is always specified in milliseconds. */
-void expireGenericCommand(redisClient *c, long long basetime, int unit) {
- robj *key = c->argv[1], *param = c->argv[2];
- long long when; /* unix time in milliseconds when the key will expire. */
-
- if (getLongLongFromObjectOrReply(c, param, &when, NULL) != REDIS_OK)
- return;
-
- if (unit == UNIT_SECONDS) when *= 1000;
- when += basetime;
-
- /* No key, return zero. */
- if (lookupKeyRead(c->db,key) == NULL) {
- addReply(c,shared.czero);
- return;
- }
-
- /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
- * should never be executed as a DEL when load the AOF or in the context
- * of a slave instance.
- *
- * Instead we take the other branch of the IF statement setting an expire
- * (possibly in the past) and wait for an explicit DEL from the master. */
- if (when <= mstime() && !server.loading && !server.masterhost) {
- robj *aux;
-
- redisAssertWithInfo(c,key,dbDelete(c->db,key));
- server.dirty++;
-
- /* Replicate/AOF this as an explicit DEL. */
- aux = createStringObject("DEL",3);
- rewriteClientCommandVector(c,2,aux,key);
- decrRefCount(aux);
- signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
- addReply(c, shared.cone);
- return;
- } else {
- setExpire(c->db,key,when);
- addReply(c,shared.cone);
- signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"expire",key,c->db->id);
- server.dirty++;
- return;
- }
-}
-
-void expireCommand(redisClient *c) {
- expireGenericCommand(c,mstime(),UNIT_SECONDS);
-}
-
-void expireatCommand(redisClient *c) {
- expireGenericCommand(c,0,UNIT_SECONDS);
-}
-
-void pexpireCommand(redisClient *c) {
- expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
-}
-
-void pexpireatCommand(redisClient *c) {
- expireGenericCommand(c,0,UNIT_MILLISECONDS);
-}
-
-void ttlGenericCommand(redisClient *c, int output_ms) {
- long long expire, ttl = -1;
-
- /* If the key does not exist at all, return -2 */
- if (lookupKeyRead(c->db,c->argv[1]) == NULL) {
- addReplyLongLong(c,-2);
- return;
- }
- /* The key exists. Return -1 if it has no expire, or the actual
- * TTL value otherwise. */
- expire = getExpire(c->db,c->argv[1]);
- if (expire != -1) {
- ttl = expire-mstime();
- if (ttl < 0) ttl = 0;
- }
- if (ttl == -1) {
- addReplyLongLong(c,-1);
- } else {
- addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
- }
-}
-
-void ttlCommand(redisClient *c) {
- ttlGenericCommand(c, 0);
-}
-
-void pttlCommand(redisClient *c) {
- ttlGenericCommand(c, 1);
-}
-
-void persistCommand(redisClient *c) {
- dictEntry *de;
-
- de = dictFind(c->db->dict,c->argv[1]->ptr);
- if (de == NULL) {
- addReply(c,shared.czero);
- } else {
- if (removeExpire(c->db,c->argv[1])) {
- addReply(c,shared.cone);
- server.dirty++;
- } else {
- addReply(c,shared.czero);
- }
- }
+ return server.lazyfree_lazy_expire ? dbAsyncDelete(db,key) :
+ dbSyncDelete(db,key);
}
/* -----------------------------------------------------------------------------
@@ -974,17 +1127,30 @@ void persistCommand(redisClient *c) {
* (firstkey, lastkey, step). */
int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) {
int j, i = 0, last, *keys;
- REDIS_NOTUSED(argv);
+ UNUSED(argv);
if (cmd->firstkey == 0) {
*numkeys = 0;
return NULL;
}
+
last = cmd->lastkey;
if (last < 0) last = argc+last;
keys = zmalloc(sizeof(int)*((last - cmd->firstkey)+1));
for (j = cmd->firstkey; j <= last; j += cmd->keystep) {
- redisAssert(j < argc);
+ if (j >= argc) {
+ /* Modules command do not have dispatch time arity checks, so
+ * we need to handle the case where the user passed an invalid
+ * number of arguments here. In this case we return no keys
+ * and expect the module command to report an arity error. */
+ if (cmd->flags & CMD_MODULE) {
+ zfree(keys);
+ *numkeys = 0;
+ return NULL;
+ } else {
+ serverPanic("Redis built-in command declared keys positions not matching the arity requirements.");
+ }
+ }
keys[i++] = j;
}
*numkeys = i;
@@ -1003,7 +1169,9 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in
* This function uses the command table if a command-specific helper function
* is not required, otherwise it calls the command-specific function. */
int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
- if (cmd->getkeys_proc) {
+ if (cmd->flags & CMD_MODULE_GETKEYS) {
+ return moduleGetCommandKeysViaAPI(cmd,argv,argc,numkeys);
+ } else if (!(cmd->flags & CMD_MODULE) && cmd->getkeys_proc) {
return cmd->getkeys_proc(cmd,argv,argc,numkeys);
} else {
return getKeysUsingCommandTable(cmd,argv,argc,numkeys);
@@ -1020,7 +1188,7 @@ void getKeysFreeResult(int *result) {
* ZINTERSTORE <destkey> <num-keys> <key> <key> ... <key> <options> */
int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
- REDIS_NOTUSED(cmd);
+ UNUSED(cmd);
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
@@ -1049,7 +1217,7 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu
* EVALSHA <script> <num-keys> <key> <key> ... <key> [more stuff] */
int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
- REDIS_NOTUSED(cmd);
+ UNUSED(cmd);
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
@@ -1076,8 +1244,8 @@ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
* follow in SQL-alike style. Here we parse just the minimum in order to
* correctly identify keys in the "STORE" option. */
int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
- int i, j, num, *keys;
- REDIS_NOTUSED(cmd);
+ int i, j, num, *keys, found_store = 0;
+ UNUSED(cmd);
num = 0;
keys = zmalloc(sizeof(int)*2); /* Alloc 2 places for the worst case. */
@@ -1107,99 +1275,162 @@ int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
/* Note: we don't increment "num" here and continue the loop
* to be sure to process the *last* "STORE" option if multiple
* ones are provided. This is same behavior as SORT. */
+ found_store = 1;
keys[num] = i+1; /* <store-key> */
break;
}
}
}
+ *numkeys = num + found_store;
+ return keys;
+}
+
+int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ int i, num, first, *keys;
+ UNUSED(cmd);
+
+ /* Assume the obvious form. */
+ first = 3;
+ num = 1;
+
+ /* But check for the extended one with the KEYS option. */
+ if (argc > 6) {
+ for (i = 6; i < argc; i++) {
+ if (!strcasecmp(argv[i]->ptr,"keys") &&
+ sdslen(argv[3]->ptr) == 0)
+ {
+ first = i+1;
+ num = argc-first;
+ break;
+ }
+ }
+ }
+
+ keys = zmalloc(sizeof(int)*num);
+ for (i = 0; i < num; i++) keys[i] = first+i;
*numkeys = num;
return keys;
}
+/* Helper function to extract keys from following commands:
+ * GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count] [STORE key] [STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ... */
+int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ int i, num, *keys;
+ UNUSED(cmd);
+
+ /* Check for the presence of the stored key in the command */
+ int stored_key = -1;
+ for (i = 5; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+ /* For the case when user specifies both "store" and "storedist" options, the
+ * second key specified would override the first key. This behavior is kept
+ * the same as in georadiusCommand method.
+ */
+ if ((!strcasecmp(arg, "store") || !strcasecmp(arg, "storedist")) && ((i+1) < argc)) {
+ stored_key = i+1;
+ i++;
+ }
+ }
+ num = 1 + (stored_key == -1 ? 0 : 1);
+
+ /* Keys in the command come from two places:
+ * argv[1] = key,
+ * argv[5...n] = stored key if present
+ */
+ keys = zmalloc(sizeof(int) * num);
+
+ /* Add all key positions to keys[] */
+ keys[0] = 1;
+ if(num > 1) {
+ keys[1] = stored_key;
+ }
+ *numkeys = num;
+ return keys;
+}
+
/* Slot to Key API. This is used by Redis Cluster in order to obtain in
* a fast way a key that belongs to a specified hash slot. This is useful
- * while rehashing the cluster. */
-void slotToKeyAdd(robj *key) {
+ * while rehashing the cluster and in other conditions when we need to
+ * understand if we have keys for a given hash slot. */
+void slotToKeyUpdateKey(robj *key, int add) {
unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr));
+ unsigned char buf[64];
+ unsigned char *indexed = buf;
+ size_t keylen = sdslen(key->ptr);
+
+ server.cluster->slots_keys_count[hashslot] += add ? 1 : -1;
+ if (keylen+2 > 64) indexed = zmalloc(keylen+2);
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ memcpy(indexed+2,key->ptr,keylen);
+ if (add) {
+ raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL,NULL);
+ } else {
+ raxRemove(server.cluster->slots_to_keys,indexed,keylen+2,NULL);
+ }
+ if (indexed != buf) zfree(indexed);
+}
- zslInsert(server.cluster->slots_to_keys,hashslot,key);
- incrRefCount(key);
+void slotToKeyAdd(robj *key) {
+ slotToKeyUpdateKey(key,1);
}
void slotToKeyDel(robj *key) {
- unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr));
-
- zslDelete(server.cluster->slots_to_keys,hashslot,key);
+ slotToKeyUpdateKey(key,0);
}
void slotToKeyFlush(void) {
- zslFree(server.cluster->slots_to_keys);
- server.cluster->slots_to_keys = zslCreate();
+ raxFree(server.cluster->slots_to_keys);
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
}
+/* Populate the specified array of objects with keys in the specified slot.
+ * New objects are returned to represent keys; it's up to the caller to
+ * decrement the reference count to release the key names. */
unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count) {
- zskiplistNode *n;
- zrangespec range;
+ raxIterator iter;
int j = 0;
-
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
-
- n = zslFirstInRange(server.cluster->slots_to_keys, &range);
- while(n && n->score == hashslot && count--) {
- keys[j++] = n->obj;
- n = n->level[0].forward;
+ unsigned char indexed[2];
+
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_keys);
+ raxSeek(&iter,">=",indexed,2);
+ while(count-- && raxNext(&iter)) {
+ if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break;
+ keys[j++] = createStringObject((char*)iter.key+2,iter.key_len-2);
}
+ raxStop(&iter);
return j;
}
/* Remove all the keys in the specified hash slot.
* The number of removed items is returned. */
unsigned int delKeysInSlot(unsigned int hashslot) {
- zskiplistNode *n;
- zrangespec range;
+ raxIterator iter;
int j = 0;
+ unsigned char indexed[2];
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_keys);
+ while(server.cluster->slots_keys_count[hashslot]) {
+ raxSeek(&iter,">=",indexed,2);
+ raxNext(&iter);
- n = zslFirstInRange(server.cluster->slots_to_keys, &range);
- while(n && n->score == hashslot) {
- robj *key = n->obj;
- n = n->level[0].forward; /* Go to the next item before freeing it. */
- incrRefCount(key); /* Protect the object while freeing it. */
+ robj *key = createStringObject((char*)iter.key+2,iter.key_len-2);
dbDelete(&server.db[0],key);
decrRefCount(key);
j++;
}
+ raxStop(&iter);
return j;
}
unsigned int countKeysInSlot(unsigned int hashslot) {
- zskiplist *zsl = server.cluster->slots_to_keys;
- zskiplistNode *zn;
- zrangespec range;
- int rank, count = 0;
-
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
-
- /* Find first element in range */
- zn = zslFirstInRange(zsl, &range);
-
- /* Use rank of first element, if any, to determine preliminary count */
- if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
- count = (zsl->length - (rank - 1));
-
- /* Find last element in range */
- zn = zslLastInRange(zsl, &range);
-
- /* Use rank of last element, if any, to determine the actual count */
- if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
- count -= (zsl->length - rank);
- }
- }
- return count;
+ return server.cluster->slots_keys_count[hashslot];
}
diff --git a/src/debug.c b/src/debug.c
index 50c9d9b28..d6e12ec2a 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -27,18 +27,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
#include "crc64.h"
#include <arpa/inet.h>
#include <signal.h>
+#include <dlfcn.h>
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#include <ucontext.h>
#include <fcntl.h>
#include "bio.h"
+#include <unistd.h>
#endif /* HAVE_BACKTRACE */
#ifdef __CYGWIN__
@@ -124,7 +126,7 @@ void computeDatasetDigest(unsigned char *final) {
redisDb *db = server.db+j;
if (dictSize(db->dict) == 0) continue;
- di = dictGetIterator(db->dict);
+ di = dictGetSafeIterator(db->dict);
/* hash the DB id, so the same dataset moved in a different
* DB will lead to a different digest */
@@ -150,10 +152,10 @@ void computeDatasetDigest(unsigned char *final) {
expiretime = getExpire(db,keyobj);
/* Save the key and associated value */
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
mixObjectDigest(digest,o);
- } else if (o->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(o,0,REDIS_TAIL);
+ } else if (o->type == OBJ_LIST) {
+ listTypeIterator *li = listTypeInitIterator(o,0,LIST_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
robj *eleobj = listTypeGet(&entry);
@@ -161,18 +163,18 @@ void computeDatasetDigest(unsigned char *final) {
decrRefCount(eleobj);
}
listTypeReleaseIterator(li);
- } else if (o->type == REDIS_SET) {
+ } else if (o->type == OBJ_SET) {
setTypeIterator *si = setTypeInitIterator(o);
- robj *ele;
- while((ele = setTypeNextObject(si)) != NULL) {
- xorObjectDigest(digest,ele);
- decrRefCount(ele);
+ sds sdsele;
+ while((sdsele = setTypeNextObject(si)) != NULL) {
+ xorDigest(digest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
}
setTypeReleaseIterator(si);
- } else if (o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_ZSET) {
unsigned char eledigest[20];
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = o->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -181,12 +183,12 @@ void computeDatasetDigest(unsigned char *final) {
double score;
eptr = ziplistIndex(zl,0);
- redisAssert(eptr != NULL);
+ serverAssert(eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
while (eptr != NULL) {
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
score = zzlGetScore(sptr);
memset(eledigest,0,20);
@@ -202,45 +204,52 @@ void computeDatasetDigest(unsigned char *final) {
xorDigest(digest,eledigest,20);
zzlNext(zl,&eptr,&sptr);
}
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
dictIterator *di = dictGetIterator(zs->dict);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds sdsele = dictGetKey(de);
double *score = dictGetVal(de);
snprintf(buf,sizeof(buf),"%.17g",*score);
memset(eledigest,0,20);
- mixObjectDigest(eledigest,eleobj);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
mixDigest(eledigest,buf,strlen(buf));
xorDigest(digest,eledigest,20);
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
- } else if (o->type == REDIS_HASH) {
- hashTypeIterator *hi;
- robj *obj;
-
- hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
+ } else if (o->type == OBJ_HASH) {
+ hashTypeIterator *hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != C_ERR) {
unsigned char eledigest[20];
+ sds sdsele;
memset(eledigest,0,20);
- obj = hashTypeCurrentObject(hi,REDIS_HASH_KEY);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
- obj = hashTypeCurrentObject(hi,REDIS_HASH_VALUE);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
xorDigest(digest,eledigest,20);
}
hashTypeReleaseIterator(hi);
+ } else if (o->type == OBJ_MODULE) {
+ RedisModuleDigest md;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitDigestContext(md);
+ if (mt->digest) {
+ mt->digest(&md,mv->value);
+ xorDigest(digest,md.x,sizeof(md.x));
+ }
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
/* If the key has an expire, add it to the mix */
if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
@@ -252,36 +261,100 @@ void computeDatasetDigest(unsigned char *final) {
}
}
-void debugCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
+void debugCommand(client *c) {
+ if (c->argc == 1) {
+ addReplyError(c,"You must specify a subcommand for DEBUG. Try DEBUG HELP for info.");
+ return;
+ }
+
+ if (!strcasecmp(c->argv[1]->ptr,"help")) {
+ void *blenp = addDeferredMultiBulkLength(c);
+ int blen = 0;
+ blen++; addReplyStatus(c,
+ "DEBUG <subcommand> arg arg ... arg. Subcommands:");
+ blen++; addReplyStatus(c,
+ "segfault -- Crash the server with sigsegv.");
+ blen++; addReplyStatus(c,
+ "panic -- Crash the server simulating a panic.");
+ blen++; addReplyStatus(c,
+ "restart -- Graceful restart: save config, db, restart.");
+ blen++; addReplyStatus(c,
+ "crash-and-recovery <milliseconds> -- Hard crash and restart after <milliseconds> delay.");
+ blen++; addReplyStatus(c,
+ "assert -- Crash by assertion failed.");
+ blen++; addReplyStatus(c,
+ "reload -- Save the RDB on disk and reload it back in memory.");
+ blen++; addReplyStatus(c,
+ "loadaof -- Flush the AOF buffers on disk and reload the AOF in memory.");
+ blen++; addReplyStatus(c,
+ "object <key> -- Show low level info about key and associated value.");
+ blen++; addReplyStatus(c,
+ "sdslen <key> -- Show low level SDS string info representing key and value.");
+ blen++; addReplyStatus(c,
+ "ziplist <key> -- Show low level info about the ziplist encoding.");
+ blen++; addReplyStatus(c,
+ "populate <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.");
+ blen++; addReplyStatus(c,
+ "digest -- Outputs an hex signature representing the current DB content.");
+ blen++; addReplyStatus(c,
+ "sleep <seconds> -- Stop the server for <seconds>. Decimals allowed.");
+ blen++; addReplyStatus(c,
+ "set-active-expire (0|1) -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.");
+ blen++; addReplyStatus(c,
+ "lua-always-replicate-commands (0|1) -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.");
+ blen++; addReplyStatus(c,
+ "error <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.");
+ blen++; addReplyStatus(c,
+ "structsize -- Return the size of different Redis core C structures.");
+ blen++; addReplyStatus(c,
+ "htstats <dbid> -- Return hash table statistics of the specified Redis database.");
+ setDeferredMultiBulkLength(c,blenp,blen);
+ } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
*((char*)-1) = 'x';
+ } else if (!strcasecmp(c->argv[1]->ptr,"panic")) {
+ serverPanic("DEBUG PANIC called at Unix time %ld", time(NULL));
+ } else if (!strcasecmp(c->argv[1]->ptr,"restart") ||
+ !strcasecmp(c->argv[1]->ptr,"crash-and-recover"))
+ {
+ long long delay = 0;
+ if (c->argc >= 3) {
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &delay, NULL)
+ != C_OK) return;
+ if (delay < 0) delay = 0;
+ }
+ int flags = !strcasecmp(c->argv[1]->ptr,"restart") ?
+ (RESTART_SERVER_GRACEFULLY|RESTART_SERVER_CONFIG_REWRITE) :
+ RESTART_SERVER_NONE;
+ restartServer(flags,delay);
+ addReplyError(c,"failed to restart the server. Check server logs.");
} else if (!strcasecmp(c->argv[1]->ptr,"oom")) {
void *ptr = zmalloc(ULONG_MAX); /* Should trigger an out of memory. */
zfree(ptr);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"assert")) {
if (c->argc >= 3) c->argv[2] = tryObjectEncoding(c->argv[2]);
- redisAssertWithInfo(c,c->argv[0],1 == 2);
+ serverAssertWithInfo(c,c->argv[0],1 == 2);
} else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
- if (rdbSave(server.rdb_filename) != REDIS_OK) {
+ if (rdbSave(server.rdb_filename,NULL) != C_OK) {
addReply(c,shared.err);
return;
}
- emptyDb(NULL);
- if (rdbLoad(server.rdb_filename) != REDIS_OK) {
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
+ if (rdbLoad(server.rdb_filename,NULL) != C_OK) {
addReplyError(c,"Error trying to load the RDB dump");
return;
}
- redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
+ serverLog(LL_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
- emptyDb(NULL);
- if (loadAppendOnlyFile(server.aof_filename) != REDIS_OK) {
+ if (server.aof_state == AOF_ON) flushAppendOnlyFile(1);
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
+ if (loadAppendOnlyFile(server.aof_filename) != C_OK) {
addReply(c,shared.err);
return;
}
server.dirty = 0; /* Prevent AOF / replication */
- redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
+ serverLog(LL_WARNING,"Append Only File loaded by DEBUG LOADAOF");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
dictEntry *de;
@@ -295,13 +368,46 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
strenc = strEncoding(val->encoding);
+ char extra[128] = {0};
+ if (val->encoding == OBJ_ENCODING_QUICKLIST) {
+ char *nextra = extra;
+ int remaining = sizeof(extra);
+ quicklist *ql = val->ptr;
+ /* Add number of quicklist nodes */
+ int used = snprintf(nextra, remaining, " ql_nodes:%u", ql->len);
+ nextra += used;
+ remaining -= used;
+ /* Add average quicklist fill factor */
+ double avg = (double)ql->count/ql->len;
+ used = snprintf(nextra, remaining, " ql_avg_node:%.2f", avg);
+ nextra += used;
+ remaining -= used;
+ /* Add quicklist fill level / max ziplist size */
+ used = snprintf(nextra, remaining, " ql_ziplist_max:%d", ql->fill);
+ nextra += used;
+ remaining -= used;
+ /* Add isCompressed? */
+ int compressed = ql->compress != 0;
+ used = snprintf(nextra, remaining, " ql_compressed:%d", compressed);
+ nextra += used;
+ remaining -= used;
+ /* Add total uncompressed size */
+ unsigned long sz = 0;
+ for (quicklistNode *node = ql->head; node; node = node->next) {
+ sz += node->sz;
+ }
+ used = snprintf(nextra, remaining, " ql_uncompressed_size:%lu", sz);
+ nextra += used;
+ remaining -= used;
+ }
+
addReplyStatusFormat(c,
"Value at:%p refcount:%d "
- "encoding:%s serializedlength:%lld "
- "lru:%d lru_seconds_idle:%llu",
+ "encoding:%s serializedlength:%zu "
+ "lru:%d lru_seconds_idle:%llu%s",
(void*)val, val->refcount,
- strenc, (long long) rdbSavedObjectLen(val),
- val->lru, estimateObjectIdleTime(val));
+ strenc, rdbSavedObjectLen(val),
+ val->lru, estimateObjectIdleTime(val)/1000, extra);
} else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
dictEntry *de;
robj *val;
@@ -314,35 +420,62 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
key = dictGetKey(de);
- if (val->type != REDIS_STRING || !sdsEncodedObject(val)) {
+ if (val->type != OBJ_STRING || !sdsEncodedObject(val)) {
addReplyError(c,"Not an sds encoded string.");
} else {
addReplyStatusFormat(c,
- "key_sds_len:%lld, key_sds_avail:%lld, "
- "val_sds_len:%lld, val_sds_avail:%lld",
+ "key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, "
+ "val_sds_len:%lld, val_sds_avail:%lld, val_zmalloc: %lld",
(long long) sdslen(key),
(long long) sdsavail(key),
+ (long long) sdsZmallocSize(key),
(long long) sdslen(val->ptr),
- (long long) sdsavail(val->ptr));
+ (long long) sdsavail(val->ptr),
+ (long long) getStringObjectSdsUsedMemory(val));
}
- } else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) {
+ } else if (!strcasecmp(c->argv[1]->ptr,"ziplist") && c->argc == 3) {
+ robj *o;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ if (o->encoding != OBJ_ENCODING_ZIPLIST) {
+ addReplyError(c,"Not an sds encoded string.");
+ } else {
+ ziplistRepr(o->ptr);
+ addReplyStatus(c,"Ziplist structure printed on stdout");
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"populate") &&
+ c->argc >= 3 && c->argc <= 5) {
long keys, j;
robj *key, *val;
char buf[128];
- if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != C_OK)
return;
dictExpand(c->db->dict,keys);
for (j = 0; j < keys; j++) {
- snprintf(buf,sizeof(buf),"key:%lu",j);
+ long valsize = 0;
+ snprintf(buf,sizeof(buf),"%s:%lu",
+ (c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j);
key = createStringObject(buf,strlen(buf));
- if (lookupKeyRead(c->db,key) != NULL) {
+ if (c->argc == 5)
+ if (getLongFromObjectOrReply(c, c->argv[4], &valsize, NULL) != C_OK)
+ return;
+ if (lookupKeyWrite(c->db,key) != NULL) {
decrRefCount(key);
continue;
}
snprintf(buf,sizeof(buf),"value:%lu",j);
- val = createStringObject(buf,strlen(buf));
+ if (valsize==0)
+ val = createStringObject(buf,strlen(buf));
+ else {
+ int buflen = strlen(buf);
+ val = createStringObject(NULL,valsize);
+ memcpy(val->ptr, buf, valsize<=buflen? valsize: buflen);
+ }
dbAdd(c->db,key,val);
+ signalModifiedKey(c->db,key);
decrRefCount(key);
}
addReply(c,shared.ok);
@@ -370,6 +503,11 @@ void debugCommand(redisClient *c) {
{
server.active_expire_enabled = atoi(c->argv[2]->ptr);
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"lua-always-replicate-commands") &&
+ c->argc == 3)
+ {
+ server.lua_always_replicate_commands = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"error") && c->argc == 3) {
sds errstr = sdsnewlen("-",1);
@@ -377,6 +515,38 @@ void debugCommand(redisClient *c) {
errstr = sdsmapchars(errstr,"\n\r"," ",2); /* no newlines in errors. */
errstr = sdscatlen(errstr,"\r\n",2);
addReplySds(c,errstr);
+ } else if (!strcasecmp(c->argv[1]->ptr,"structsize") && c->argc == 2) {
+ sds sizes = sdsempty();
+ sizes = sdscatprintf(sizes,"bits:%d ",(sizeof(void*) == 8)?64:32);
+ sizes = sdscatprintf(sizes,"robj:%d ",(int)sizeof(robj));
+ sizes = sdscatprintf(sizes,"dictentry:%d ",(int)sizeof(dictEntry));
+ sizes = sdscatprintf(sizes,"sdshdr5:%d ",(int)sizeof(struct sdshdr5));
+ sizes = sdscatprintf(sizes,"sdshdr8:%d ",(int)sizeof(struct sdshdr8));
+ sizes = sdscatprintf(sizes,"sdshdr16:%d ",(int)sizeof(struct sdshdr16));
+ sizes = sdscatprintf(sizes,"sdshdr32:%d ",(int)sizeof(struct sdshdr32));
+ sizes = sdscatprintf(sizes,"sdshdr64:%d ",(int)sizeof(struct sdshdr64));
+ addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats") && c->argc == 3) {
+ long dbid;
+ sds stats = sdsempty();
+ char buf[4096];
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &dbid, NULL) != C_OK)
+ return;
+ if (dbid < 0 || dbid >= server.dbnum) {
+ addReplyError(c,"Out of range database");
+ return;
+ }
+
+ stats = sdscatprintf(stats,"[Dictionary HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].dict);
+ stats = sdscat(stats,buf);
+
+ stats = sdscatprintf(stats,"[Expires HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].expires);
+ stats = sdscat(stats,buf);
+
+ addReplyBulkSds(c,stats);
} else {
addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'",
(char*)c->argv[1]->ptr);
@@ -385,95 +555,101 @@ void debugCommand(redisClient *c) {
/* =========================== Crash handling ============================== */
-void _redisAssert(char *estr, char *file, int line) {
+void _serverAssert(const char *estr, const char *file, int line) {
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
- redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED ===");
+ serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
#ifdef HAVE_BACKTRACE
server.assert_failed = estr;
server.assert_file = file;
server.assert_line = line;
- redisLog(REDIS_WARNING,"(forcing SIGSEGV to print the bug report.)");
+ serverLog(LL_WARNING,"(forcing SIGSEGV to print the bug report.)");
#endif
*((char*)-1) = 'x';
}
-void _redisAssertPrintClientInfo(redisClient *c) {
+void _serverAssertPrintClientInfo(const client *c) {
int j;
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
- redisLog(REDIS_WARNING,"client->flags = %d", c->flags);
- redisLog(REDIS_WARNING,"client->fd = %d", c->fd);
- redisLog(REDIS_WARNING,"client->argc = %d", c->argc);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
+ serverLog(LL_WARNING,"client->flags = %d", c->flags);
+ serverLog(LL_WARNING,"client->fd = %d", c->fd);
+ serverLog(LL_WARNING,"client->argc = %d", c->argc);
for (j=0; j < c->argc; j++) {
char buf[128];
char *arg;
- if (c->argv[j]->type == REDIS_STRING && sdsEncodedObject(c->argv[j])) {
+ if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) {
arg = (char*) c->argv[j]->ptr;
} else {
- snprintf(buf,sizeof(buf),"Object type: %d, encoding: %d",
+ snprintf(buf,sizeof(buf),"Object type: %u, encoding: %u",
c->argv[j]->type, c->argv[j]->encoding);
arg = buf;
}
- redisLog(REDIS_WARNING,"client->argv[%d] = \"%s\" (refcount: %d)",
+ serverLog(LL_WARNING,"client->argv[%d] = \"%s\" (refcount: %d)",
j, arg, c->argv[j]->refcount);
}
}
-void redisLogObjectDebugInfo(robj *o) {
- redisLog(REDIS_WARNING,"Object type: %d", o->type);
- redisLog(REDIS_WARNING,"Object encoding: %d", o->encoding);
- redisLog(REDIS_WARNING,"Object refcount: %d", o->refcount);
- if (o->type == REDIS_STRING && sdsEncodedObject(o)) {
- redisLog(REDIS_WARNING,"Object raw string len: %zu", sdslen(o->ptr));
+void serverLogObjectDebugInfo(const robj *o) {
+ serverLog(LL_WARNING,"Object type: %d", o->type);
+ serverLog(LL_WARNING,"Object encoding: %d", o->encoding);
+ serverLog(LL_WARNING,"Object refcount: %d", o->refcount);
+ if (o->type == OBJ_STRING && sdsEncodedObject(o)) {
+ serverLog(LL_WARNING,"Object raw string len: %zu", sdslen(o->ptr));
if (sdslen(o->ptr) < 4096) {
sds repr = sdscatrepr(sdsempty(),o->ptr,sdslen(o->ptr));
- redisLog(REDIS_WARNING,"Object raw string content: %s", repr);
+ serverLog(LL_WARNING,"Object raw string content: %s", repr);
sdsfree(repr);
}
- } else if (o->type == REDIS_LIST) {
- redisLog(REDIS_WARNING,"List length: %d", (int) listTypeLength(o));
- } else if (o->type == REDIS_SET) {
- redisLog(REDIS_WARNING,"Set size: %d", (int) setTypeSize(o));
- } else if (o->type == REDIS_HASH) {
- redisLog(REDIS_WARNING,"Hash size: %d", (int) hashTypeLength(o));
- } else if (o->type == REDIS_ZSET) {
- redisLog(REDIS_WARNING,"Sorted set size: %d", (int) zsetLength(o));
- if (o->encoding == REDIS_ENCODING_SKIPLIST)
- redisLog(REDIS_WARNING,"Skiplist level: %d", (int) ((zset*)o->ptr)->zsl->level);
+ } else if (o->type == OBJ_LIST) {
+ serverLog(LL_WARNING,"List length: %d", (int) listTypeLength(o));
+ } else if (o->type == OBJ_SET) {
+ serverLog(LL_WARNING,"Set size: %d", (int) setTypeSize(o));
+ } else if (o->type == OBJ_HASH) {
+ serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o));
+ } else if (o->type == OBJ_ZSET) {
+ serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o));
+ if (o->encoding == OBJ_ENCODING_SKIPLIST)
+ serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level);
}
}
-void _redisAssertPrintObject(robj *o) {
+void _serverAssertPrintObject(const robj *o) {
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ===");
- redisLogObjectDebugInfo(o);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ===");
+ serverLogObjectDebugInfo(o);
}
-void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line) {
- if (c) _redisAssertPrintClientInfo(c);
- if (o) _redisAssertPrintObject(o);
- _redisAssert(estr,file,line);
+void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line) {
+ if (c) _serverAssertPrintClientInfo(c);
+ if (o) _serverAssertPrintObject(o);
+ _serverAssert(estr,file,line);
}
-void _redisPanic(char *msg, char *file, int line) {
+void _serverPanic(const char *file, int line, const char *msg, ...) {
+ va_list ap;
+ va_start(ap,msg);
+ char fmtmsg[256];
+ vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap);
+ va_end(ap);
+
bugReportStart();
- redisLog(REDIS_WARNING,"------------------------------------------------");
- redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
- redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
+ serverLog(LL_WARNING,"------------------------------------------------");
+ serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue");
+ serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line);
#ifdef HAVE_BACKTRACE
- redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
+ serverLog(LL_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
#endif
- redisLog(REDIS_WARNING,"------------------------------------------------");
+ serverLog(LL_WARNING,"------------------------------------------------");
*((char*)-1) = 'x';
}
void bugReportStart(void) {
if (server.bug_report_start == 0) {
- redisLog(REDIS_WARNING,
- "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n");
server.bug_report_start = 1;
}
}
@@ -504,6 +680,8 @@ static void *getMcontextEip(ucontext_t *uc) {
return (void*) uc->uc_mcontext.gregs[16]; /* Linux 64 */
#elif defined(__ia64__) /* Linux IA64 */
return (void*) uc->uc_mcontext.sc_ip;
+ #elif defined(__arm__) /* Linux ARM */
+ return (void*) uc->uc_mcontext.arm_pc;
#endif
#else
return NULL;
@@ -517,20 +695,20 @@ void logStackContent(void **sp) {
unsigned long val = (unsigned long) sp[i];
if (sizeof(long) == 4)
- redisLog(REDIS_WARNING, "(%08lx) -> %08lx", addr, val);
+ serverLog(LL_WARNING, "(%08lx) -> %08lx", addr, val);
else
- redisLog(REDIS_WARNING, "(%016lx) -> %016lx", addr, val);
+ serverLog(LL_WARNING, "(%016lx) -> %016lx", addr, val);
}
}
void logRegisters(ucontext_t *uc) {
- redisLog(REDIS_WARNING, "--- REGISTERS");
+ serverLog(LL_WARNING|LL_RAW, "\n------ REGISTERS ------\n");
/* OSX */
#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
/* OSX AMD64 */
#if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
"RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
@@ -562,7 +740,7 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext->__ss.__rsp);
#else
/* OSX x86 */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
"EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
@@ -591,7 +769,7 @@ void logRegisters(ucontext_t *uc) {
#elif defined(__linux__)
/* Linux x86 */
#if defined(__i386__)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
"EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
@@ -617,7 +795,7 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext.gregs[7]);
#elif defined(__X86_64__) || defined(__x86_64__)
/* Linux AMD64 */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
"RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
@@ -647,36 +825,56 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext.gregs[15]);
#endif
#else
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
" Dumping of registers not supported for this OS/arch");
#endif
}
+/* Return a file descriptor to write directly to the Redis log with the
+ * write(2) syscall, that can be used in critical sections of the code
+ * where the rest of Redis can't be trusted (for example during the memory
+ * test) or when an API call requires a raw fd.
+ *
+ * Close it with closeDirectLogFiledes(). */
+int openDirectLogFiledes(void) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ int fd = log_to_stdout ?
+ STDOUT_FILENO :
+ open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
+ return fd;
+}
+
+/* Used to close what closeDirectLogFiledes() returns. */
+void closeDirectLogFiledes(int fd) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ if (!log_to_stdout) close(fd);
+}
+
/* Logs the stack trace using the backtrace() call. This function is designed
* to be called from signal handlers safely. */
void logStackTrace(ucontext_t *uc) {
- void *trace[100];
- int trace_size = 0, fd;
- int log_to_stdout = server.logfile[0] == '\0';
+ void *trace[101];
+ int trace_size = 0, fd = openDirectLogFiledes();
- /* Open the log file in append mode. */
- fd = log_to_stdout ?
- STDOUT_FILENO :
- open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
- if (fd == -1) return;
+ if (fd == -1) return; /* If we can't log there is nothing to do. */
/* Generate the stack trace */
- trace_size = backtrace(trace, 100);
-
- /* overwrite sigaction with caller's address */
- if (getMcontextEip(uc) != NULL)
- trace[1] = getMcontextEip(uc);
+ trace_size = backtrace(trace+1, 100);
+
+ if (getMcontextEip(uc) != NULL) {
+ char *msg1 = "EIP:\n";
+ char *msg2 = "\nBacktrace:\n";
+ if (write(fd,msg1,strlen(msg1)) == -1) {/* Avoid warning. */};
+ trace[0] = getMcontextEip(uc);
+ backtrace_symbols_fd(trace, 1, fd);
+ if (write(fd,msg2,strlen(msg2)) == -1) {/* Avoid warning. */};
+ }
/* Write symbols to log file */
- backtrace_symbols_fd(trace, trace_size, fd);
+ backtrace_symbols_fd(trace+1, trace_size, fd);
/* Cleanup */
- if (!log_to_stdout) close(fd);
+ closeDirectLogFiledes(fd);
}
/* Log information about the "current" client, that is, the client that is
@@ -685,19 +883,20 @@ void logStackTrace(ucontext_t *uc) {
void logCurrentClient(void) {
if (server.current_client == NULL) return;
- redisClient *cc = server.current_client;
+ client *cc = server.current_client;
sds client;
int j;
- redisLog(REDIS_WARNING, "--- CURRENT CLIENT INFO");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CURRENT CLIENT INFO ------\n");
client = catClientInfoString(sdsempty(),cc);
- redisLog(REDIS_WARNING,"client: %s", client);
+ serverLog(LL_WARNING|LL_RAW,"%s\n", client);
sdsfree(client);
for (j = 0; j < cc->argc; j++) {
robj *decoded;
decoded = getDecodedObject(cc->argv[j]);
- redisLog(REDIS_WARNING,"argv[%d]: '%s'", j, (char*)decoded->ptr);
+ serverLog(LL_WARNING|LL_RAW,"argv[%d]: '%s'\n", j,
+ (char*)decoded->ptr);
decrRefCount(decoded);
}
/* Check if the first argument, usually a key, is found inside the
@@ -710,27 +909,32 @@ void logCurrentClient(void) {
de = dictFind(cc->db->dict, key->ptr);
if (de) {
val = dictGetVal(de);
- redisLog(REDIS_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
- redisLogObjectDebugInfo(val);
+ serverLog(LL_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
+ serverLogObjectDebugInfo(val);
}
decrRefCount(key);
}
}
#if defined(HAVE_PROC_MAPS)
-void memtest_non_destructive_invert(void *addr, size_t size);
-void memtest_non_destructive_swap(void *addr, size_t size);
+
#define MEMTEST_MAX_REGIONS 128
+/* A non-destructive memory test executed during segfaults. */
int memtest_test_linux_anonymous_maps(void) {
- FILE *fp = fopen("/proc/self/maps","r");
+ FILE *fp;
char line[1024];
+ char logbuf[1024];
size_t start_addr, end_addr, size;
size_t start_vect[MEMTEST_MAX_REGIONS];
size_t size_vect[MEMTEST_MAX_REGIONS];
int regions = 0, j;
- uint64_t crc1 = 0, crc2 = 0, crc3 = 0;
+ int fd = openDirectLogFiledes();
+ if (!fd) return 0;
+
+ fp = fopen("/proc/self/maps","r");
+ if (!fp) return 0;
while(fgets(line,sizeof(line),fp) != NULL) {
char *start, *end, *p = line;
@@ -754,78 +958,90 @@ int memtest_test_linux_anonymous_maps(void) {
start_vect[regions] = start_addr;
size_vect[regions] = size;
- printf("Testing %lx %lu\n", (unsigned long) start_vect[regions],
- (unsigned long) size_vect[regions]);
+ snprintf(logbuf,sizeof(logbuf),
+ "*** Preparing to test memory region %lx (%lu bytes)\n",
+ (unsigned long) start_vect[regions],
+ (unsigned long) size_vect[regions]);
+ if (write(fd,logbuf,strlen(logbuf)) == -1) { /* Nothing to do. */ }
regions++;
}
- /* Test all the regions as an unique sequential region.
- * 1) Take the CRC64 of the memory region. */
+ int errors = 0;
for (j = 0; j < regions; j++) {
- crc1 = crc64(crc1,(void*)start_vect[j],size_vect[j]);
+ if (write(fd,".",1) == -1) { /* Nothing to do. */ }
+ errors += memtest_preserving_test((void*)start_vect[j],size_vect[j],1);
+ if (write(fd, errors ? "E" : "O",1) == -1) { /* Nothing to do. */ }
}
-
- /* 2) Invert bits, swap adjacent words, swap again, invert bits.
- * This is the error amplification step. */
- for (j = 0; j < regions; j++)
- memtest_non_destructive_invert((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_invert((void*)start_vect[j],size_vect[j]);
-
- /* 3) Take the CRC64 sum again. */
- for (j = 0; j < regions; j++)
- crc2 = crc64(crc2,(void*)start_vect[j],size_vect[j]);
-
- /* 4) Swap + Swap again */
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
-
- /* 5) Take the CRC64 sum again. */
- for (j = 0; j < regions; j++)
- crc3 = crc64(crc3,(void*)start_vect[j],size_vect[j]);
+ if (write(fd,"\n",1) == -1) { /* Nothing to do. */ }
/* NOTE: It is very important to close the file descriptor only now
* because closing it before may result into unmapping of some memory
* region that we are testing. */
fclose(fp);
-
- /* If the two CRC are not the same, we trapped a memory error. */
- return crc1 != crc2 || crc2 != crc3;
+ closeDirectLogFiledes(fd);
+ return errors;
}
#endif
+/* Scans the (assumed) x86 code starting at addr, for a max of `len`
+ * bytes, searching for E8 (callq) opcodes, and dumping the symbols
+ * and the call offset if they appear to be valid. */
+void dumpX86Calls(void *addr, size_t len) {
+ size_t j;
+ unsigned char *p = addr;
+ Dl_info info;
+ /* Hash table to best-effort avoid printing the same symbol
+ * multiple times. */
+ unsigned long ht[256] = {0};
+
+ if (len < 5) return;
+ for (j = 0; j < len-4; j++) {
+ if (p[j] != 0xE8) continue; /* Not an E8 CALL opcode. */
+ unsigned long target = (unsigned long)addr+j+5;
+ target += *((int32_t*)(p+j+1));
+ if (dladdr((void*)target, &info) != 0 && info.dli_sname != NULL) {
+ if (ht[target&0xff] != target) {
+ printf("Function at 0x%lx is %s\n",target,info.dli_sname);
+ ht[target&0xff] = target;
+ }
+ j += 4; /* Skip the 32 bit immediate. */
+ }
+ }
+}
+
void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
ucontext_t *uc = (ucontext_t*) secret;
+ void *eip = getMcontextEip(uc);
sds infostring, clients;
struct sigaction act;
- REDIS_NOTUSED(info);
+ UNUSED(info);
bugReportStart();
- redisLog(REDIS_WARNING,
- " Redis %s crashed by signal: %d", REDIS_VERSION, sig);
- redisLog(REDIS_WARNING,
- " Failed assertion: %s (%s:%d)", server.assert_failed,
+ serverLog(LL_WARNING,
+ "Redis %s crashed by signal: %d", REDIS_VERSION, sig);
+ if (eip != NULL) {
+ serverLog(LL_WARNING,
+ "Crashed running the instuction at: %p", eip);
+ }
+ if (sig == SIGSEGV || sig == SIGBUS) {
+ serverLog(LL_WARNING,
+ "Accessing address: %p", (void*)info->si_addr);
+ }
+ serverLog(LL_WARNING,
+ "Failed assertion: %s (%s:%d)", server.assert_failed,
server.assert_file, server.assert_line);
/* Log the stack trace */
- redisLog(REDIS_WARNING, "--- STACK TRACE");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ STACK TRACE ------\n");
logStackTrace(uc);
/* Log INFO and CLIENT LIST */
- redisLog(REDIS_WARNING, "--- INFO OUTPUT");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n");
infostring = genRedisInfoString("all");
- infostring = sdscatprintf(infostring, "hash_init_value: %u\n",
- dictGetHashFunctionSeed());
- redisLogRaw(REDIS_WARNING, infostring);
- redisLog(REDIS_WARNING, "--- CLIENT LIST OUTPUT");
+ serverLogRaw(LL_WARNING|LL_RAW, infostring);
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
clients = getAllClientsInfoString();
- redisLogRaw(REDIS_WARNING, clients);
+ serverLogRaw(LL_WARNING|LL_RAW, clients);
sdsfree(infostring);
sdsfree(clients);
@@ -837,25 +1053,55 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
#if defined(HAVE_PROC_MAPS)
/* Test memory */
- redisLog(REDIS_WARNING, "--- FAST MEMORY TEST");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n");
bioKillThreads();
if (memtest_test_linux_anonymous_maps()) {
- redisLog(REDIS_WARNING,
- "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n");
} else {
- redisLog(REDIS_WARNING,
- "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n");
}
#endif
- redisLog(REDIS_WARNING,
+ if (eip != NULL) {
+ Dl_info info;
+ if (dladdr(eip, &info) != 0) {
+ serverLog(LL_WARNING|LL_RAW,
+ "\n------ DUMPING CODE AROUND EIP ------\n"
+ "Symbol: %s (base: %p)\n"
+ "Module: %s (base %p)\n"
+ "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n"
+ "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n"
+ "------\n",
+ info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase,
+ info.dli_saddr);
+ size_t len = (long)eip - (long)info.dli_saddr;
+ unsigned long sz = sysconf(_SC_PAGESIZE);
+ if (len < 1<<13) { /* we don't have functions over 8k (verified) */
+ /* Find the address of the next page, which is our "safety"
+ * limit when dumping. Then try to dump just 128 bytes more
+ * than EIP if there is room, or stop sooner. */
+ unsigned long next = ((unsigned long)eip + sz) & ~(sz-1);
+ unsigned long end = (unsigned long)eip + 128;
+ if (end > next) end = next;
+ len = end - (unsigned long)info.dli_saddr;
+ serverLogHexDump(LL_WARNING, "dump of function",
+ info.dli_saddr ,len);
+ dumpX86Calls(info.dli_saddr,len);
+ }
+ }
+ }
+
+ serverLogRaw(LL_WARNING|LL_RAW,
"\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n"
-" Please report the crash opening an issue on github:\n\n"
+" Please report the crash by opening an issue on github:\n\n"
" http://github.com/antirez/redis/issues\n\n"
" Suspect RAM error? Use redis-server --test-memory to verify it.\n\n"
);
+
/* free(messages); Don't call free() with possibly corrupted memory. */
- if (server.daemonize) unlink(server.pidfile);
+ if (server.daemonize && server.supervised == 0) unlink(server.pidfile);
/* Make sure we exit with the right signal at the end. So for instance
* the core will be dumped if enabled. */
@@ -869,12 +1115,12 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
/* ==================== Logging functions for debugging ===================== */
-void redisLogHexDump(int level, char *descr, void *value, size_t len) {
+void serverLogHexDump(int level, char *descr, void *value, size_t len) {
char buf[65], *b;
unsigned char *v = value;
char charset[] = "0123456789abcdef";
- redisLog(level,"%s (hexdump):", descr);
+ serverLog(level,"%s (hexdump of %zu bytes):", descr, len);
b = buf;
while(len) {
b[0] = charset[(*v)>>4];
@@ -884,11 +1130,11 @@ void redisLogHexDump(int level, char *descr, void *value, size_t len) {
len--;
v++;
if (b-buf == 64 || len == 0) {
- redisLogRaw(level|REDIS_LOG_RAW,buf);
+ serverLogRaw(level|LL_RAW,buf);
b = buf;
}
}
- redisLogRaw(level|REDIS_LOG_RAW,"\n");
+ serverLogRaw(level|LL_RAW,"\n");
}
/* =========================== Software Watchdog ============================ */
@@ -898,16 +1144,16 @@ void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) {
#ifdef HAVE_BACKTRACE
ucontext_t *uc = (ucontext_t*) secret;
#endif
- REDIS_NOTUSED(info);
- REDIS_NOTUSED(sig);
+ UNUSED(info);
+ UNUSED(sig);
- redisLogFromHandler(REDIS_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---");
+ serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---");
#ifdef HAVE_BACKTRACE
logStackTrace(uc);
#else
- redisLogFromHandler(REDIS_WARNING,"Sorry: no support for backtrace().");
+ serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace().");
#endif
- redisLogFromHandler(REDIS_WARNING,"--------\n");
+ serverLogFromHandler(LL_WARNING,"--------\n");
}
/* Schedule a SIGALRM delivery after the specified period in milliseconds.
diff --git a/src/debugmacro.h b/src/debugmacro.h
new file mode 100644
index 000000000..ded2d2667
--- /dev/null
+++ b/src/debugmacro.h
@@ -0,0 +1,41 @@
+/* This file contains debugging macros to be used when investigating issues.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#define D(...) \
+ do { \
+ FILE *fp = fopen("/tmp/log.txt","a"); \
+ fprintf(fp,"%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
+ fprintf(fp,__VA_ARGS__); \
+ fprintf(fp,"\n"); \
+ fclose(fp); \
+ } while (0);
diff --git a/src/defrag.c b/src/defrag.c
new file mode 100644
index 000000000..4a1dcefe4
--- /dev/null
+++ b/src/defrag.c
@@ -0,0 +1,579 @@
+/*
+ * Active memory defragmentation
+ * Try to find key / value allocations that need to be re-allocated in order
+ * to reduce external fragmentation.
+ * We do that by scanning the keyspace and for each pointer we have, we can try to
+ * ask the allocator if moving it to a new address will help reduce fragmentation.
+ *
+ * Copyright (c) 2017, Oran Agra
+ * Copyright (c) 2017, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <time.h>
+#include <assert.h>
+#include <stddef.h>
+
+#ifdef HAVE_DEFRAG
+
+/* this method was added to jemalloc in order to help us understand which
+ * pointers are worthwhile moving and which aren't */
+int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util);
+
+/* Defrag helper for generic allocations.
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+void* activeDefragAlloc(void *ptr) {
+ int bin_util, run_util;
+ size_t size;
+ void *newptr;
+ if(!je_get_defrag_hint(ptr, &bin_util, &run_util)) {
+ server.stat_active_defrag_misses++;
+ return NULL;
+ }
+ /* if this run is more utilized than the average utilization in this bin
+ * (or it is full), skip it. This will eventually move all the allocations
+ * from relatively empty runs into relatively full runs. */
+ if (run_util > bin_util || run_util == 1<<16) {
+ server.stat_active_defrag_misses++;
+ return NULL;
+ }
+ /* move this allocation to a new allocation.
+ * make sure not to use the thread cache. so that we don't get back the same
+ * pointers we try to free */
+ size = zmalloc_size(ptr);
+ newptr = zmalloc_no_tcache(size);
+ memcpy(newptr, ptr, size);
+ zfree_no_tcache(ptr);
+ return newptr;
+}
+
+/*Defrag helper for sds strings
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+sds activeDefragSds(sds sdsptr) {
+ void* ptr = sdsAllocPtr(sdsptr);
+ void* newptr = activeDefragAlloc(ptr);
+ if (newptr) {
+ size_t offset = sdsptr - (char*)ptr;
+ sdsptr = (char*)newptr + offset;
+ return sdsptr;
+ }
+ return NULL;
+}
+
+/* Defrag helper for robj and/or string objects
+ *
+ * returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+robj *activeDefragStringOb(robj* ob, int *defragged) {
+ robj *ret = NULL;
+ if (ob->refcount!=1)
+ return NULL;
+
+ /* try to defrag robj (only if not an EMBSTR type (handled below). */
+ if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) {
+ if ((ret = activeDefragAlloc(ob))) {
+ ob = ret;
+ (*defragged)++;
+ }
+ }
+
+ /* try to defrag string object */
+ if (ob->type == OBJ_STRING) {
+ if(ob->encoding==OBJ_ENCODING_RAW) {
+ sds newsds = activeDefragSds((sds)ob->ptr);
+ if (newsds) {
+ ob->ptr = newsds;
+ (*defragged)++;
+ }
+ } else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
+ /* The sds is embedded in the object allocation, calculate the
+ * offset and update the pointer in the new allocation. */
+ long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
+ if ((ret = activeDefragAlloc(ob))) {
+ ret->ptr = (void*)((intptr_t)ret + ofs);
+ (*defragged)++;
+ }
+ } else if (ob->encoding!=OBJ_ENCODING_INT) {
+ serverPanic("Unknown string encoding");
+ }
+ }
+ return ret;
+}
+
+/* Defrag helper for dictEntries to be used during dict iteration (called on
+ * each step). Returns a stat of how many pointers were moved. */
+int dictIterDefragEntry(dictIterator *iter) {
+ /* This function is a little bit dirty since it messes with the internals
+ * of the dict and its iterator, but the benefit is that it is very easy
+ * to use, and requires no other changes in the dict. */
+ int defragged = 0;
+ dictht *ht;
+ /* Handle the next entry (if there is one), and update the pointer in the
+ * current entry. */
+ if (iter->nextEntry) {
+ dictEntry *newde = activeDefragAlloc(iter->nextEntry);
+ if (newde) {
+ defragged++;
+ iter->nextEntry = newde;
+ iter->entry->next = newde;
+ }
+ }
+ /* handle the case of the first entry in the hash bucket. */
+ ht = &iter->d->ht[iter->table];
+ if (ht->table[iter->index] == iter->entry) {
+ dictEntry *newde = activeDefragAlloc(iter->entry);
+ if (newde) {
+ iter->entry = newde;
+ ht->table[iter->index] = newde;
+ defragged++;
+ }
+ }
+ return defragged;
+}
+
+/* Defrag helper for dict main allocations (dict struct, and hash tables).
+ * receives a pointer to the dict* and implicitly updates it when the dict
+ * struct itself was moved. Returns a stat of how many pointers were moved. */
+int dictDefragTables(dict** dictRef) {
+ dict *d = *dictRef;
+ dictEntry **newtable;
+ int defragged = 0;
+ /* handle the dict struct */
+ dict *newd = activeDefragAlloc(d);
+ if (newd)
+ defragged++, *dictRef = d = newd;
+ /* handle the first hash table */
+ newtable = activeDefragAlloc(d->ht[0].table);
+ if (newtable)
+ defragged++, d->ht[0].table = newtable;
+ /* handle the second hash table */
+ if (d->ht[1].table) {
+ newtable = activeDefragAlloc(d->ht[1].table);
+ if (newtable)
+ defragged++, d->ht[1].table = newtable;
+ }
+ return defragged;
+}
+
+/* Internal function used by zslDefrag */
+void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnode, zskiplistNode **update) {
+ int i;
+ for (i = 0; i < zsl->level; i++) {
+ if (update[i]->level[i].forward == oldnode)
+ update[i]->level[i].forward = newnode;
+ }
+ serverAssert(zsl->header!=oldnode);
+ if (newnode->level[0].forward) {
+ serverAssert(newnode->level[0].forward->backward==oldnode);
+ newnode->level[0].forward->backward = newnode;
+ } else {
+ serverAssert(zsl->tail==oldnode);
+ zsl->tail = newnode;
+ }
+}
+
+/* Defrag helper for sorted set.
+ * Update the robj pointer, defrag the skiplist struct and return the new score
+ * reference. We may not access oldele pointer (not even the pointer stored in
+ * the skiplist), as it was already freed. Newele may be null, in which case we
+ * only need to defrag the skiplist, but not update the obj pointer.
+ * When return value is non-NULL, it is the score reference that must be updated
+ * in the dict record. */
+double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
+ int i;
+ sds ele = newele? newele: oldele;
+
+ /* find the skiplist node referring to the object that was moved,
+ * and all pointers that need to be updated if we'll end up moving the skiplist node. */
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->level[i].forward &&
+ x->level[i].forward->ele != oldele && /* make sure not to access the
+ ->obj pointer if it matches
+ oldele */
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ x = x->level[i].forward;
+ update[i] = x;
+ }
+
+ /* update the robj pointer inside the skip list record. */
+ x = x->level[0].forward;
+ serverAssert(x && score == x->score && x->ele==oldele);
+ if (newele)
+ x->ele = newele;
+
+ /* try to defrag the skiplist record itself */
+ newx = activeDefragAlloc(x);
+ if (newx) {
+ zslUpdateNode(zsl, x, newx, update);
+ return &newx->score;
+ }
+ return NULL;
+}
+
+/* Utility function that replaces an old key pointer in the dictionary with a
+ * new pointer. Additionally, we try to defrag the dictEntry in that dict.
+ * Oldkey may be a dead pointer and should not be accessed (we get a
+ * pre-calculated hash value). Newkey may be null if the key pointer wasn't
+ * moved. Return value is the dictEntry if found, or NULL if not found.
+ * NOTE: this is very ugly code, but it let's us avoid the complication of
+ * doing a scan on another dict. */
+dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, int *defragged) {
+ dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash);
+ if (deref) {
+ dictEntry *de = *deref;
+ dictEntry *newde = activeDefragAlloc(de);
+ if (newde) {
+ de = *deref = newde;
+ (*defragged)++;
+ }
+ if (newkey)
+ de->key = newkey;
+ return de;
+ }
+ return NULL;
+}
+
+/* for each key we scan in the main dict, this function will attempt to defrag
+ * all the various pointers it has. Returns a stat of how many pointers were
+ * moved. */
+int defragKey(redisDb *db, dictEntry *de) {
+ sds keysds = dictGetKey(de);
+ robj *newob, *ob;
+ unsigned char *newzl;
+ dict *d;
+ dictIterator *di;
+ int defragged = 0;
+ sds newsds;
+
+ /* Try to defrag the key name. */
+ newsds = activeDefragSds(keysds);
+ if (newsds)
+ defragged++, de->key = newsds;
+ if (dictSize(db->expires)) {
+        /* Dirty code:
+         * I can't search in db->expires for that key after I already released
+         * the pointer it holds, since it won't be able to do the string compare. */
+ unsigned int hash = dictGetHash(db->dict, de->key);
+ replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged);
+ }
+
+ /* Try to defrag robj and / or string value. */
+ ob = dictGetVal(de);
+ if ((newob = activeDefragStringOb(ob, &defragged))) {
+ de->v.val = newob;
+ ob = newob;
+ }
+
+ if (ob->type == OBJ_STRING) {
+ /* Already handled in activeDefragStringOb. */
+ } else if (ob->type == OBJ_LIST) {
+ if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = ob->ptr, *newql;
+ quicklistNode *node = ql->head, *newnode;
+ if ((newql = activeDefragAlloc(ql)))
+ defragged++, ob->ptr = ql = newql;
+ while (node) {
+ if ((newnode = activeDefragAlloc(node))) {
+ if (newnode->prev)
+ newnode->prev->next = newnode;
+ else
+ ql->head = newnode;
+ if (newnode->next)
+ newnode->next->prev = newnode;
+ else
+ ql->tail = newnode;
+ node = newnode;
+ defragged++;
+ }
+ if ((newzl = activeDefragAlloc(node->zl)))
+ defragged++, node->zl = newzl;
+ node = node->next;
+ }
+ } else if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (ob->type == OBJ_SET) {
+ if (ob->encoding == OBJ_ENCODING_HT) {
+ d = ob->ptr;
+ di = dictGetIterator(d);
+ while((de = dictNext(di)) != NULL) {
+ sds sdsele = dictGetKey(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ defragged++, de->key = newsds;
+ defragged += dictIterDefragEntry(di);
+ }
+ dictReleaseIterator(di);
+ dictDefragTables((dict**)&ob->ptr);
+ } else if (ob->encoding == OBJ_ENCODING_INTSET) {
+ intset *is = ob->ptr;
+ intset *newis = activeDefragAlloc(is);
+ if (newis)
+ defragged++, ob->ptr = newis;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (ob->type == OBJ_ZSET) {
+ if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = (zset*)ob->ptr;
+ zset *newzs;
+ zskiplist *newzsl;
+ struct zskiplistNode *newheader;
+ if ((newzs = activeDefragAlloc(zs)))
+ defragged++, ob->ptr = zs = newzs;
+ if ((newzsl = activeDefragAlloc(zs->zsl)))
+ defragged++, zs->zsl = newzsl;
+ if ((newheader = activeDefragAlloc(zs->zsl->header)))
+ defragged++, zs->zsl->header = newheader;
+ d = zs->dict;
+ di = dictGetIterator(d);
+ while((de = dictNext(di)) != NULL) {
+ double* newscore;
+ sds sdsele = dictGetKey(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ defragged++, de->key = newsds;
+ newscore = zslDefrag(zs->zsl, *(double*)dictGetVal(de), sdsele, newsds);
+ if (newscore) {
+ dictSetVal(d, de, newscore);
+ defragged++;
+ }
+ defragged += dictIterDefragEntry(di);
+ }
+ dictReleaseIterator(di);
+ dictDefragTables(&zs->dict);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (ob->type == OBJ_HASH) {
+ if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_HT) {
+ d = ob->ptr;
+ di = dictGetIterator(d);
+ while((de = dictNext(di)) != NULL) {
+ sds sdsele = dictGetKey(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ defragged++, de->key = newsds;
+ sdsele = dictGetVal(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ defragged++, de->v.val = newsds;
+ defragged += dictIterDefragEntry(di);
+ }
+ dictReleaseIterator(di);
+ dictDefragTables((dict**)&ob->ptr);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (ob->type == OBJ_MODULE) {
+ /* Currently defragmenting modules private data types
+ * is not supported. */
+ } else {
+ serverPanic("Unknown object type");
+ }
+ return defragged;
+}
+
+/* Defrag scan callback for the main db dictionary. */
+void defragScanCallback(void *privdata, const dictEntry *de) {
+ int defragged = defragKey((redisDb*)privdata, (dictEntry*)de);
+ server.stat_active_defrag_hits += defragged;
+ if(defragged)
+ server.stat_active_defrag_key_hits++;
+ else
+ server.stat_active_defrag_key_misses++;
+}
+
+/* Defrag scan callback for each hash table bucket,
+ * used in order to defrag the dictEntry allocations. */
+void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
+ UNUSED(privdata);
+ while(*bucketref) {
+ dictEntry *de = *bucketref, *newde;
+ if ((newde = activeDefragAlloc(de))) {
+ *bucketref = newde;
+ }
+ bucketref = &(*bucketref)->next;
+ }
+}
+
+/* Utility function to get the fragmentation ratio from jemalloc.
+ * It is critical to do that by comparing only heap maps that belong to
+ * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
+ * fragmentation ratio in order to decide if a defrag action should be taken
+ * or not, a false detection can cause the defragmenter to waste a lot of CPU
+ * without the possibility of getting any results. */
+float getAllocatorFragmentation(size_t *out_frag_bytes) {
+ size_t epoch = 1, allocated = 0, resident = 0, active = 0, sz = sizeof(size_t);
+ /* Update the statistics cached by mallctl. */
+ je_mallctl("epoch", &epoch, &sz, &epoch, sz);
+ /* Unlike RSS, this does not include RSS from shared libraries and other non
+ * heap mappings. */
+ je_mallctl("stats.resident", &resident, &sz, NULL, 0);
+    /* Unlike resident, this does not include the pages jemalloc reserves
+ * for re-use (purge will clean that). */
+ je_mallctl("stats.active", &active, &sz, NULL, 0);
+ /* Unlike zmalloc_used_memory, this matches the stats.resident by taking
+ * into account all allocations done by this process (not only zmalloc). */
+ je_mallctl("stats.allocated", &allocated, &sz, NULL, 0);
+ float frag_pct = ((float)active / allocated)*100 - 100;
+ size_t frag_bytes = active - allocated;
+ float rss_pct = ((float)resident / allocated)*100 - 100;
+ size_t rss_bytes = resident - allocated;
+ if(out_frag_bytes)
+ *out_frag_bytes = frag_bytes;
+ serverLog(LL_DEBUG,
+ "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu%% rss)",
+ allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes);
+ return frag_pct;
+}
+
+#define INTERPOLATE(x, x1, x2, y1, y2) ( (y1) + ((x)-(x1)) * ((y2)-(y1)) / ((x2)-(x1)) )
+#define LIMIT(y, min, max) ((y)<(min)? min: ((y)>(max)? max: (y)))
+
+/* Perform incremental defragmentation work from the serverCron.
+ * This works in a similar way to activeExpireCycle, in the sense that
+ * we do incremental work across calls. */
+void activeDefragCycle(void) {
+ static int current_db = -1;
+ static unsigned long cursor = 0;
+ static redisDb *db = NULL;
+ static long long start_scan, start_stat;
+ unsigned int iterations = 0;
+ unsigned long long defragged = server.stat_active_defrag_hits;
+ long long start, timelimit;
+
+ if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1)
+ return; /* Defragging memory while there's a fork will just do damage. */
+
+    /* Once a second, check if the fragmentation justifies starting a scan
+ * or making it more aggressive. */
+ run_with_period(1000) {
+ size_t frag_bytes;
+ float frag_pct = getAllocatorFragmentation(&frag_bytes);
+ /* If we're not already running, and below the threshold, exit. */
+ if (!server.active_defrag_running) {
+ if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes)
+ return;
+ }
+
+ /* Calculate the adaptive aggressiveness of the defrag */
+ int cpu_pct = INTERPOLATE(frag_pct,
+ server.active_defrag_threshold_lower,
+ server.active_defrag_threshold_upper,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ cpu_pct = LIMIT(cpu_pct,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ /* We allow increasing the aggressiveness during a scan, but don't
+ * reduce it. */
+ if (!server.active_defrag_running ||
+ cpu_pct > server.active_defrag_running)
+ {
+ server.active_defrag_running = cpu_pct;
+ serverLog(LL_VERBOSE,
+ "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
+ frag_pct, frag_bytes, cpu_pct);
+ }
+ }
+ if (!server.active_defrag_running)
+ return;
+
+ /* See activeExpireCycle for how timelimit is handled. */
+ start = ustime();
+ timelimit = 1000000*server.active_defrag_running/server.hz/100;
+ if (timelimit <= 0) timelimit = 1;
+
+ do {
+ if (!cursor) {
+ /* Move on to next database, and stop if we reached the last one. */
+ if (++current_db >= server.dbnum) {
+ long long now = ustime();
+ size_t frag_bytes;
+ float frag_pct = getAllocatorFragmentation(&frag_bytes);
+ serverLog(LL_VERBOSE,
+ "Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu",
+ (int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes);
+
+ start_scan = now;
+ current_db = -1;
+ cursor = 0;
+ db = NULL;
+ server.active_defrag_running = 0;
+ return;
+ }
+ else if (current_db==0) {
+ /* Start a scan from the first database. */
+ start_scan = ustime();
+ start_stat = server.stat_active_defrag_hits;
+ }
+
+ db = &server.db[current_db];
+ cursor = 0;
+ }
+
+ do {
+ cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db);
+ /* Once in 16 scan iterations, or 1000 pointer reallocations
+ * (if we have a lot of pointers in one hash bucket), check if we
+             * reached the time limit. */
+ if (cursor && (++iterations > 16 || server.stat_active_defrag_hits - defragged > 1000)) {
+ if ((ustime() - start) > timelimit) {
+ return;
+ }
+ iterations = 0;
+ defragged = server.stat_active_defrag_hits;
+ }
+ } while(cursor);
+ } while(1);
+}
+
+#else /* HAVE_DEFRAG */
+
+void activeDefragCycle(void) {
+ /* Not implemented yet. */
+}
+
+#endif
diff --git a/src/dict.c b/src/dict.c
index b27920a44..69fb3b8f8 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -37,15 +37,19 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
-#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
+#ifndef DICT_BENCHMARK_MAIN
#include "redisassert.h"
+#else
+#include <assert.h>
+#endif
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
@@ -62,100 +66,33 @@ static unsigned int dict_force_resize_ratio = 5;
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
-static int _dictKeyIndex(dict *ht, const void *key);
+static int _dictKeyIndex(dict *ht, const void *key, unsigned int hash, dictEntry **existing);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
/* -------------------------- hash functions -------------------------------- */
-/* Thomas Wang's 32 bit Mix Function */
-unsigned int dictIntHashFunction(unsigned int key)
-{
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
+static uint8_t dict_hash_function_seed[16];
-/* Identity hash function for integer keys */
-unsigned int dictIdentityHashFunction(unsigned int key)
-{
- return key;
+void dictSetHashFunctionSeed(uint8_t *seed) {
+ memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
-static uint32_t dict_hash_function_seed = 5381;
-
-void dictSetHashFunctionSeed(uint32_t seed) {
- dict_hash_function_seed = seed;
-}
-
-uint32_t dictGetHashFunctionSeed(void) {
+uint8_t *dictGetHashFunctionSeed(void) {
return dict_hash_function_seed;
}
-/* MurmurHash2, by Austin Appleby
- * Note - This code makes a few assumptions about how your machine behaves -
- * 1. We can read a 4-byte value from any address without crashing
- * 2. sizeof(int) == 4
- *
- * And it has a few limitations -
- *
- * 1. It will not work incrementally.
- * 2. It will not produce the same results on little-endian and big-endian
- * machines.
- */
-unsigned int dictGenHashFunction(const void *key, int len) {
- /* 'm' and 'r' are mixing constants generated offline.
- They're not really 'magic', they just happen to work well. */
- uint32_t seed = dict_hash_function_seed;
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- /* Initialize the hash to a 'random' value */
- uint32_t h = seed ^ len;
+/* The default hashing function uses SipHash implementation
+ * in siphash.c. */
- /* Mix 4 bytes at a time into the hash */
- const unsigned char *data = (const unsigned char *)key;
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
- while(len >= 4) {
- uint32_t k = *(uint32_t*)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- /* Handle the last few bytes of the input array */
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- /* Do a few final mixes of the hash to ensure the last few
- * bytes are well-incorporated. */
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return (unsigned int)h;
+uint64_t dictGenHashFunction(const void *key, int len) {
+ return siphash(key,len,dict_hash_function_seed);
}
-/* And a case insensitive hash function (based on djb hash) */
-unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
- unsigned int hash = (unsigned int)dict_hash_function_seed;
-
- while (len--)
- hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
- return hash;
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
+ return siphash_nocase(buf,len,dict_hash_function_seed);
}
/* ----------------------------- API implementation ------------------------- */
@@ -217,6 +154,9 @@ int dictExpand(dict *d, unsigned long size)
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
+ /* Rehashing to the same table size is not useful. */
+ if (realsize == d->ht[0].size) return DICT_ERR;
+
/* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
@@ -238,27 +178,27 @@ int dictExpand(dict *d, unsigned long size)
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
+ *
* Note that a rehashing step consists in moving a bucket (that may have more
- * than one key as we use chaining) from the old to the new hash table. */
+ * than one key as we use chaining) from the old to the new hash table, however
+ * since part of the hash table may be composed of empty spaces, it is not
+ * guaranteed that this function will rehash even a single bucket, since it
+ * will visit at max N*10 empty buckets in total, otherwise the amount of
+ * work it does would be unbounded and the function may block for a long time. */
int dictRehash(dict *d, int n) {
+ int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
- while(n--) {
+ while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
- /* Check if we already rehashed the whole table... */
- if (d->ht[0].used == 0) {
- zfree(d->ht[0].table);
- d->ht[0] = d->ht[1];
- _dictReset(&d->ht[1]);
- d->rehashidx = -1;
- return 0;
- }
-
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
- assert(d->ht[0].size > (unsigned)d->rehashidx);
- while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
+ assert(d->ht[0].size > (unsigned long)d->rehashidx);
+ while(d->ht[0].table[d->rehashidx] == NULL) {
+ d->rehashidx++;
+ if (--empty_visits == 0) return 1;
+ }
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
@@ -276,6 +216,17 @@ int dictRehash(dict *d, int n) {
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
+
+ /* Check if we already rehashed the whole table... */
+ if (d->ht[0].used == 0) {
+ zfree(d->ht[0].table);
+ d->ht[0] = d->ht[1];
+ _dictReset(&d->ht[1]);
+ d->rehashidx = -1;
+ return 0;
+ }
+
+ /* More to rehash... */
return 1;
}
@@ -313,29 +264,32 @@ static void _dictRehashStep(dict *d) {
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
- dictEntry *entry = dictAddRaw(d,key);
+ dictEntry *entry = dictAddRaw(d,key,NULL);
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
-/* Low level add. This function adds the entry but instead of setting
- * a value returns the dictEntry structure to the user, that will make
- * sure to fill the value field as he wishes.
+/* Low level add or find:
+ * This function adds the entry but instead of setting a value returns the
+ * dictEntry structure to the user, that will make sure to fill the value
+ * field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
- * entry = dictAddRaw(dict,mykey);
+ * entry = dictAddRaw(dict,mykey,NULL);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
- * If key already exists NULL is returned.
+ * If key already exists NULL is returned, and "*existing" is populated
+ * with the existing entry if existing is not NULL.
+ *
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
-dictEntry *dictAddRaw(dict *d, void *key)
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
int index;
dictEntry *entry;
@@ -345,10 +299,13 @@ dictEntry *dictAddRaw(dict *d, void *key)
/* Get the index of the new element, or -1 if
* the element already exists. */
- if ((index = _dictKeyIndex(d, key)) == -1)
+ if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
- /* Allocate the memory and store the new entry */
+ /* Allocate the memory and store the new entry.
+ * Insert the element in top, with the assumption that in a database
+ * system it is more likely that recently added entries are accessed
+ * more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
@@ -360,51 +317,57 @@ dictEntry *dictAddRaw(dict *d, void *key)
return entry;
}
-/* Add an element, discarding the old if the key already exists.
+/* Add or Overwrite:
+ * Add an element, discarding the old value if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
int dictReplace(dict *d, void *key, void *val)
{
- dictEntry *entry, auxentry;
+ dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
- if (dictAdd(d, key, val) == DICT_OK)
+ entry = dictAddRaw(d,key,&existing);
+ if (entry) {
+ dictSetVal(d, entry, val);
return 1;
- /* It already exists, get the entry */
- entry = dictFind(d, key);
+ }
+
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
- auxentry = *entry;
- dictSetVal(d, entry, val);
+ auxentry = *existing;
+ dictSetVal(d, existing, val);
dictFreeVal(d, &auxentry);
return 0;
}
-/* dictReplaceRaw() is simply a version of dictAddRaw() that always
+/* Add or Find:
+ * dictAddOrFind() is simply a version of dictAddRaw() that always
* returns the hash entry of the specified key, even if the key already
* exists and can't be added (in that case the entry of the already
* existing key is returned.)
*
* See dictAddRaw() for more information. */
-dictEntry *dictReplaceRaw(dict *d, void *key) {
- dictEntry *entry = dictFind(d,key);
-
- return entry ? entry : dictAddRaw(d,key);
+dictEntry *dictAddOrFind(dict *d, void *key) {
+ dictEntry *entry, *existing;
+ entry = dictAddRaw(d,key,&existing);
+ return entry ? entry : existing;
}
-/* Search and remove an element */
-static int dictGenericDelete(dict *d, const void *key, int nofree)
-{
+/* Search and remove an element. This is a helper function for
+ * dictDelete() and dictUnlink(), please check the top comment
+ * of those functions. */
+static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
unsigned int h, idx;
dictEntry *he, *prevHe;
int table;
- if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
+ if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
+
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
@@ -413,7 +376,7 @@ static int dictGenericDelete(dict *d, const void *key, int nofree)
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
- if (dictCompareKeys(d, key, he->key)) {
+ if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
@@ -422,27 +385,59 @@ static int dictGenericDelete(dict *d, const void *key, int nofree)
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
+ zfree(he);
}
- zfree(he);
d->ht[table].used--;
- return DICT_OK;
+ return he;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
- return DICT_ERR; /* not found */
+ return NULL; /* not found */
}
+/* Remove an element, returning DICT_OK on success or DICT_ERR if the
+ * element was not found. */
int dictDelete(dict *ht, const void *key) {
- return dictGenericDelete(ht,key,0);
+ return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
-int dictDeleteNoFree(dict *ht, const void *key) {
+/* Remove an element from the table, but without actually releasing
+ * the key, value and dictionary entry. The dictionary entry is returned
+ * if the element was found (and unlinked from the table), and the user
+ * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
+ * Otherwise if the key is not found, NULL is returned.
+ *
+ * This function is useful when we want to remove something from the hash
+ * table but want to use its value before actually deleting the entry.
+ * Without this function the pattern would require two lookups:
+ *
+ * entry = dictFind(...);
+ * // Do something with entry
+ * dictDelete(dictionary,entry);
+ *
+ * Thanks to this function it is possible to avoid this, and use
+ * instead:
+ *
+ * entry = dictUnlink(dictionary,entry);
+ * // Do something with entry
+ * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
+ */
+dictEntry *dictUnlink(dict *ht, const void *key) {
return dictGenericDelete(ht,key,1);
}
+/* You need to call this function to really free the entry after a call
+ * to dictUnlink(). It's safe to call this function with 'he' = NULL. */
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
+ if (he == NULL) return;
+ dictFreeKey(d, he);
+ dictFreeVal(d, he);
+ zfree(he);
+}
+
/* Destroy an entire dictionary */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
unsigned long i;
@@ -483,14 +478,14 @@ dictEntry *dictFind(dict *d, const void *key)
dictEntry *he;
unsigned int h, idx, table;
- if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
+ if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareKeys(d, key, he->key))
+ if (key==he->key || dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
@@ -576,7 +571,7 @@ dictEntry *dictNext(dictIterator *iter)
iter->fingerprint = dictFingerprint(iter->d);
}
iter->index++;
- if (iter->index >= (signed) ht->size) {
+ if (iter->index >= (long) ht->size) {
if (dictIsRehashing(iter->d) && iter->table == 0) {
iter->table++;
iter->index = 0;
@@ -622,7 +617,11 @@ dictEntry *dictGetRandomKey(dict *d)
if (dictIsRehashing(d)) _dictRehashStep(d);
if (dictIsRehashing(d)) {
do {
- h = random() % (d->ht[0].size+d->ht[1].size);
+ /* We are sure there are no elements in indexes from 0
+ * to rehashidx-1 */
+ h = d->rehashidx + (random() % (d->ht[0].size +
+ d->ht[1].size -
+ d->rehashidx));
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
} while(he == NULL);
@@ -649,9 +648,12 @@ dictEntry *dictGetRandomKey(dict *d)
return he;
}
-/* This is a version of dictGetRandomKey() that is modified in order to
- * return multiple entries by jumping at a random place of the hash table
- * and scanning linearly for entries.
+/* This function samples the dictionary to return a few keys from random
+ * locations.
+ *
+ * It does not guarantee to return all the keys specified in 'count', nor
+ * does it guarantee to return non-duplicated elements, however it will make
+ * some effort to do both things.
*
* Returned pointers to hash table entries are stored into 'des' that
* points to an array of dictEntry pointers. The array must have room for
@@ -660,28 +662,65 @@ dictEntry *dictGetRandomKey(dict *d)
*
* The function returns the number of items stored into 'des', that may
* be less than 'count' if the hash table has less than 'count' elements
- * inside.
+ * inside, or if not enough elements were found in a reasonable amount of
+ * steps.
*
* Note that this function is not suitable when you need a good distribution
* of the returned items, but only when you need to "sample" a given number
* of continuous elements to run some kind of algorithm or to produce
* statistics. However the function is much faster than dictGetRandomKey()
- * at producing N elements, and the elements are guaranteed to be non
- * repeating. */
-int dictGetRandomKeys(dict *d, dictEntry **des, int count) {
- int j; /* internal hash table id, 0 or 1. */
- int stored = 0;
+ * at producing N elements. */
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
+ unsigned long j; /* internal hash table id, 0 or 1. */
+ unsigned long tables; /* 1 or 2 tables? */
+ unsigned long stored = 0, maxsizemask;
+ unsigned long maxsteps;
if (dictSize(d) < count) count = dictSize(d);
- while(stored < count) {
- for (j = 0; j < 2; j++) {
- /* Pick a random point inside the hash table 0 or 1. */
- unsigned int i = random() & d->ht[j].sizemask;
- int size = d->ht[j].size;
-
- /* Make sure to visit every bucket by iterating 'size' times. */
- while(size--) {
- dictEntry *he = d->ht[j].table[i];
+ maxsteps = count*10;
+
+ /* Try to do a rehashing work proportional to 'count'. */
+ for (j = 0; j < count; j++) {
+ if (dictIsRehashing(d))
+ _dictRehashStep(d);
+ else
+ break;
+ }
+
+ tables = dictIsRehashing(d) ? 2 : 1;
+ maxsizemask = d->ht[0].sizemask;
+ if (tables > 1 && maxsizemask < d->ht[1].sizemask)
+ maxsizemask = d->ht[1].sizemask;
+
+ /* Pick a random point inside the larger table. */
+ unsigned long i = random() & maxsizemask;
+ unsigned long emptylen = 0; /* Continuous empty entries so far. */
+ while(stored < count && maxsteps--) {
+ for (j = 0; j < tables; j++) {
+ /* Invariant of the dict.c rehashing: up to the indexes already
+ * visited in ht[0] during the rehashing, there are no populated
+ * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
+ if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
+ /* Moreover, if we are currently out of range in the second
+ * table, there will be no elements in both tables up to
+ * the current rehashing index, so we jump if possible.
+ * (this happens when going from big to small table). */
+ if (i >= d->ht[1].size) i = d->rehashidx;
+ continue;
+ }
+ if (i >= d->ht[j].size) continue; /* Out of range for this table. */
+ dictEntry *he = d->ht[j].table[i];
+
+ /* Count contiguous empty buckets, and jump to other
+ * locations if they reach 'count' (with a minimum of 5). */
+ if (he == NULL) {
+ emptylen++;
+ if (emptylen >= 5 && emptylen > count) {
+ i = random() & maxsizemask;
+ emptylen = 0;
+ }
+ } else {
+ emptylen = 0;
while (he) {
/* Collect all the elements of the buckets found non
* empty while iterating. */
@@ -691,14 +730,11 @@ int dictGetRandomKeys(dict *d, dictEntry **des, int count) {
stored++;
if (stored == count) return stored;
}
- i = (i+1) & d->ht[j].sizemask;
}
- /* If there is only one table and we iterated it all, we should
- * already have 'count' elements. Assert this condition. */
- assert(dictIsRehashing(d) != 0);
}
+ i = (i+1) & maxsizemask;
}
- return stored; /* Never reached. */
+ return stored;
}
/* Function to reverse bits. Algorithm from:
@@ -715,72 +751,72 @@ static unsigned long rev(unsigned long v) {
/* dictScan() is used to iterate over the elements of a dictionary.
*
- * Iterating works in the following way:
+ * Iterating works the following way:
*
* 1) Initially you call the function using a cursor (v) value of 0.
* 2) The function performs one step of the iteration, and returns the
- * new cursor value that you must use in the next call.
+ * new cursor value you must use in the next call.
* 3) When the returned cursor is 0, the iteration is complete.
*
- * The function guarantees that all the elements that are present in the
- * dictionary from the start to the end of the iteration are returned.
- * However it is possible that some element is returned multiple time.
+ * The function guarantees all elements present in the
+ * dictionary get returned between the start and end of the iteration.
+ * However it is possible some elements get returned multiple times.
*
- * For every element returned, the callback 'fn' passed as argument is
- * called, with 'privdata' as first argument and the dictionar entry
+ * For every element returned, the callback argument 'fn' is
+ * called with 'privdata' as first argument and the dictionary entry
* 'de' as second argument.
*
* HOW IT WORKS.
*
- * The algorithm used in the iteration was designed by Pieter Noordhuis.
+ * The iteration algorithm was designed by Pieter Noordhuis.
* The main idea is to increment a cursor starting from the higher order
- * bits, that is, instead of incrementing the cursor normally, the bits
+ * bits. That is, instead of incrementing the cursor normally, the bits
* of the cursor are reversed, then the cursor is incremented, and finally
* the bits are reversed again.
*
- * This strategy is needed because the hash table may be resized from one
- * call to the other call of the same iteration.
+ * This strategy is needed because the hash table may be resized between
+ * iteration calls.
*
* dict.c hash tables are always power of two in size, and they
* use chaining, so the position of an element in a given table is given
- * always by computing the bitwise AND between Hash(key) and SIZE-1
+ * by computing the bitwise AND between Hash(key) and SIZE-1
* (where SIZE-1 is always the mask that is equivalent to taking the rest
* of the division between the Hash of the key and SIZE).
*
* For example if the current hash table size is 16, the mask is
- * (in binary) 1111. The position of a key in the hash table will be always
+ * (in binary) 1111. The position of a key in the hash table will always be
* the last four bits of the hash output, and so forth.
*
* WHAT HAPPENS IF THE TABLE CHANGES IN SIZE?
*
- * If the hash table grows, elements can go anyway in one multiple of
- * the old bucket: for example let's say that we already iterated with
- * a 4 bit cursor 1100, since the mask is 1111 (hash table size = 16).
+ * If the hash table grows, elements can go anywhere in one multiple of
+ * the old bucket: for example let's say we already iterated with
+ * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16).
*
- * If the hash table will be resized to 64 elements, and the new mask will
- * be 111111, the new buckets that you obtain substituting in ??1100
- * either 0 or 1, can be targeted only by keys that we already visited
+ * If the hash table will be resized to 64 elements, then the new mask will
+ * be 111111. The new buckets you obtain by substituting in ??1100
+ * with either 0 or 1 can be targeted only by keys we already visited
* when scanning the bucket 1100 in the smaller hash table.
*
* By iterating the higher bits first, because of the inverted counter, the
- * cursor does not need to restart if the table size gets bigger, and will
- * just continue iterating with cursors that don't have '1100' at the end,
- * nor any other combination of final 4 bits already explored.
+ * cursor does not need to restart if the table size gets bigger. It will
+ * continue iterating using cursors without '1100' at the end, and also
+ * without any other combination of the final 4 bits already explored.
*
* Similarly when the table size shrinks over time, for example going from
- * 16 to 8, If a combination of the lower three bits (the mask for size 8
- * is 111) was already completely explored, it will not be visited again
- * as we are sure that, we tried for example, both 0111 and 1111 (all the
+ * 16 to 8, if a combination of the lower three bits (the mask for size 8
+ * is 111) were already completely explored, it would not be visited again
+ * because we are sure we tried, for example, both 0111 and 1111 (all the
* variations of the higher bit) so we don't need to test it again.
*
* WAIT... YOU HAVE *TWO* TABLES DURING REHASHING!
*
- * Yes, this is true, but we always iterate the smaller one of the tables,
- * testing also all the expansions of the current cursor into the larger
- * table. So for example if the current cursor is 101 and we also have a
+ * Yes, this is true, but we always iterate the smaller table first, then
+ * we test all the expansions of the current cursor into the larger
+ * table. For example if the current cursor is 101 and we also have a
* larger table of size 16, we also test (0)101 and (1)101 inside the larger
* table. This reduces the problem back to having only one table, where
- * the larger one, if exists, is just an expansion of the smaller one.
+ * the larger one, if it exists, is just an expansion of the smaller one.
*
* LIMITATIONS
*
@@ -789,21 +825,22 @@ static unsigned long rev(unsigned long v) {
*
* The disadvantages resulting from this design are:
*
- * 1) It is possible that we return duplicated elements. However this is usually
+ * 1) It is possible we return elements more than once. However this is usually
* easy to deal with in the application level.
* 2) The iterator must return multiple elements per call, as it needs to always
* return all the keys chained in a given bucket, and all the expansions, so
- * we are sure we don't miss keys moving.
+ * we are sure we don't miss keys moving during rehashing.
* 3) The reverse cursor is somewhat hard to understand at first, but this
* comment is supposed to help.
*/
unsigned long dictScan(dict *d,
unsigned long v,
dictScanFunction *fn,
+ dictScanBucketFunction* bucketfn,
void *privdata)
{
dictht *t0, *t1;
- const dictEntry *de;
+ const dictEntry *de, *next;
unsigned long m0, m1;
if (dictSize(d) == 0) return 0;
@@ -813,10 +850,12 @@ unsigned long dictScan(dict *d,
m0 = t0->sizemask;
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
de = t0->table[v & m0];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
} else {
@@ -833,20 +872,24 @@ unsigned long dictScan(dict *d,
m1 = t1->sizemask;
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
de = t0->table[v & m0];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
/* Iterate over indices in larger table that are the expansion
* of the index pointed to by the cursor in the smaller table */
do {
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
de = t1->table[v & m1];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
/* Increment bits not covered by the smaller mask */
@@ -907,27 +950,29 @@ static unsigned long _dictNextPower(unsigned long size)
/* Returns the index of a free slot that can be populated with
* a hash entry for the given 'key'.
- * If the key already exists, -1 is returned.
+ * If the key already exists, -1 is returned
+ * and the optional output parameter may be filled.
*
* Note that if we are in the process of rehashing the hash table, the
* index is always returned in the context of the second (new) hash table. */
-static int _dictKeyIndex(dict *d, const void *key)
+static int _dictKeyIndex(dict *d, const void *key, unsigned int hash, dictEntry **existing)
{
- unsigned int h, idx, table;
+ unsigned int idx, table;
dictEntry *he;
+ if (existing) *existing = NULL;
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
- /* Compute the key hash value */
- h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
- idx = h & d->ht[table].sizemask;
+ idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareKeys(d, key, he->key))
+ if (key==he->key || dictCompareKeys(d, key, he->key)) {
+ if (existing) *existing = he;
return -1;
+ }
he = he->next;
}
if (!dictIsRehashing(d)) break;
@@ -950,24 +995,50 @@ void dictDisableResize(void) {
dict_can_resize = 0;
}
-#if 0
+unsigned int dictGetHash(dict *d, const void *key) {
+ return dictHashKey(d, key);
+}
+
+/* Finds the dictEntry reference by using pointer and pre-calculated hash.
+ * oldkey is a dead pointer and should not be accessed.
+ * the hash value should be provided using dictGetHash.
+ * no string / key comparison is performed.
+ * return value is the reference to the dictEntry if found, or NULL if not found. */
+dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash) {
+ dictEntry *he, **heref;
+ unsigned int idx, table;
-/* The following is code that we don't use for Redis currently, but that is part
-of the library. */
+ if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
+ for (table = 0; table <= 1; table++) {
+ idx = hash & d->ht[table].sizemask;
+ heref = &d->ht[table].table[idx];
+ he = *heref;
+ while(he) {
+ if (oldptr==he->key)
+ return heref;
+ heref = &he->next;
+ he = *heref;
+ }
+ if (!dictIsRehashing(d)) return NULL;
+ }
+ return NULL;
+}
-/* ----------------------- Debugging ------------------------*/
+/* ------------------------------- Debugging ---------------------------------*/
#define DICT_STATS_VECTLEN 50
-static void _dictPrintStatsHt(dictht *ht) {
+size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) {
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
unsigned long totchainlen = 0;
unsigned long clvector[DICT_STATS_VECTLEN];
+ size_t l = 0;
if (ht->used == 0) {
- printf("No stats available for empty dictionaries\n");
- return;
+ return snprintf(buf,bufsize,
+ "No stats available for empty dictionaries\n");
}
+ /* Compute stats. */
for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
for (i = 0; i < ht->size; i++) {
dictEntry *he;
@@ -988,89 +1059,163 @@ static void _dictPrintStatsHt(dictht *ht) {
if (chainlen > maxchainlen) maxchainlen = chainlen;
totchainlen += chainlen;
}
- printf("Hash table stats:\n");
- printf(" table size: %ld\n", ht->size);
- printf(" number of elements: %ld\n", ht->used);
- printf(" different slots: %ld\n", slots);
- printf(" max chain length: %ld\n", maxchainlen);
- printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
- printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
- printf(" Chain length distribution:\n");
+
+ /* Generate human readable stats. */
+ l += snprintf(buf+l,bufsize-l,
+ "Hash table %d stats (%s):\n"
+ " table size: %ld\n"
+ " number of elements: %ld\n"
+ " different slots: %ld\n"
+ " max chain length: %ld\n"
+ " avg chain length (counted): %.02f\n"
+ " avg chain length (computed): %.02f\n"
+ " Chain length distribution:\n",
+ tableid, (tableid == 0) ? "main hash table" : "rehashing target",
+ ht->size, ht->used, slots, maxchainlen,
+ (float)totchainlen/slots, (float)ht->used/slots);
+
for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
if (clvector[i] == 0) continue;
- printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
+ if (l >= bufsize) break;
+ l += snprintf(buf+l,bufsize-l,
+ " %s%ld: %ld (%.02f%%)\n",
+ (i == DICT_STATS_VECTLEN-1)?">= ":"",
+ i, clvector[i], ((float)clvector[i]/ht->size)*100);
}
+
+    /* Unlike snprintf(), return the number of characters actually written. */
+ if (bufsize) buf[bufsize-1] = '\0';
+ return strlen(buf);
}
-void dictPrintStats(dict *d) {
- _dictPrintStatsHt(&d->ht[0]);
- if (dictIsRehashing(d)) {
- printf("-- Rehashing into ht[1]:\n");
- _dictPrintStatsHt(&d->ht[1]);
+void dictGetStats(char *buf, size_t bufsize, dict *d) {
+ size_t l;
+ char *orig_buf = buf;
+ size_t orig_bufsize = bufsize;
+
+ l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0);
+ buf += l;
+ bufsize -= l;
+ if (dictIsRehashing(d) && bufsize > 0) {
+ _dictGetStatsHt(buf,bufsize,&d->ht[1],1);
}
+ /* Make sure there is a NULL term at the end. */
+ if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0';
}
-/* ----------------------- StringCopy Hash Table Type ------------------------*/
+/* ------------------------------- Benchmark ---------------------------------*/
-static unsigned int _dictStringCopyHTHashFunction(const void *key)
-{
- return dictGenHashFunction(key, strlen(key));
-}
+#ifdef DICT_BENCHMARK_MAIN
-static void *_dictStringDup(void *privdata, const void *key)
-{
- int len = strlen(key);
- char *copy = zmalloc(len+1);
- DICT_NOTUSED(privdata);
+#include "sds.h"
- memcpy(copy, key, len);
- copy[len] = '\0';
- return copy;
+uint64_t hashCallback(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
-static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
+int compareCallback(void *privdata, const void *key1, const void *key2) {
+ int l1,l2;
DICT_NOTUSED(privdata);
- return strcmp(key1, key2) == 0;
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
}
-static void _dictStringDestructor(void *privdata, void *key)
-{
+void freeCallback(void *privdata, void *val) {
DICT_NOTUSED(privdata);
- zfree(key);
+ sdsfree(val);
}
-dictType dictTypeHeapStringCopyKey = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
+dictType BenchmarkDictType = {
+ hashCallback,
+ NULL,
+ NULL,
+ compareCallback,
+ freeCallback,
+ NULL
};
-/* This is like StringCopy but does not auto-duplicate the key.
- * It's used for intepreter's shared strings. */
-dictType dictTypeHeapStrings = {
- _dictStringCopyHTHashFunction, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
-};
+#define start_benchmark() start = timeInMilliseconds()
+#define end_benchmark(msg) do { \
+ elapsed = timeInMilliseconds()-start; \
+ printf(msg ": %ld items in %lld ms\n", count, elapsed); \
+} while(0);
+
+/* dict-benchmark [count] */
+int main(int argc, char **argv) {
+ long j;
+ long long start, elapsed;
+ dict *dict = dictCreate(&BenchmarkDictType,NULL);
+ long count = 0;
+
+ if (argc == 2) {
+ count = strtol(argv[1],NULL,10);
+ } else {
+ count = 5000000;
+ }
-/* This is like StringCopy but also automatically handle dynamic
- * allocated C strings as values. */
-dictType dictTypeHeapStringCopyKeyValue = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- _dictStringDup, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- _dictStringDestructor, /* val destructor */
-};
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j);
+ assert(retval == DICT_OK);
+ }
+ end_benchmark("Inserting");
+ assert((long)dictSize(dict) == count);
+
+ /* Wait for rehashing. */
+ while (dictIsRehashing(dict)) {
+ dictRehashMilliseconds(dict,100);
+ }
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Linear access of existing elements");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Linear access of existing elements (2nd round)");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(rand() % count);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Random access of existing elements");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(rand() % count);
+ key[0] = 'X';
+ dictEntry *de = dictFind(dict,key);
+ assert(de == NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Accessing missing");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ int retval = dictDelete(dict,key);
+ assert(retval == DICT_OK);
+ key[0] += 17; /* Change first number to letter. */
+ retval = dictAdd(dict,key,(void*)j);
+ assert(retval == DICT_OK);
+ }
+ end_benchmark("Removing and adding");
+}
#endif
diff --git a/src/dict.h b/src/dict.h
index 905330f5d..bf316a00f 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -56,7 +56,7 @@ typedef struct dictEntry {
} dictEntry;
typedef struct dictType {
- unsigned int (*hashFunction)(const void *key);
+ uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
@@ -77,8 +77,8 @@ typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
- int rehashidx; /* rehashing not in progress if rehashidx == -1 */
- int iterators; /* number of iterators currently running */
+ long rehashidx; /* rehashing not in progress if rehashidx == -1 */
+ unsigned long iterators; /* number of iterators currently running */
} dict;
/* If safe is set to 1 this is a safe iterator, that means, you can call
@@ -87,12 +87,15 @@ typedef struct dict {
* should be called while iterating. */
typedef struct dictIterator {
dict *d;
- int table, index, safe;
+ long index;
+ int table, safe;
dictEntry *entry, *nextEntry;
- long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
+ /* unsafe iterator fingerprint for misuse detection. */
+ long long fingerprint;
} dictIterator;
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
+typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref);
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4
@@ -104,19 +107,19 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
#define dictSetVal(d, entry, _val_) do { \
if ((d)->type->valDup) \
- entry->v.val = (d)->type->valDup((d)->privdata, _val_); \
+ (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
else \
- entry->v.val = (_val_); \
+ (entry)->v.val = (_val_); \
} while(0)
#define dictSetSignedIntegerVal(entry, _val_) \
- do { entry->v.s64 = _val_; } while(0)
+ do { (entry)->v.s64 = _val_; } while(0)
#define dictSetUnsignedIntegerVal(entry, _val_) \
- do { entry->v.u64 = _val_; } while(0)
+ do { (entry)->v.u64 = _val_; } while(0)
#define dictSetDoubleVal(entry, _val_) \
- do { entry->v.d = _val_; } while(0)
+ do { (entry)->v.d = _val_; } while(0)
#define dictFreeKey(d, entry) \
if ((d)->type->keyDestructor) \
@@ -124,9 +127,9 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
#define dictSetKey(d, entry, _key_) do { \
if ((d)->type->keyDup) \
- entry->key = (d)->type->keyDup((d)->privdata, _key_); \
+ (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
else \
- entry->key = (_key_); \
+ (entry)->key = (_key_); \
} while(0)
#define dictCompareKeys(d, key1, key2) \
@@ -142,17 +145,18 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
#define dictGetDoubleVal(he) ((he)->v.d)
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
-#define dictIsRehashing(ht) ((ht)->rehashidx != -1)
+#define dictIsRehashing(d) ((d)->rehashidx != -1)
/* API */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
-dictEntry *dictAddRaw(dict *d, void *key);
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
+dictEntry *dictAddOrFind(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
-dictEntry *dictReplaceRaw(dict *d, void *key);
int dictDelete(dict *d, const void *key);
-int dictDeleteNoFree(dict *d, const void *key);
+dictEntry *dictUnlink(dict *ht, const void *key);
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
@@ -162,18 +166,20 @@ dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
-int dictGetRandomKeys(dict *d, dictEntry **des, int count);
-void dictPrintStats(dict *d);
-unsigned int dictGenHashFunction(const void *key, int len);
-unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
+void dictGetStats(char *buf, size_t bufsize, dict *d);
+uint64_t dictGenHashFunction(const void *key, int len);
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
-void dictSetHashFunctionSeed(unsigned int initval);
-unsigned int dictGetHashFunctionSeed(void);
-unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
+void dictSetHashFunctionSeed(uint8_t *seed);
+uint8_t *dictGetHashFunctionSeed(void);
+unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
+unsigned int dictGetHash(dict *d, const void *key);
+dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash);
/* Hash table types */
extern dictType dictTypeHeapStringCopyKey;
diff --git a/src/endianconv.c b/src/endianconv.c
index 9adf09c1f..f3b0b4730 100644
--- a/src/endianconv.c
+++ b/src/endianconv.c
@@ -101,12 +101,16 @@ uint64_t intrev64(uint64_t v) {
return v;
}
-#ifdef TESTMAIN
+#ifdef REDIS_TEST
#include <stdio.h>
-int main(void) {
+#define UNUSED(x) (void)(x)
+int endianconvTest(int argc, char *argv[]) {
char buf[32];
+ UNUSED(argc);
+ UNUSED(argv);
+
sprintf(buf,"ciaoroma");
memrev16(buf);
printf("%s\n", buf);
diff --git a/src/endianconv.h b/src/endianconv.h
index d93cd99ba..08f553136 100644
--- a/src/endianconv.h
+++ b/src/endianconv.h
@@ -71,4 +71,8 @@ uint64_t intrev64(uint64_t v);
#define ntohu64(v) intrev64(v)
#endif
+#ifdef REDIS_TEST
+int endianconvTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/evict.c b/src/evict.c
new file mode 100644
index 000000000..5ce5ca07f
--- /dev/null
+++ b/src/evict.c
@@ -0,0 +1,567 @@
+/* Maxmemory directive handling (LRU eviction and other policies).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+
+/* ----------------------------------------------------------------------------
+ * Data structures
+ * --------------------------------------------------------------------------*/
+
+/* To improve the quality of the LRU approximation we take a set of keys
+ * that are good candidate for eviction across freeMemoryIfNeeded() calls.
+ *
+ * Entries inside the eviciton pool are taken ordered by idle time, putting
+ * greater idle times to the right (ascending order).
+ *
+ * When an LFU policy is used instead, a reverse frequency indication is used
+ * instead of the idle time, so that we still evict by larger value (larger
+ * inverse frequency means to evict keys with the least frequent accesses).
+ *
+ * Empty entries have the key pointer set to NULL. */
+#define EVPOOL_SIZE 16
+#define EVPOOL_CACHED_SDS_SIZE 255
+struct evictionPoolEntry {
+ unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
+ sds key; /* Key name. */
+ sds cached; /* Cached SDS object for key name. */
+ int dbid; /* Key DB number. */
+};
+
+static struct evictionPoolEntry *EvictionPoolLRU;
+
+unsigned long LFUDecrAndReturn(robj *o);
+
+/* ----------------------------------------------------------------------------
+ * Implementation of eviction, aging and LRU
+ * --------------------------------------------------------------------------*/
+
+/* Return the LRU clock, based on the clock resolution. This is a time
+ * in a reduced-bits format that can be used to set and check the
+ * object->lru field of redisObject structures. */
+unsigned int getLRUClock(void) {
+ return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
+}
+
+/* This function is used to obtain the current LRU clock.
+ * If the current resolution is lower than the frequency we refresh the
+ * LRU clock (as it should be in production servers) we return the
+ * precomputed value, otherwise we need to resort to a system call. */
+unsigned int LRU_CLOCK(void) {
+ unsigned int lruclock;
+ if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
+ atomicGet(server.lruclock,lruclock);
+ } else {
+ lruclock = getLRUClock();
+ }
+ return lruclock;
+}
+
+/* Given an object returns the min number of milliseconds the object was never
+ * requested, using an approximated LRU algorithm. */
+unsigned long long estimateObjectIdleTime(robj *o) {
+ unsigned long long lruclock = LRU_CLOCK();
+ if (lruclock >= o->lru) {
+ return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
+ } else {
+ return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
+ LRU_CLOCK_RESOLUTION;
+ }
+}
+
+/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
+ * file to limit the max memory used by the server, before processing a
+ * command.
+ *
+ * The goal of the function is to free enough memory to keep Redis under the
+ * configured memory limit.
+ *
+ * The function starts calculating how many bytes should be freed to keep
+ * Redis under the limit, and enters a loop selecting the best keys to
+ * evict accordingly to the configured policy.
+ *
+ * If all the bytes needed to return back under the limit were freed the
+ * function returns C_OK, otherwise C_ERR is returned, and the caller
+ * should block the execution of commands that will result in more memory
+ * used by the server.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * LRU approximation algorithm
+ *
+ * Redis uses an approximation of the LRU algorithm that runs in constant
+ * memory. Every time there is a key to expire, we sample N keys (with
+ * N very small, usually in around 5) to populate a pool of best keys to
+ * evict of M keys (the pool size is defined by EVPOOL_SIZE).
+ *
+ * The N keys sampled are added in the pool of good keys to expire (the one
+ * with an old access time) if they are better than one of the current keys
+ * in the pool.
+ *
+ * After the pool is populated, the best key we have in the pool is expired.
+ * However note that we don't remove keys from the pool when they are deleted
+ * so the pool may contain keys that no longer exist.
+ *
+ * When we try to evict a key, and all the entries in the pool don't exist
+ * we populate it again. This time we'll be sure that the pool has at least
+ * one key that can be evicted, if there is at least one key that can be
+ * evicted in the whole database. */
+
+/* Create a new eviction pool. */
+void evictionPoolAlloc(void) {
+ struct evictionPoolEntry *ep;
+ int j;
+
+ ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
+ for (j = 0; j < EVPOOL_SIZE; j++) {
+ ep[j].idle = 0;
+ ep[j].key = NULL;
+ ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
+ ep[j].dbid = 0;
+ }
+ EvictionPoolLRU = ep;
+}
+
+/* This is an helper function for freeMemoryIfNeeded(), it is used in order
+ * to populate the evictionPool with a few entries every time we want to
+ * expire a key. Keys with idle time smaller than one of the current
+ * keys are added. Keys are always added if there are free entries.
+ *
+ * We insert keys on place in ascending order, so keys with the smaller
+ * idle time are on the left, and keys with the higher idle time on the
+ * right. */
+
+void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
+ int j, k, count;
+ dictEntry *samples[server.maxmemory_samples];
+
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
+ for (j = 0; j < count; j++) {
+ unsigned long long idle;
+ sds key;
+ robj *o;
+ dictEntry *de;
+
+ de = samples[j];
+ key = dictGetKey(de);
+
+ /* If the dictionary we are sampling from is not the main
+ * dictionary (but the expires one) we need to lookup the key
+ * again in the key dictionary to obtain the value object. */
+ if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
+ if (sampledict != keydict) de = dictFind(keydict, key);
+ o = dictGetVal(de);
+ }
+
+ /* Calculate the idle time according to the policy. This is called
+ * idle just because the code initially handled LRU, but is in fact
+ * just a score where an higher score means better candidate. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
+ idle = estimateObjectIdleTime(o);
+ } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ /* When we use an LRU policy, we sort the keys by idle time
+ * so that we expire keys starting from greater idle time.
+ * However when the policy is an LFU one, we have a frequency
+ * estimation, and we want to evict keys with lower frequency
+ * first. So inside the pool we put objects using the inverted
+ * frequency subtracting the actual frequency to the maximum
+ * frequency of 255. */
+ idle = 255-LFUDecrAndReturn(o);
+ } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
+ /* In this case the sooner the expire the better. */
+ idle = ULLONG_MAX - (long)dictGetVal(de);
+ } else {
+ serverPanic("Unknown eviction policy in evictionPoolPopulate()");
+ }
+
+ /* Insert the element inside the pool.
+ * First, find the first empty bucket or the first populated
+ * bucket that has an idle time smaller than our idle time. */
+ k = 0;
+ while (k < EVPOOL_SIZE &&
+ pool[k].key &&
+ pool[k].idle < idle) k++;
+ if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
+ /* Can't insert if the element is < the worst element we have
+ * and there are no empty buckets. */
+ continue;
+ } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
+ /* Inserting into empty position. No setup needed before insert. */
+ } else {
+ /* Inserting in the middle. Now k points to the first element
+ * greater than the element to insert. */
+ if (pool[EVPOOL_SIZE-1].key == NULL) {
+ /* Free space on the right? Insert at k shifting
+ * all the elements from k to end to the right. */
+
+ /* Save SDS before overwriting. */
+ sds cached = pool[EVPOOL_SIZE-1].cached;
+ memmove(pool+k+1,pool+k,
+ sizeof(pool[0])*(EVPOOL_SIZE-k-1));
+ pool[k].cached = cached;
+ } else {
+ /* No free space on right? Insert at k-1 */
+ k--;
+ /* Shift all elements on the left of k (included) to the
+ * left, so we discard the element with smaller idle time. */
+ sds cached = pool[0].cached; /* Save SDS before overwriting. */
+ if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
+ memmove(pool,pool+1,sizeof(pool[0])*k);
+ pool[k].cached = cached;
+ }
+ }
+
+ /* Try to reuse the cached SDS string allocated in the pool entry,
+ * because allocating and deallocating this object is costly
+ * (according to the profiler, not my fantasy. Remember:
+ * premature optimizbla bla bla bla. */
+ int klen = sdslen(key);
+ if (klen > EVPOOL_CACHED_SDS_SIZE) {
+ pool[k].key = sdsdup(key);
+ } else {
+ memcpy(pool[k].cached,key,klen+1);
+ sdssetlen(pool[k].cached,klen);
+ pool[k].key = pool[k].cached;
+ }
+ pool[k].idle = idle;
+ pool[k].dbid = dbid;
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ * LFU (Least Frequently Used) implementation.
+
+ * We have 24 total bits of space in each object in order to implement
+ * an LFU (Least Frequently Used) eviction policy, since we re-use the
+ * LRU field for this purpose.
+ *
+ * We split the 24 bits into two fields:
+ *
+ * 16 bits 8 bits
+ * +----------------+--------+
+ * + Last decr time | LOG_C |
+ * +----------------+--------+
+ *
+ * LOG_C is a logarithmic counter that provides an indication of the access
+ * frequency. However this field must also be decremented otherwise what used
+ * to be a frequently accessed key in the past, will remain ranked like that
+ * forever, while we want the algorithm to adapt to access pattern changes.
+ *
+ * So the remaining 16 bits are used in order to store the "decrement time",
+ * a reduced-precision Unix time (we take 16 bits of the time converted
+ * in minutes since we don't care about wrapping around) where the LOG_C
+ * counter is halved if it has an high value, or just decremented if it
+ * has a low value.
+ *
+ * New keys don't start at zero, in order to have the ability to collect
+ * some accesses before being trashed away, so they start at COUNTER_INIT_VAL.
+ * The logarithmic increment performed on LOG_C takes care of COUNTER_INIT_VAL
+ * when incrementing the key, so that keys starting at COUNTER_INIT_VAL
+ * (or having a smaller value) have a very high chance of being incremented
+ * on access.
+ *
+ * During decrement, the value of the logarithmic counter is halved if
+ * its current value is greater than two times the COUNTER_INIT_VAL, otherwise
+ * it is just decremented by one.
+ * --------------------------------------------------------------------------*/
+
+/* Return the current time in minutes, just taking the least significant
+ * 16 bits. The returned time is suitable to be stored as LDT (last decrement
+ * time) for the LFU implementation. */
+unsigned long LFUGetTimeInMinutes(void) {
+ return (server.unixtime/60) & 65535;
+}
+
+/* Given an object last decrement time, compute the minimum number of minutes
+ * that elapsed since the last decrement. Handle overflow (ldt greater than
+ * the current 16 bits minutes time) considering the time as wrapping
+ * exactly once. */
+unsigned long LFUTimeElapsed(unsigned long ldt) {
+ unsigned long now = LFUGetTimeInMinutes();
+ if (now >= ldt) return now-ldt;
+ return 65535-ldt+now;
+}
+
+/* Logarithmically increment a counter. The greater is the current counter value
+ * the less likely is that it gets really implemented. Saturate it at 255. */
+uint8_t LFULogIncr(uint8_t counter) {
+ if (counter == 255) return 255;
+ double r = (double)rand()/RAND_MAX;
+ double baseval = counter - LFU_INIT_VAL;
+ if (baseval < 0) baseval = 0;
+ double p = 1.0/(baseval*server.lfu_log_factor+1);
+ if (r < p) counter++;
+ return counter;
+}
+
+/* If the object decrement time is reached, decrement the LFU counter and
+ * update the decrement time field. Return the object frequency counter.
+ *
+ * This function is used in order to scan the dataset for the best object
+ * to fit: as we check for the candidate, we incrementally decrement the
+ * counter of the scanned objects if needed. */
+#define LFU_DECR_INTERVAL 1
+unsigned long LFUDecrAndReturn(robj *o) {
+ unsigned long ldt = o->lru >> 8;
+ unsigned long counter = o->lru & 255;
+ if (LFUTimeElapsed(ldt) >= server.lfu_decay_time && counter) {
+ if (counter > LFU_INIT_VAL*2) {
+ counter /= 2;
+ if (counter < LFU_INIT_VAL*2) counter = LFU_INIT_VAL*2;
+ } else {
+ counter--;
+ }
+ o->lru = (LFUGetTimeInMinutes()<<8) | counter;
+ }
+ return counter;
+}
+
+/* ----------------------------------------------------------------------------
+ * The external API for eviction: freeMemroyIfNeeded() is called by the
+ * server when there is data to add in order to make space if needed.
+ * --------------------------------------------------------------------------*/
+
+/* We don't want to count AOF buffers and slaves output buffers as
+ * used memory: the eviction should use mostly data size. This function
+ * returns the sum of AOF and slaves buffer. */
+size_t freeMemoryGetNotCountedMemory(void) {
+ size_t overhead = 0;
+ int slaves = listLength(server.slaves);
+
+ if (slaves) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = listNodeValue(ln);
+ overhead += getClientOutputBufferMemoryUsage(slave);
+ }
+ }
+ if (server.aof_state != AOF_OFF) {
+ overhead += sdslen(server.aof_buf)+aofRewriteBufferSize();
+ }
+ return overhead;
+}
+
+int freeMemoryIfNeeded(void) {
+ size_t mem_reported, mem_used, mem_tofree, mem_freed;
+ mstime_t latency, eviction_latency;
+ long long delta;
+ int slaves = listLength(server.slaves);
+
+ /* When clients are paused the dataset should be static not just from the
+ * POV of clients not being able to write, but also from the POV of
+ * expires and evictions of keys not being performed. */
+ if (clientsArePaused()) return C_OK;
+
+ /* Check if we are over the memory usage limit. If we are not, no need
+ * to subtract the slaves output buffers. We can just return ASAP. */
+ mem_reported = zmalloc_used_memory();
+ if (mem_reported <= server.maxmemory) return C_OK;
+
+ /* Remove the size of slaves output buffers and AOF buffer from the
+ * count of used memory. */
+ mem_used = mem_reported;
+ size_t overhead = freeMemoryGetNotCountedMemory();
+ mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
+
+ /* Check if we are still over the memory limit. */
+ if (mem_used <= server.maxmemory) return C_OK;
+
+ /* Compute how much memory we need to free. */
+ mem_tofree = mem_used - server.maxmemory;
+ mem_freed = 0;
+
+ if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
+ goto cant_free; /* We need to free memory, but policy forbids. */
+
+ latencyStartMonitor(latency);
+ while (mem_freed < mem_tofree) {
+ int j, k, i, keys_freed = 0;
+ static int next_db = 0;
+ sds bestkey = NULL;
+ int bestdbid;
+ redisDb *db;
+ dict *dict;
+ dictEntry *de;
+
+ if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
+ server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
+ {
+ struct evictionPoolEntry *pool = EvictionPoolLRU;
+
+ while(bestkey == NULL) {
+ unsigned long total_keys = 0, keys;
+
+ /* We don't want to make local-db choices when expiring keys,
+ * so to start populate the eviction pool sampling keys from
+ * every DB. */
+ for (i = 0; i < server.dbnum; i++) {
+ db = server.db+i;
+ dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
+ db->dict : db->expires;
+ if ((keys = dictSize(dict)) != 0) {
+ evictionPoolPopulate(i, dict, db->dict, pool);
+ total_keys += keys;
+ }
+ }
+ if (!total_keys) break; /* No keys to evict. */
+
+ /* Go backward from best to worst element to evict. */
+ for (k = EVPOOL_SIZE-1; k >= 0; k--) {
+ if (pool[k].key == NULL) continue;
+ bestdbid = pool[k].dbid;
+
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
+ de = dictFind(server.db[pool[k].dbid].dict,
+ pool[k].key);
+ } else {
+ de = dictFind(server.db[pool[k].dbid].expires,
+ pool[k].key);
+ }
+
+ /* Remove the entry from the pool. */
+ if (pool[k].key != pool[k].cached)
+ sdsfree(pool[k].key);
+ pool[k].key = NULL;
+ pool[k].idle = 0;
+
+ /* If the key exists, is our pick. Otherwise it is
+ * a ghost and we need to try the next element. */
+ if (de) {
+ bestkey = dictGetKey(de);
+ break;
+ } else {
+ /* Ghost... Iterate again. */
+ }
+ }
+ }
+ }
+
+ /* volatile-random and allkeys-random policy */
+ else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
+ server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
+ {
+ /* When evicting a random key, we try to evict a key for
+ * each DB, so we use the static 'next_db' variable to
+ * incrementally visit all DBs. */
+ for (i = 0; i < server.dbnum; i++) {
+ j = (++next_db) % server.dbnum;
+ db = server.db+j;
+ dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
+ db->dict : db->expires;
+ if (dictSize(dict) != 0) {
+ de = dictGetRandomKey(dict);
+ bestkey = dictGetKey(de);
+ bestdbid = j;
+ break;
+ }
+ }
+ }
+
+ /* Finally remove the selected key. */
+ if (bestkey) {
+ db = server.db+bestdbid;
+ robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
+ propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
+ /* We compute the amount of memory freed by db*Delete() alone.
+ * It is possible that actually the memory needed to propagate
+ * the DEL in AOF and replication link is greater than the one
+ * we are freeing removing the key, but we can't account for
+ * that otherwise we would never exit the loop.
+ *
+ * AOF and Output buffer memory will be freed eventually so
+ * we only care about memory used by the key space. */
+ delta = (long long) zmalloc_used_memory();
+ latencyStartMonitor(eviction_latency);
+ if (server.lazyfree_lazy_eviction)
+ dbAsyncDelete(db,keyobj);
+ else
+ dbSyncDelete(db,keyobj);
+ latencyEndMonitor(eviction_latency);
+ latencyAddSampleIfNeeded("eviction-del",eviction_latency);
+ latencyRemoveNestedEvent(latency,eviction_latency);
+ delta -= (long long) zmalloc_used_memory();
+ mem_freed += delta;
+ server.stat_evictedkeys++;
+ notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
+ keyobj, db->id);
+ decrRefCount(keyobj);
+ keys_freed++;
+
+ /* When the memory to free starts to be big enough, we may
+ * start spending so much time here that is impossible to
+ * deliver data to the slaves fast enough, so we force the
+ * transmission here inside the loop. */
+ if (slaves) flushSlavesOutputBuffers();
+
+ /* Normally our stop condition is the ability to release
+ * a fixed, pre-computed amount of memory. However when we
+ * are deleting objects in another thread, it's better to
+ * check, from time to time, if we already reached our target
+ * memory, since the "mem_freed" amount is computed only
+ * across the dbAsyncDelete() call, while the thread can
+ * release the memory all the time. */
+ if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
+ overhead = freeMemoryGetNotCountedMemory();
+ mem_used = zmalloc_used_memory();
+ mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
+ if (mem_used <= server.maxmemory) {
+ mem_freed = mem_tofree;
+ }
+ }
+ }
+
+ if (!keys_freed) {
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("eviction-cycle",latency);
+ goto cant_free; /* nothing to free... */
+ }
+ }
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("eviction-cycle",latency);
+ return C_OK;
+
+cant_free:
+ /* We are here if we are not able to reclaim memory. There is only one
+ * last thing we can try: check if the lazyfree thread has jobs in queue
+ * and wait... */
+ while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
+ if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
+ break;
+ usleep(1000);
+ }
+ return C_ERR;
+}
+
diff --git a/src/expire.c b/src/expire.c
new file mode 100644
index 000000000..a02fe566a
--- /dev/null
+++ b/src/expire.c
@@ -0,0 +1,504 @@
+/* Implementation of EXPIRE (keys with fixed time to live).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/*-----------------------------------------------------------------------------
+ * Incremental collection of expired keys.
+ *
+ * When keys are accessed they are expired on-access. However we need a
+ * mechanism in order to ensure keys are eventually removed when expired even
+ * if no access is performed on them.
+ *----------------------------------------------------------------------------*/
+
+/* Helper function for the activeExpireCycle() function.
+ * This function will try to expire the key that is stored in the hash table
+ * entry 'de' of the 'expires' hash table of a Redis database.
+ *
+ * If the key is found to be expired, it is removed from the database and
+ * 1 is returned. Otherwise no operation is performed and 0 is returned.
+ *
+ * When a key is expired, server.stat_expiredkeys is incremented.
+ *
+ * The parameter 'now' is the current time in milliseconds as is passed
+ * to the function to avoid too many gettimeofday() syscalls. */
+int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
+ long long t = dictGetSignedIntegerVal(de);
+ if (now > t) {
+ sds key = dictGetKey(de);
+ robj *keyobj = createStringObject(key,sdslen(key));
+
+ propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
+ if (server.lazyfree_lazy_expire)
+ dbAsyncDelete(db,keyobj);
+ else
+ dbSyncDelete(db,keyobj);
+ notifyKeyspaceEvent(NOTIFY_EXPIRED,
+ "expired",keyobj,db->id);
+ decrRefCount(keyobj);
+ server.stat_expiredkeys++;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Try to expire a few timed out keys. The algorithm used is adaptive and
+ * will use few CPU cycles if there are few expiring keys, otherwise
+ * it will get more aggressive to avoid that too much memory is used by
+ * keys that can be removed from the keyspace.
+ *
+ * No more than CRON_DBS_PER_CALL databases are tested at every
+ * iteration.
+ *
+ * This kind of call is used when Redis detects that timelimit_exit is
+ * true, so there is more work to do, and we do it more incrementally from
+ * the beforeSleep() function of the event loop.
+ *
+ * Expire cycle type:
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
+ * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
+ * microseconds, and is not repeated again before the same amount of time.
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
+ * executed, where the time limit is a percentage of the REDIS_HZ period
+ * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */
+
+void activeExpireCycle(int type) {
+ /* This function has some global state in order to continue the work
+ * incrementally across calls. */
+ static unsigned int current_db = 0; /* Last DB tested. */
+ static int timelimit_exit = 0; /* Time limit hit in previous call? */
+ static long long last_fast_cycle = 0; /* When last fast cycle ran. */
+
+ int j, iteration = 0;
+ int dbs_per_call = CRON_DBS_PER_CALL;
+ long long start = ustime(), timelimit;
+
+ /* When clients are paused the dataset should be static not just from the
+ * POV of clients not being able to write, but also from the POV of
+ * expires and evictions of keys not being performed. */
+ if (clientsArePaused()) return;
+
+ if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
+        /* Don't start a fast cycle if the previous cycle did not exit
+         * for the time limit. Also don't repeat a fast cycle for the same period
+ * as the fast cycle total duration itself. */
+ if (!timelimit_exit) return;
+ if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
+ last_fast_cycle = start;
+ }
+
+ /* We usually should test CRON_DBS_PER_CALL per iteration, with
+ * two exceptions:
+ *
+ * 1) Don't test more DBs than we have.
+ * 2) If last time we hit the time limit, we want to scan all DBs
+ * in this iteration, as there is work to do in some DB and we don't want
+ * expired keys to use memory for too much time. */
+ if (dbs_per_call > server.dbnum || timelimit_exit)
+ dbs_per_call = server.dbnum;
+
+ /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
+ * per iteration. Since this function gets called with a frequency of
+ * server.hz times per second, the following is the max amount of
+ * microseconds we can spend in this function. */
+ timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
+ timelimit_exit = 0;
+ if (timelimit <= 0) timelimit = 1;
+
+ if (type == ACTIVE_EXPIRE_CYCLE_FAST)
+ timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
+
+ for (j = 0; j < dbs_per_call; j++) {
+ int expired;
+ redisDb *db = server.db+(current_db % server.dbnum);
+
+ /* Increment the DB now so we are sure if we run out of time
+ * in the current DB we'll restart from the next. This allows to
+ * distribute the time evenly across DBs. */
+ current_db++;
+
+ /* Continue to expire if at the end of the cycle more than 25%
+ * of the keys were expired. */
+ do {
+ unsigned long num, slots;
+ long long now, ttl_sum;
+ int ttl_samples;
+
+ /* If there is nothing to expire try next DB ASAP. */
+ if ((num = dictSize(db->expires)) == 0) {
+ db->avg_ttl = 0;
+ break;
+ }
+ slots = dictSlots(db->expires);
+ now = mstime();
+
+ /* When there are less than 1% filled slots getting random
+ * keys is expensive, so stop here waiting for better times...
+ * The dictionary will be resized asap. */
+ if (num && slots > DICT_HT_INITIAL_SIZE &&
+ (num*100/slots < 1)) break;
+
+ /* The main collection cycle. Sample random keys among keys
+ * with an expire set, checking for expired ones. */
+ expired = 0;
+ ttl_sum = 0;
+ ttl_samples = 0;
+
+ if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
+ num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
+
+ while (num--) {
+ dictEntry *de;
+ long long ttl;
+
+ if ((de = dictGetRandomKey(db->expires)) == NULL) break;
+ ttl = dictGetSignedIntegerVal(de)-now;
+ if (activeExpireCycleTryExpire(db,de,now)) expired++;
+ if (ttl > 0) {
+ /* We want the average TTL of keys yet not expired. */
+ ttl_sum += ttl;
+ ttl_samples++;
+ }
+ }
+
+ /* Update the average TTL stats for this database. */
+ if (ttl_samples) {
+ long long avg_ttl = ttl_sum/ttl_samples;
+
+ /* Do a simple running average with a few samples.
+ * We just use the current estimate with a weight of 2%
+ * and the previous estimate with a weight of 98%. */
+ if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
+ db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
+ }
+
+ /* We can't block forever here even if there are many keys to
+ * expire. So after a given amount of milliseconds return to the
+ * caller waiting for the other active expire cycle. */
+ iteration++;
+ if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
+ long long elapsed = ustime()-start;
+
+ latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
+ if (elapsed > timelimit) timelimit_exit = 1;
+ }
+ if (timelimit_exit) return;
+ /* We don't repeat the cycle if there are less than 25% of keys
+ * found expired in the current DB. */
+ } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires of keys created in writable slaves
+ *
+ * Normally slaves do not process expires: they wait for the masters to synthesize
+ * DEL operations in order to retain consistency. However writable slaves are
+ * an exception: if a key is created in the slave and an expire is assigned
+ * to it, we need a way to expire such a key, since the master does not know
+ * anything about such a key.
+ *
+ * In order to do so, we track keys created in the slave side with an expire
+ * set, and call the expireSlaveKeys() function from time to time in order to
+ * reclaim the keys if they already expired.
+ *
+ * Note that the use case we are trying to cover here, is a popular one where
+ * slaves are put in writable mode in order to compute slow operations in
+ * the slave side that are mostly useful to actually read data in a more
+ * processed way. Think of set intersections in a tmp key, with an expire so
+ * that it is also used as a cache to avoid intersecting every time.
+ *
+ * This implementation is currently not perfect but a lot better than leaking
+ * the keys as implemented in 3.2.
+ *----------------------------------------------------------------------------*/
+
+/* The dictionary where we remember key names and database ID of keys we may
+ * want to expire from the slave. Since this function is not often used we
+ * don't even care to initialize the database at startup. We'll do it once
+ * the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
+ * is called.
+ *
+ * The dictionary has an SDS string representing the key as the hash table
+ * key, while the value is a 64 bit unsigned integer with the bits corresponding
+ * to the DB where the keys may exist set to 1. Currently the keys created
+ * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
+ * to the max 64 bit unsigned value when we know there is a key with a DB
+ * ID greater than 63, and check all the configured DBs in such a case. */
+dict *slaveKeysWithExpire = NULL;
+
+/* Check the set of keys created in the writable slave with an expire set, to
+ * check if they should be evicted. */
+void expireSlaveKeys(void) {
+ if (slaveKeysWithExpire == NULL ||
+ dictSize(slaveKeysWithExpire) == 0) return;
+
+ int cycles = 0, noexpire = 0;
+ mstime_t start = mstime();
+ while(1) {
+ dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
+ sds keyname = dictGetKey(de);
+ uint64_t dbids = dictGetUnsignedIntegerVal(de);
+ uint64_t new_dbids = 0;
+
+ /* Check the key against every database corresponding to the
+ * bits set in the value bitmap. */
+ int dbid = 0;
+ while(dbids && dbid < server.dbnum) {
+ if ((dbids & 1) != 0) {
+ redisDb *db = server.db+dbid;
+ dictEntry *expire = dictFind(db->expires,keyname);
+ int expired = 0;
+
+ if (expire &&
+ activeExpireCycleTryExpire(server.db+dbid,expire,start))
+ {
+ expired = 1;
+ }
+
+ /* If the key was not expired in this DB, we need to set the
+ * corresponding bit in the new bitmap we set as value.
+ * At the end of the loop if the bitmap is zero, it means we
+ * no longer need to keep track of this key. */
+ if (expire && !expired) {
+ noexpire++;
+ new_dbids |= (uint64_t)1 << dbid;
+ }
+ }
+ dbid++;
+ dbids >>= 1;
+ }
+
+ /* Set the new bitmap as value of the key, in the dictionary
+ * of keys with an expire set directly in the writable slave. Otherwise
+ * if the bitmap is zero, we no longer need to keep track of it. */
+ if (new_dbids)
+ dictSetUnsignedIntegerVal(de,new_dbids);
+ else
+ dictDelete(slaveKeysWithExpire,keyname);
+
+        /* Stop conditions: found 3 keys we can't expire in a row or
+ * time limit was reached. */
+ cycles++;
+ if (noexpire > 3) break;
+ if ((cycles % 64) == 0 && mstime()-start > 1) break;
+ if (dictSize(slaveKeysWithExpire) == 0) break;
+ }
+}
+
+/* Track keys that received an EXPIRE or similar command in the context
+ * of a writable slave. */
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
+ if (slaveKeysWithExpire == NULL) {
+ static dictType dt = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
+ };
+ slaveKeysWithExpire = dictCreate(&dt,NULL);
+ }
+ if (db->id > 63) return;
+
+ dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
+ /* If the entry was just created, set it to a copy of the SDS string
+ * representing the key: we don't want to need to take those keys
+ * in sync with the main DB. The keys will be removed by expireSlaveKeys()
+ * as it scans to find keys to remove. */
+ if (de->key == key->ptr) {
+ de->key = sdsdup(key->ptr);
+ dictSetUnsignedIntegerVal(de,0);
+ }
+
+ uint64_t dbids = dictGetUnsignedIntegerVal(de);
+ dbids |= (uint64_t)1 << db->id;
+ dictSetUnsignedIntegerVal(de,dbids);
+}
+
+/* Return the number of keys we are tracking. */
+size_t getSlaveKeyWithExpireCount(void) {
+ if (slaveKeysWithExpire == NULL) return 0;
+ return dictSize(slaveKeysWithExpire);
+}
+
+/* Remove the keys in the hash table. We need to do that when data is
+ * flushed from the server. We may receive new keys from the master with
+ * the same name/db and it is no longer a good idea to expire them.
+ *
+ * Note: technically we should handle the case of a single DB being flushed
+ * but it is not worth it since anyway race conditions using the same set
+ * of key names in a writable slave and in its master will lead to
+ * inconsistencies. This is just a best-effort thing we do. */
+void flushSlaveKeysWithExpireList(void) {
+ if (slaveKeysWithExpire) {
+ dictRelease(slaveKeysWithExpire);
+ slaveKeysWithExpire = NULL;
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires Commands
+ *----------------------------------------------------------------------------*/
+
+/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
+ * and PEXPIREAT. Because the command's second argument may be relative or absolute
+ * the "basetime" argument is used to signal what the base time is (either 0
+ * for *AT variants of the command, or the current time for relative expires).
+ *
+ * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
+ * the argv[2] parameter. The basetime is always specified in milliseconds. */
+void expireGenericCommand(client *c, long long basetime, int unit) {
+ robj *key = c->argv[1], *param = c->argv[2];
+ long long when; /* unix time in milliseconds when the key will expire. */
+
+ if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
+ return;
+
+ if (unit == UNIT_SECONDS) when *= 1000;
+ when += basetime;
+
+ /* No key, return zero. */
+ if (lookupKeyWrite(c->db,key) == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+
+ /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
+     * should never be executed as a DEL when loading the AOF or in the context
+ * of a slave instance.
+ *
+ * Instead we take the other branch of the IF statement setting an expire
+ * (possibly in the past) and wait for an explicit DEL from the master. */
+ if (when <= mstime() && !server.loading && !server.masterhost) {
+ robj *aux;
+
+ int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
+ dbSyncDelete(c->db,key);
+ serverAssertWithInfo(c,key,deleted);
+ server.dirty++;
+
+ /* Replicate/AOF this as an explicit DEL or UNLINK. */
+ aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+ rewriteClientCommandVector(c,2,aux,key);
+ signalModifiedKey(c->db,key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+ addReply(c, shared.cone);
+ return;
+ } else {
+ setExpire(c,c->db,key,when);
+ addReply(c,shared.cone);
+ signalModifiedKey(c->db,key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
+ server.dirty++;
+ return;
+ }
+}
+
+/* EXPIRE key seconds */
+void expireCommand(client *c) {
+ expireGenericCommand(c,mstime(),UNIT_SECONDS);
+}
+
+/* EXPIREAT key time */
+void expireatCommand(client *c) {
+ expireGenericCommand(c,0,UNIT_SECONDS);
+}
+
+/* PEXPIRE key milliseconds */
+void pexpireCommand(client *c) {
+ expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
+}
+
+/* PEXPIREAT key ms_time */
+void pexpireatCommand(client *c) {
+ expireGenericCommand(c,0,UNIT_MILLISECONDS);
+}
+
+/* Implements TTL and PTTL */
+void ttlGenericCommand(client *c, int output_ms) {
+ long long expire, ttl = -1;
+
+ /* If the key does not exist at all, return -2 */
+ if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
+ addReplyLongLong(c,-2);
+ return;
+ }
+ /* The key exists. Return -1 if it has no expire, or the actual
+ * TTL value otherwise. */
+ expire = getExpire(c->db,c->argv[1]);
+ if (expire != -1) {
+ ttl = expire-mstime();
+ if (ttl < 0) ttl = 0;
+ }
+ if (ttl == -1) {
+ addReplyLongLong(c,-1);
+ } else {
+ addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
+ }
+}
+
+/* TTL key */
+void ttlCommand(client *c) {
+ ttlGenericCommand(c, 0);
+}
+
+/* PTTL key */
+void pttlCommand(client *c) {
+ ttlGenericCommand(c, 1);
+}
+
+/* PERSIST key */
+void persistCommand(client *c) {
+ if (lookupKeyWrite(c->db,c->argv[1])) {
+ if (removeExpire(c->db,c->argv[1])) {
+ addReply(c,shared.cone);
+ server.dirty++;
+ } else {
+ addReply(c,shared.czero);
+ }
+ } else {
+ addReply(c,shared.czero);
+ }
+}
+
+/* TOUCH key1 [key2 key3 ... keyN] */
+void touchCommand(client *c) {
+ int touched = 0;
+ for (int j = 1; j < c->argc; j++)
+ if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++;
+ addReplyLongLong(c,touched);
+}
+
diff --git a/src/fmacros.h b/src/fmacros.h
index 44e378a68..6e56c759d 100644
--- a/src/fmacros.h
+++ b/src/fmacros.h
@@ -34,6 +34,11 @@
#if defined(__linux__)
#define _GNU_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#if defined(_AIX)
+#define _ALL_SOURCE
#endif
#if defined(__linux__) || defined(__OpenBSD__)
diff --git a/src/geo.c b/src/geo.c
new file mode 100644
index 000000000..90216e7dd
--- /dev/null
+++ b/src/geo.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "geo.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+
+/* Things exported from t_zset.c only for geo.c, since it is the only other
+ * part of Redis that requires close zset introspection. */
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+int zslValueLteMax(double value, zrangespec *spec);
+
+/* ====================================================================
+ * This file implements the following commands:
+ *
+ * - geoadd - add coordinates for value to geoset
+ * - georadius - search radius by coordinates in geoset
+ * - georadiusbymember - search radius based on geoset member position
+ * ==================================================================== */
+
+/* ====================================================================
+ * geoArray implementation
+ * ==================================================================== */
+
+/* Create a new array of geoPoints. */
+geoArray *geoArrayCreate(void) {
+ geoArray *ga = zmalloc(sizeof(*ga));
+ /* It gets allocated on first geoArrayAppend() call. */
+ ga->array = NULL;
+ ga->buckets = 0;
+ ga->used = 0;
+ return ga;
+}
+
+/* Add a new entry and return its pointer so that the caller can populate
+ * it with data. */
+geoPoint *geoArrayAppend(geoArray *ga) {
+ if (ga->used == ga->buckets) {
+ ga->buckets = (ga->buckets == 0) ? 8 : ga->buckets*2;
+ ga->array = zrealloc(ga->array,sizeof(geoPoint)*ga->buckets);
+ }
+ geoPoint *gp = ga->array+ga->used;
+ ga->used++;
+ return gp;
+}
+
+/* Destroy a geoArray created with geoArrayCreate(). */
+void geoArrayFree(geoArray *ga) {
+ size_t i;
+ for (i = 0; i < ga->used; i++) sdsfree(ga->array[i].member);
+ zfree(ga->array);
+ zfree(ga);
+}
+
+/* ====================================================================
+ * Helpers
+ * ==================================================================== */
+int decodeGeohash(double bits, double *xy) {
+ GeoHashBits hash = { .bits = (uint64_t)bits, .step = GEO_STEP_MAX };
+ return geohashDecodeToLongLatWGS84(hash, xy);
+}
+
+/* Input Argument Helper */
+/* Take a pointer to the latitude arg then use the next arg for longitude.
+ * On parse error C_ERR is returned, otherwise C_OK. */
+int extractLongLatOrReply(client *c, robj **argv, double *xy) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ if (getDoubleFromObjectOrReply(c, argv[i], xy + i, NULL) !=
+ C_OK) {
+ return C_ERR;
+ }
+ }
+ if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX ||
+ xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) {
+ addReplySds(c, sdscatprintf(sdsempty(),
+ "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]));
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Input Argument Helper */
+/* Decode lat/long from a zset member's score.
+ * Returns C_OK on successful decoding, otherwise C_ERR is returned. */
+int longLatFromMember(robj *zobj, robj *member, double *xy) {
+ double score = 0;
+
+ if (zsetScore(zobj, member->ptr, &score) == C_ERR) return C_ERR;
+ if (!decodeGeohash(score, xy)) return C_ERR;
+ return C_OK;
+}
+
+/* Check that the unit argument matches one of the known units, and returns
+ * the conversion factor to meters (you need to divide meters by the conversion
+ * factor to convert to the right unit).
+ *
+ * If the unit is not valid, an error is reported to the client, and a value
+ * less than zero is returned. */
+double extractUnitOrReply(client *c, robj *unit) {
+ char *u = unit->ptr;
+
+ if (!strcmp(u, "m")) {
+ return 1;
+ } else if (!strcmp(u, "km")) {
+ return 1000;
+ } else if (!strcmp(u, "ft")) {
+ return 0.3048;
+ } else if (!strcmp(u, "mi")) {
+ return 1609.34;
+ } else {
+ addReplyError(c,
+ "unsupported unit provided. please use m, km, ft, mi");
+ return -1;
+ }
+}
+
+/* Input Argument Helper.
+ * Extract the distance from the specified two arguments starting at 'argv'
+ * that should be in the form: <number> <unit> and return the distance in the
+ * specified unit on success. *conversion is populated with the coefficient
+ * to use in order to convert meters to the unit.
+ *
+ * On error a value less than zero is returned. */
+double extractDistanceOrReply(client *c, robj **argv,
+ double *conversion) {
+ double distance;
+ if (getDoubleFromObjectOrReply(c, argv[0], &distance,
+ "need numeric radius") != C_OK) {
+ return -1;
+ }
+
+ if (distance < 0) {
+ addReplyError(c,"radius cannot be negative");
+ return -1;
+ }
+
+ double to_meters = extractUnitOrReply(c,argv[1]);
+ if (to_meters < 0) {
+ return -1;
+ }
+
+ if (conversion) *conversion = to_meters;
+ return distance * to_meters;
+}
+
+/* The default addReplyDouble has too much accuracy. We use this
+ * for returning location distances. "5.2145 meters away" is nicer
+ * than "5.2144992818115 meters away." We provide 4 digits after the dot
+ * so that the returned value is decently accurate even when the unit is
+ * the kilometer. */
+void addReplyDoubleDistance(client *c, double d) {
+ char dbuf[128];
+ int dlen = snprintf(dbuf, sizeof(dbuf), "%.4f", d);
+ addReplyBulkCBuffer(c, dbuf, dlen);
+}
+
+/* Helper function for geoGetPointsInRange(): given a sorted set score
+ * representing a point, and another point (the center of our search) and
+ * a radius, appends this entry as a geoPoint into the specified geoArray
+ * only if the point is within the search area.
+ *
+ * returns C_OK if the point is included, or C_ERR if it is outside. */
+int geoAppendIfWithinRadius(geoArray *ga, double lon, double lat, double radius, double score, sds member) {
+ double distance, xy[2];
+
+ if (!decodeGeohash(score,xy)) return C_ERR; /* Can't decode. */
+ /* Note that geohashGetDistanceIfInRadiusWGS84() takes arguments in
+ * reverse order: longitude first, latitude later. */
+ if (!geohashGetDistanceIfInRadiusWGS84(lon,lat, xy[0], xy[1],
+ radius, &distance))
+ {
+ return C_ERR;
+ }
+
+ /* Append the new element. */
+ geoPoint *gp = geoArrayAppend(ga);
+ gp->longitude = xy[0];
+ gp->latitude = xy[1];
+ gp->dist = distance;
+ gp->member = member;
+ gp->score = score;
+ return C_OK;
+}
+
+/* Query a Redis sorted set to extract all the elements between 'min' and
+ * 'max', appending them into the array of geoPoint structures 'gparray'.
+ * The command returns the number of elements added to the array.
+ *
+ * Elements which are farther than 'radius' from the specified 'x' and 'y'
+ * coordinates are not included.
+ *
+ * The ability of this function to append to an existing set of points is
+ * important for good performances because querying by radius is performed
+ * using multiple queries to the sorted set, that we later need to sort
+ * via qsort. Similarly we need to be able to reject points outside the search
+ * radius area ASAP in order to avoid processing more points than needed. */
+int geoGetPointsInRange(robj *zobj, double min, double max, double lon, double lat, double radius, geoArray *ga) {
+ /* minex 0 = include min in range; maxex 1 = exclude max in range */
+ /* That's: min <= val < max */
+ zrangespec range = { .min = min, .max = max, .minex = 0, .maxex = 1 };
+ size_t origincount = ga->used;
+ sds member;
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+ unsigned char *vstr = NULL;
+ unsigned int vlen = 0;
+ long long vlong = 0;
+ double score = 0;
+
+ if ((eptr = zzlFirstInRange(zl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ sptr = ziplistNext(zl, eptr);
+ while (eptr) {
+ score = zzlGetScore(sptr);
+
+ /* If we fell out of range, break. */
+ if (!zslValueLteMax(score, &range))
+ break;
+
+ /* We know the element exists. ziplistGet should always succeed */
+ ziplistGet(eptr, &vstr, &vlen, &vlong);
+ member = (vstr == NULL) ? sdsfromlonglong(vlong) :
+ sdsnewlen(vstr,vlen);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,score,member)
+ == C_ERR) sdsfree(member);
+ zzlNext(zl, &eptr, &sptr);
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ zskiplistNode *ln;
+
+ if ((ln = zslFirstInRange(zsl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ while (ln) {
+ sds ele = ln->ele;
+ /* Abort when the node is no longer in range. */
+ if (!zslValueLteMax(ln->score, &range))
+ break;
+
+ ele = sdsdup(ele);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,ln->score,ele)
+ == C_ERR) sdsfree(ele);
+ ln = ln->level[0].forward;
+ }
+ }
+ return ga->used - origincount;
+}
+
+/* Compute the sorted set scores min (inclusive), max (exclusive) we should
+ * query in order to retrieve all the elements inside the specified area
+ * 'hash'. The two scores are returned by reference in *min and *max. */
+void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {
+ /* We want to compute the sorted set scores that will include all the
+ * elements inside the specified Geohash 'hash', which has as many
+ * bits as specified by hash.step * 2.
+ *
+ * So if step is, for example, 3, and the hash value in binary
+ * is 101010, since our score is 52 bits we want every element which
+ * is in binary: 101010?????????????????????????????????????????????
+ * Where ? can be 0 or 1.
+ *
+ * To get the min score we just use the initial hash value left
+ * shifted enough to get the 52 bit value. Later we increment the
+     * 6 bit prefix (see the hash.bits++ statement), and get the new
+ * prefix: 101011, which we align again to 52 bits to get the maximum
+ * value (which is excluded from the search). So we get everything
+ * between the two following scores (represented in binary):
+ *
+ * 1010100000000000000000000000000000000000000000000000 (included)
+ * and
+ * 1010110000000000000000000000000000000000000000000000 (excluded).
+ */
+ *min = geohashAlign52Bits(hash);
+ hash.bits++;
+ *max = geohashAlign52Bits(hash);
+}
+
+/* Obtain all members between the min/max of this geohash bounding box.
+ * Populate a geoArray of GeoPoints by calling geoGetPointsInRange().
+ * Return the number of points added to the array. */
+int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, double lon, double lat, double radius) {
+ GeoHashFix52Bits min, max;
+
+ scoresOfGeoHashBox(hash,&min,&max);
+ return geoGetPointsInRange(zobj, min, max, lon, lat, radius, ga);
+}
+
+/* Search all eight neighbors + self geohash box.
+ * Returns the total number of members added to 'ga' across all boxes. */
+int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, double radius, geoArray *ga) {
+ GeoHashBits neighbors[9];
+ unsigned int i, count = 0, last_processed = 0;
+ int debugmsg = 0; /* Set to 1 to enable the D() debug dumps below. */
+
+ /* neighbors[0] is the center box itself; 1..8 are its neighbors. */
+ neighbors[0] = n.hash;
+ neighbors[1] = n.neighbors.north;
+ neighbors[2] = n.neighbors.south;
+ neighbors[3] = n.neighbors.east;
+ neighbors[4] = n.neighbors.west;
+ neighbors[5] = n.neighbors.north_east;
+ neighbors[6] = n.neighbors.north_west;
+ neighbors[7] = n.neighbors.south_east;
+ neighbors[8] = n.neighbors.south_west;
+
+ /* For each neighbor (*and* our own hashbox), get all the matching
+ * members and add them to the potential result list. */
+ for (i = 0; i < sizeof(neighbors) / sizeof(*neighbors); i++) {
+ if (HASHISZERO(neighbors[i])) {
+ if (debugmsg) D("neighbors[%d] is zero",i);
+ continue;
+ }
+
+ /* Debugging info. */
+ if (debugmsg) {
+ GeoHashRange long_range, lat_range;
+ geohashGetCoordRange(&long_range,&lat_range);
+ GeoHashArea myarea = {{0}};
+ geohashDecode(long_range, lat_range, neighbors[i], &myarea);
+
+ /* Dump center square. */
+ D("neighbors[%d]:\n",i);
+ D("area.longitude.min: %f\n", myarea.longitude.min);
+ D("area.longitude.max: %f\n", myarea.longitude.max);
+ D("area.latitude.min: %f\n", myarea.latitude.min);
+ D("area.latitude.max: %f\n", myarea.latitude.max);
+ D("\n");
+ }
+
+ /* When a huge Radius (in the 5000 km range or more) is used,
+ * adjacent neighbors can be the same, leading to duplicated
+ * elements. Skip every range which is the same as the one
+ * processed previously. Note: only the *last processed* box is
+ * compared, so duplicates are assumed to be adjacent in the
+ * array order above. last_processed == 0 means "none yet":
+ * index 0 (the center box) is never skipped. */
+ if (last_processed &&
+ neighbors[i].bits == neighbors[last_processed].bits &&
+ neighbors[i].step == neighbors[last_processed].step)
+ {
+ if (debugmsg)
+ D("Skipping processing of %d, same as previous\n",i);
+ continue;
+ }
+ count += membersOfGeoHashBox(zobj, neighbors[i], ga, lon, lat, radius);
+ last_processed = i;
+ }
+ return count;
+}
+
+/* Sort comparators for qsort(): order geoPoints by ascending distance.
+ * Follows the qsort() contract: negative / zero / positive result. */
+static int sort_gp_asc(const void *a, const void *b) {
+ const struct geoPoint *gpa = a, *gpb = b;
+ /* We can't do adist - bdist because they are doubles and
+ * the comparator returns an int. */
+ if (gpa->dist > gpb->dist)
+ return 1;
+ else if (gpa->dist == gpb->dist)
+ return 0;
+ else
+ return -1;
+}
+
+/* Descending-distance comparator: simply the negated ascending one. */
+static int sort_gp_desc(const void *a, const void *b) {
+ return -sort_gp_asc(a, b);
+}
+
+/* ====================================================================
+ * Commands
+ * ==================================================================== */
+
+/* GEOADD key long lat name [long2 lat2 name2 ... longN latN nameN]
+ *
+ * Adds the members with the given coordinates by translating the call
+ * into an equivalent ZADD, where each score is the 52-bit geohash of
+ * the longitude,latitude pair. */
+void geoaddCommand(client *c) {
+ /* Check arguments number for sanity. */
+ if ((c->argc - 2) % 3 != 0) {
+ /* Arguments after the key must come in groups of three:
+ * longitude, latitude, member name. */
+ addReplyError(c, "syntax error. Try GEOADD key [x1] [y1] [name1] "
+ "[x2] [y2] [name2] ... ");
+ return;
+ }
+
+ int elements = (c->argc - 2) / 3;
+ int argc = 2+elements*2; /* ZADD key score ele ... */
+ robj **argv = zcalloc(argc*sizeof(robj*));
+ argv[0] = createRawStringObject("zadd",4);
+ argv[1] = c->argv[1]; /* key */
+ incrRefCount(argv[1]);
+
+ /* Create the argument vector to call ZADD in order to add all
+ * the score,value pairs to the requested zset, where score is actually
+ * an encoded version of lat,long. */
+ int i;
+ for (i = 0; i < elements; i++) {
+ double xy[2];
+
+ if (extractLongLatOrReply(c, (c->argv+2)+(i*3),xy) == C_ERR) {
+ /* Free the partially built vector. zcalloc() zero-initialized
+ * argv, so slots not yet populated are NULL and skipped.
+ * (Note: 'i' is deliberately reused for this cleanup loop.) */
+ for (i = 0; i < argc; i++)
+ if (argv[i]) decrRefCount(argv[i]);
+ zfree(argv);
+ return;
+ }
+
+ /* Turn the coordinates into the score of the element. */
+ GeoHashBits hash;
+ geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
+ GeoHashFix52Bits bits = geohashAlign52Bits(hash);
+ robj *score = createObject(OBJ_STRING, sdsfromlonglong(bits));
+ robj *val = c->argv[2 + i * 3 + 2];
+ argv[2+i*2] = score;
+ argv[3+i*2] = val;
+ incrRefCount(val);
+ }
+
+ /* Finally call ZADD that will do the work for us. Presumably
+ * replaceClientCommandVector() takes ownership of argv and its
+ * objects — TODO confirm against its definition. */
+ replaceClientCommandVector(c,argc,argv);
+ zaddCommand(c);
+}
+
+#define SORT_NONE 0
+#define SORT_ASC 1
+#define SORT_DESC 2
+
+#define RADIUS_COORDS (1<<0) /* Search around coordinates. */
+#define RADIUS_MEMBER (1<<1) /* Search around member. */
+#define RADIUS_NOSTORE (1<<2) /* Do not accept STORE/STOREDIST option. */
+
+/* GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count] [STORE key] [STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ...
+ *
+ * 'flags' selects the search center (RADIUS_COORDS / RADIUS_MEMBER) and
+ * whether the STORE/STOREDIST options are rejected (RADIUS_NOSTORE). */
+void georadiusGeneric(client *c, int flags) {
+ robj *key = c->argv[1];
+ robj *storekey = NULL;
+ int storedist = 0; /* 0 for STORE, 1 for STOREDIST. */
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, key, shared.emptymultibulk)) == NULL ||
+ checkType(c, zobj, OBJ_ZSET)) {
+ return;
+ }
+
+ /* Find long/lat to use for radius search based on inquiry type */
+ int base_args;
+ double xy[2] = { 0 };
+ if (flags & RADIUS_COORDS) {
+ base_args = 6;
+ if (extractLongLatOrReply(c, c->argv + 2, xy) == C_ERR)
+ return;
+ } else if (flags & RADIUS_MEMBER) {
+ base_args = 5;
+ robj *member = c->argv[2];
+ if (longLatFromMember(zobj, member, xy) == C_ERR) {
+ addReplyError(c, "could not decode requested zset member");
+ return;
+ }
+ } else {
+ addReplyError(c, "Unknown georadius search type");
+ return;
+ }
+
+ /* Extract radius and units from arguments */
+ double radius_meters = 0, conversion = 1;
+ if ((radius_meters = extractDistanceOrReply(c, c->argv + base_args - 2,
+ &conversion)) < 0) {
+ return;
+ }
+
+ /* Discover and populate all optional parameters. */
+ int withdist = 0, withhash = 0, withcoords = 0;
+ int sort = SORT_NONE;
+ long long count = 0;
+ if (c->argc > base_args) {
+ int remaining = c->argc - base_args;
+ for (int i = 0; i < remaining; i++) {
+ char *arg = c->argv[base_args + i]->ptr;
+ if (!strcasecmp(arg, "withdist")) {
+ withdist = 1;
+ } else if (!strcasecmp(arg, "withhash")) {
+ withhash = 1;
+ } else if (!strcasecmp(arg, "withcoord")) {
+ withcoords = 1;
+ } else if (!strcasecmp(arg, "asc")) {
+ sort = SORT_ASC;
+ } else if (!strcasecmp(arg, "desc")) {
+ sort = SORT_DESC;
+ } else if (!strcasecmp(arg, "count") && (i+1) < remaining) {
+ if (getLongLongFromObjectOrReply(c, c->argv[base_args+i+1],
+ &count, NULL) != C_OK) return;
+ if (count <= 0) {
+ addReplyError(c,"COUNT must be > 0");
+ return;
+ }
+ i++;
+ } else if (!strcasecmp(arg, "store") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 0;
+ i++;
+ } else if (!strcasecmp(arg, "storedist") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 1;
+ i++;
+ } else {
+ addReply(c, shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* Trap options not compatible with STORE and STOREDIST. */
+ if (storekey && (withdist || withhash || withcoords)) {
+ addReplyError(c,
+ "STORE option in GEORADIUS is not compatible with "
+ "WITHDIST, WITHHASH and WITHCOORDS options");
+ return;
+ }
+
+ /* COUNT without ordering does not make much sense, force ASC
+ * ordering if COUNT was specified but no sorting was requested. */
+ if (count != 0 && sort == SORT_NONE) sort = SORT_ASC;
+
+ /* Get all neighbor geohash boxes for our radius search */
+ GeoHashRadius georadius =
+ geohashGetAreasByRadiusWGS84(xy[0], xy[1], radius_meters);
+
+ /* Search the zset for all matching points */
+ geoArray *ga = geoArrayCreate();
+ membersOfAllNeighbors(zobj, georadius, xy[0], xy[1], radius_meters, ga);
+
+ /* If no matching results, the user gets an empty reply. */
+ if (ga->used == 0 && storekey == NULL) {
+ addReply(c, shared.emptymultibulk);
+ geoArrayFree(ga);
+ return;
+ }
+
+ long result_length = ga->used;
+ long returned_items = (count == 0 || result_length < count) ?
+ result_length : count;
+ long option_length = 0;
+
+ /* Process [optional] requested sorting */
+ if (sort == SORT_ASC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_asc);
+ } else if (sort == SORT_DESC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_desc);
+ }
+
+ if (storekey == NULL) {
+ /* No target key, return results to user. */
+
+ /* Our options are self-contained nested multibulk replies, so we
+ * only need to track how many of those nested replies we return. */
+ if (withdist)
+ option_length++;
+
+ if (withcoords)
+ option_length++;
+
+ if (withhash)
+ option_length++;
+
+ /* The multibulk len we send is exactly result_length. The result is
+ * either all strings of just zset members *or* a nested multi-bulk
+ * reply containing the zset member string _and_ all the additional
+ * options the user enabled for this request. */
+ addReplyMultiBulkLen(c, returned_items);
+
+ /* Finally send results back to the caller */
+ int i;
+ for (i = 0; i < returned_items; i++) {
+ geoPoint *gp = ga->array+i;
+ gp->dist /= conversion; /* Fix according to unit. */
+
+ /* If we have options in option_length, return each sub-result
+ * as a nested multi-bulk. Add 1 to account for result value
+ * itself. */
+ if (option_length)
+ addReplyMultiBulkLen(c, option_length + 1);
+
+ addReplyBulkSds(c,gp->member);
+ gp->member = NULL;
+
+ if (withdist)
+ addReplyDoubleDistance(c, gp->dist);
+
+ if (withhash)
+ addReplyLongLong(c, gp->score);
+
+ if (withcoords) {
+ addReplyMultiBulkLen(c, 2);
+ addReplyHumanLongDouble(c, gp->longitude);
+ addReplyHumanLongDouble(c, gp->latitude);
+ }
+ }
+ } else {
+ /* Target key, create a sorted set with the results. */
+ robj *zobj;
+ zset *zs;
+ int i;
+ size_t maxelelen = 0;
+
+ if (returned_items) {
+ zobj = createZsetObject();
+ zs = zobj->ptr;
+ }
+
+ for (i = 0; i < returned_items; i++) {
+ zskiplistNode *znode;
+ geoPoint *gp = ga->array+i;
+ gp->dist /= conversion; /* Fix according to unit. */
+ double score = storedist ? gp->dist : gp->score;
+ size_t elelen = sdslen(gp->member);
+
+ if (maxelelen < elelen) maxelelen = elelen;
+ znode = zslInsert(zs->zsl,score,gp->member);
+ serverAssert(dictAdd(zs->dict,gp->member,&znode->score) == DICT_OK);
+ gp->member = NULL;
+ }
+
+ if (returned_items) {
+ zsetConvertToZiplistIfNeeded(zobj,maxelelen);
+ setKey(c->db,storekey,zobj);
+ decrRefCount(zobj);
+ /* The destination key holds a sorted set, so notify with the
+ * zset event class (was wrongly NOTIFY_LIST). */
+ notifyKeyspaceEvent(NOTIFY_ZSET,"georadiusstore",storekey,
+ c->db->id);
+ server.dirty += returned_items;
+ } else if (dbDelete(c->db,storekey)) {
+ signalModifiedKey(c->db,storekey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
+ server.dirty++;
+ }
+ addReplyLongLong(c, returned_items);
+ }
+ geoArrayFree(ga);
+}
+
+/* GEORADIUS wrapper function (search around explicit coordinates). */
+void georadiusCommand(client *c) {
+ georadiusGeneric(c, RADIUS_COORDS);
+}
+
+/* GEORADIUSBYMEMBER wrapper function (search around an existing member). */
+void georadiusbymemberCommand(client *c) {
+ georadiusGeneric(c, RADIUS_MEMBER);
+}
+
+/* GEORADIUS_RO wrapper function (read-only: STORE/STOREDIST rejected). */
+void georadiusroCommand(client *c) {
+ georadiusGeneric(c, RADIUS_COORDS|RADIUS_NOSTORE);
+}
+
+/* GEORADIUSBYMEMBER_RO wrapper function (read-only variant). */
+void georadiusbymemberroCommand(client *c) {
+ georadiusGeneric(c, RADIUS_MEMBER|RADIUS_NOSTORE);
+}
+
+/* GEOHASH key ele1 ele2 ... eleN
+ *
+ * Returns an array with an 11 characters geohash representation of the
+ * position of the specified elements. */
+void geohashCommand(client *c) {
+ char *geoalphabet= "0123456789bcdefghjkmnpqrstuvwxyz";
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+ if (zobj && checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Geohash elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+ addReply(c,shared.nullbulk);
+ } else {
+ /* The internal format we use for geocoding is a bit different
+ * than the standard, since we use as initial latitude range
+ * -85,85, while the normal geohashing algorithm uses -90,90.
+ * So we have to decode our position and re-encode using the
+ * standard ranges in order to output a valid geohash string. */
+
+ /* Decode... */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullbulk);
+ continue;
+ }
+
+ /* Re-encode */
+ GeoHashRange r[2];
+ GeoHashBits hash;
+ r[0].min = -180;
+ r[0].max = 180;
+ r[1].min = -90;
+ r[1].max = 90;
+ geohashEncode(&r[0],&r[1],xy[0],xy[1],26,&hash);
+
+ char buf[12];
+ int i;
+ for (i = 0; i < 11; i++) {
+ int idx;
+ if (i == 10) {
+ /* We have just 52 bits, but the encoded geohash is
+ * 11 chars * 5 bits = 55 bits wide: for the last
+ * character the shift amount would be negative,
+ * which is undefined behavior in C, so pad the
+ * missing low bits with zero instead. */
+ idx = 0;
+ } else {
+ idx = (hash.bits >> (52-((i+1)*5))) & 0x1f;
+ }
+ buf[i] = geoalphabet[idx];
+ }
+ buf[11] = '\0';
+ addReplyBulkCBuffer(c,buf,11);
+ }
+ }
+}
+
+/* GEOPOS key ele1 ele2 ... eleN
+ *
+ * Returns an array of two-items arrays representing the x,y position of each
+ * element specified in the arguments. For missing elements NULL is returned. */
+void geoposCommand(client *c) {
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+ if (zobj && checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Report elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+ addReply(c,shared.nullmultibulk);
+ } else {
+ /* Decode the 52-bit geohash score back into coordinates. */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullmultibulk);
+ continue;
+ }
+ addReplyMultiBulkLen(c,2);
+ addReplyHumanLongDouble(c,xy[0]); /* longitude */
+ addReplyHumanLongDouble(c,xy[1]); /* latitude */
+ }
+ }
+}
+
+/* GEODIST key ele1 ele2 [unit]
+ *
+ * Return the distance, in meters by default, otherwise according to "unit",
+ * between points ele1 and ele2. If one or more elements are missing NULL
+ * is returned. */
+void geodistCommand(client *c) {
+ double to_meter = 1; /* Conversion factor from the requested unit. */
+
+ /* Check if there is the unit to extract, otherwise assume meters. */
+ if (c->argc == 5) {
+ to_meter = extractUnitOrReply(c,c->argv[4]);
+ if (to_meter < 0) return; /* Error already replied to the client. */
+ } else if (c->argc > 5) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.nullbulk))
+ == NULL || checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Get the scores. We need both otherwise NULL is returned. */
+ double score1, score2, xyxy[4];
+ if (zsetScore(zobj, c->argv[2]->ptr, &score1) == C_ERR ||
+ zsetScore(zobj, c->argv[3]->ptr, &score2) == C_ERR)
+ {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+
+ /* Decode & compute the distance. xyxy holds long1,lat1,long2,lat2. */
+ if (!decodeGeohash(score1,xyxy) || !decodeGeohash(score2,xyxy+2))
+ addReply(c,shared.nullbulk);
+ else
+ addReplyDoubleDistance(c,
+ geohashGetDistance(xyxy[0],xyxy[1],xyxy[2],xyxy[3]) / to_meter);
+}
diff --git a/src/geo.h b/src/geo.h
new file mode 100644
index 000000000..79d0a6a4a
--- /dev/null
+++ b/src/geo.h
@@ -0,0 +1,22 @@
+#ifndef __GEO_H__
+#define __GEO_H__
+
+#include "server.h"
+
+/* Structures used inside geo.c in order to represent points and array of
+ * points on the earth. */
+typedef struct geoPoint {
+ double longitude; /* Decoded longitude of the member. */
+ double latitude; /* Decoded latitude of the member. */
+ double dist; /* Distance from the search center (unit-adjusted by caller). */
+ double score; /* Raw 52-bit geohash sorted-set score. */
+ char *member; /* Member name (sds string, per geo.c usage). */
+} geoPoint;
+
+typedef struct geoArray {
+ struct geoPoint *array; /* Points storage. */
+ size_t buckets; /* Allocated slots — presumably; confirm in geo.c. */
+ size_t used; /* Number of populated slots. */
+} geoArray;
+
+#endif
diff --git a/src/geohash.c b/src/geohash.c
new file mode 100644
index 000000000..1ae7a7e05
--- /dev/null
+++ b/src/geohash.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "geohash.h"
+
+/**
+ * Hashing works like this:
+ * Divide the world into 4 buckets. Label each one as such:
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,1 | 1,1 |
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,0 | 1,0 |
+ * -----------------
+ */
+
+/* Interleave lower bits of x and y, so the bits of x
+ * are in the even positions and bits from y in the odd;
+ * x and y must initially be less than 2**32.
+ * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
+ */
+static inline uint64_t interleave64(uint32_t xlo, uint32_t ylo) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL};
+ static const unsigned int S[] = {1, 2, 4, 8, 16};
+
+ uint64_t x = xlo;
+ uint64_t y = ylo;
+
+ x = (x | (x << S[4])) & B[4];
+ y = (y | (y << S[4])) & B[4];
+
+ x = (x | (x << S[3])) & B[3];
+ y = (y | (y << S[3])) & B[3];
+
+ x = (x | (x << S[2])) & B[2];
+ y = (y | (y << S[2])) & B[2];
+
+ x = (x | (x << S[1])) & B[1];
+ y = (y | (y << S[1])) & B[1];
+
+ x = (x | (x << S[0])) & B[0];
+ y = (y | (y << S[0])) & B[0];
+
+ return x | (y << 1); /* x -> even bits, y -> odd bits */
+}
+
+/* Reverse the interleave process: gather the even bits into the low
+ * 32 bits and the odd bits into the high 32 bits of the result.
+ * Derived from http://stackoverflow.com/questions/4909263 */
+static inline uint64_t deinterleave64(uint64_t interleaved) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+ static const unsigned int S[] = {0, 1, 2, 4, 8, 16};
+
+ uint64_t x = interleaved;
+ uint64_t y = interleaved >> 1;
+
+ x = (x | (x >> S[0])) & B[0];
+ y = (y | (y >> S[0])) & B[0];
+
+ x = (x | (x >> S[1])) & B[1];
+ y = (y | (y >> S[1])) & B[1];
+
+ x = (x | (x >> S[2])) & B[2];
+ y = (y | (y >> S[2])) & B[2];
+
+ x = (x | (x >> S[3])) & B[3];
+ y = (y | (y >> S[3])) & B[3];
+
+ x = (x | (x >> S[4])) & B[4];
+ y = (y | (y >> S[4])) & B[4];
+
+ x = (x | (x >> S[5])) & B[5];
+ y = (y | (y >> S[5])) & B[5];
+
+ return x | (y << 32); /* even bits in low word, odd bits in high word */
+}
+
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) { /* default encode/decode ranges */
+ /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+ /* We can't geocode at the north/south pole, hence +/-85.05112878. */
+ long_range->max = GEO_LONG_MAX;
+ long_range->min = GEO_LONG_MIN;
+ lat_range->max = GEO_LAT_MAX;
+ lat_range->min = GEO_LAT_MIN;
+}
+
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ /* Check basic arguments sanity: returns 1 on success, 0 on failure. */
+ if (hash == NULL || step > 32 || step == 0 ||
+ RANGEPISZERO(lat_range) || RANGEPISZERO(long_range)) return 0;
+
+ /* Return an error when trying to index outside the supported
+ * constraints. */
+ if (longitude > 180 || longitude < -180 ||
+ latitude > 85.05112878 || latitude < -85.05112878) return 0;
+
+ hash->bits = 0;
+ hash->step = step;
+
+ if (latitude < lat_range->min || latitude > lat_range->max ||
+ longitude < long_range->min || longitude > long_range->max) {
+ return 0;
+ }
+
+ double lat_offset =
+ (latitude - lat_range->min) / (lat_range->max - lat_range->min);
+ double long_offset =
+ (longitude - long_range->min) / (long_range->max - long_range->min);
+
+ /* convert to fixed point based on the step size */
+ lat_offset *= (1 << step); /* NOTE(review): 1<<step is int; step==32 would overflow — callers use <= 26, presumably */
+ long_offset *= (1 << step);
+ hash->bits = interleave64(lat_offset, long_offset); /* lat -> even bits, long -> odd bits */
+ return 1;
+}
+
+int geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) { /* encode using default ranges */
+ GeoHashRange r[2] = {{0}};
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash);
+}
+
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ return geohashEncodeType(longitude, latitude, step, hash); /* alias */
+}
+
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area) {
+ if (HASHISZERO(hash) || NULL == area || RANGEISZERO(lat_range) ||
+ RANGEISZERO(long_range)) {
+ return 0; /* invalid input */
+ }
+
+ area->hash = hash;
+ uint8_t step = hash.step;
+ uint64_t hash_sep = deinterleave64(hash.bits); /* hash = [LAT][LONG] */
+
+ double lat_scale = lat_range.max - lat_range.min;
+ double long_scale = long_range.max - long_range.min;
+
+ uint32_t ilato = hash_sep; /* get lat part of deinterleaved hash */
+ uint32_t ilono = hash_sep >> 32; /* shift over to get long part of hash */
+
+ /* divide by 2**step.
+ * Then, for 0-1 coordinate, multiply times scale and add
+ to the min to get the absolute coordinate. */
+ area->latitude.min =
+ lat_range.min + (ilato * 1.0 / (1ull << step)) * lat_scale;
+ area->latitude.max =
+ lat_range.min + ((ilato + 1) * 1.0 / (1ull << step)) * lat_scale;
+ area->longitude.min =
+ long_range.min + (ilono * 1.0 / (1ull << step)) * long_scale;
+ area->longitude.max =
+ long_range.min + ((ilono + 1) * 1.0 / (1ull << step)) * long_scale;
+
+ return 1; /* success */
+}
+
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) { /* decode using default ranges */
+ GeoHashRange r[2] = {{0}};
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashDecode(r[0], r[1], hash, area);
+}
+
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) {
+ return geohashDecodeType(hash, area); /* alias */
+}
+
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) { /* center point: xy[0]=long, xy[1]=lat */
+ if (!xy) return 0;
+ xy[0] = (area->longitude.min + area->longitude.max) / 2;
+ xy[1] = (area->latitude.min + area->latitude.max) / 2;
+ return 1;
+}
+
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) {
+ GeoHashArea area = {{0}};
+ if (!xy || !geohashDecodeType(hash, &area))
+ return 0;
+ return geohashDecodeAreaToLongLat(&area, xy);
+}
+
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy) {
+ return geohashDecodeToLongLatType(hash, xy); /* alias */
+}
+
+static void geohash_move_x(GeoHashBits *hash, int8_t d) { /* move one cell along x: d>0 east, d<0 west; assumes step >= 1 */
+ if (d == 0)
+ return;
+
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL; /* longitude bits (odd positions) */
+ uint64_t y = hash->bits & 0x5555555555555555ULL; /* latitude bits (even positions) */
+
+ uint64_t zz = 0x5555555555555555ULL >> (64 - hash->step * 2); /* carry filler limited to hash width */
+
+ if (d > 0) {
+ x = x + (zz + 1);
+ } else {
+ x = x | zz;
+ x = x - (zz + 1);
+ }
+
+ x &= (0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2)); /* keep only valid odd bits (wraps around) */
+ hash->bits = (x | y);
+}
+
+static void geohash_move_y(GeoHashBits *hash, int8_t d) { /* move one cell along y: d>0 north, d<0 south; assumes step >= 1 */
+ if (d == 0)
+ return;
+
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL; /* longitude bits (odd positions) */
+ uint64_t y = hash->bits & 0x5555555555555555ULL; /* latitude bits (even positions) */
+
+ uint64_t zz = 0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2); /* carry filler limited to hash width */
+ if (d > 0) {
+ y = y + (zz + 1);
+ } else {
+ y = y | zz;
+ y = y - (zz + 1);
+ }
+ y &= (0x5555555555555555ULL >> (64 - hash->step * 2)); /* keep only valid even bits (wraps around) */
+ hash->bits = (x | y);
+}
+
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors) { /* fill the 8 adjacent boxes of *hash */
+ neighbors->east = *hash;
+ neighbors->west = *hash;
+ neighbors->north = *hash;
+ neighbors->south = *hash;
+ neighbors->south_east = *hash;
+ neighbors->south_west = *hash;
+ neighbors->north_east = *hash;
+ neighbors->north_west = *hash;
+
+ geohash_move_x(&neighbors->east, 1);
+ geohash_move_y(&neighbors->east, 0); /* no-op (d == 0), kept for symmetry */
+
+ geohash_move_x(&neighbors->west, -1);
+ geohash_move_y(&neighbors->west, 0);
+
+ geohash_move_x(&neighbors->south, 0);
+ geohash_move_y(&neighbors->south, -1);
+
+ geohash_move_x(&neighbors->north, 0);
+ geohash_move_y(&neighbors->north, 1);
+
+ geohash_move_x(&neighbors->north_west, -1);
+ geohash_move_y(&neighbors->north_west, 1);
+
+ geohash_move_x(&neighbors->north_east, 1);
+ geohash_move_y(&neighbors->north_east, 1);
+
+ geohash_move_x(&neighbors->south_east, 1);
+ geohash_move_y(&neighbors->south_east, -1);
+
+ geohash_move_x(&neighbors->south_west, -1);
+ geohash_move_y(&neighbors->south_west, -1);
+}
diff --git a/src/geohash.h b/src/geohash.h
new file mode 100644
index 000000000..ed2ef9336
--- /dev/null
+++ b/src/geohash.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_H_
+#define GEOHASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define HASHISZERO(r) (!(r).bits && !(r).step)
+#define RANGEISZERO(r) (!(r).max && !(r).min)
+#define RANGEPISZERO(r) (r == NULL || RANGEISZERO(*r))
+
+#define GEO_STEP_MAX 26 /* 26*2 = 52 bits. */
+
+/* Limits from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+#define GEO_LAT_MIN -85.05112878
+#define GEO_LAT_MAX 85.05112878
+#define GEO_LONG_MIN -180
+#define GEO_LONG_MAX 180
+
+typedef enum {
+ GEOHASH_NORTH = 0,
+ GEOHASH_EAST,
+ GEOHASH_WEST,
+ GEOHASH_SOUTH,
+ GEOHASH_SOUTH_WEST,
+ GEOHASH_SOUTH_EAST,
+ GEOHASH_NORT_WEST, /* sic: misspelling kept for API compatibility */
+ GEOHASH_NORT_EAST /* sic */
+} GeoDirection;
+
+typedef struct {
+ uint64_t bits; /* Interleaved lat/long bits (lat even, long odd). */
+ uint8_t step; /* Bits of precision per coordinate (max 32). */
+} GeoHashBits;
+
+typedef struct {
+ double min;
+ double max;
+} GeoHashRange;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashRange longitude;
+ GeoHashRange latitude;
+} GeoHashArea;
+
+typedef struct {
+ GeoHashBits north;
+ GeoHashBits east;
+ GeoHashBits west;
+ GeoHashBits south;
+ GeoHashBits north_east;
+ GeoHashBits south_east;
+ GeoHashBits north_west;
+ GeoHashBits south_west;
+} GeoHashNeighbors;
+
+/* The encode/decode functions below return 1 on success and 0 on
+ * failure (NULL output pointer, zero range, out-of-range coordinates,
+ * or an all-zero hash) — not 0/-1 as a previous comment stated; see
+ * their definitions in geohash.c.
+ */
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashEncodeType(double longitude, double latitude,
+ uint8_t step, GeoHashBits *hash);
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy);
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatMercator(const GeoHashBits hash, double *xy); /* NOTE(review): no definition visible — possibly stale */
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* GEOHASH_H_ */
diff --git a/src/geohash_helper.c b/src/geohash_helper.c
new file mode 100644
index 000000000..e23f17b4e
--- /dev/null
+++ b/src/geohash_helper.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a C++ to C conversion from the ardb project.
+ * This file started out as:
+ * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp
+ */
+
+#include "fmacros.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+#include <math.h>
+
+#define D_R (M_PI / 180.0)
+#define R_MAJOR 6378137.0
+#define R_MINOR 6356752.3142
+#define RATIO (R_MINOR / R_MAJOR)
+#define ECCENT (sqrt(1.0 - (RATIO *RATIO)))
+#define COM (0.5 * ECCENT)
+
+/// @brief The usual PI/180 constant
+const double DEG_TO_RAD = 0.017453292519943295769236907684886;
+/// @brief Earth's quatratic mean radius for WGS-84
+const double EARTH_RADIUS_IN_METERS = 6372797.560856;
+
+const double MERCATOR_MAX = 20037726.37;
+const double MERCATOR_MIN = -20037726.37;
+
+static inline double deg_rad(double ang) { return ang * D_R; }
+static inline double rad_deg(double ang) { return ang / D_R; }
+
+/* This function is used in order to estimate the step (bits precision)
+ * of the 9 search area boxes during radius queries. */
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
+ if (range_meters == 0) return 26;
+ int step = 1;
+ while (range_meters < MERCATOR_MAX) {
+ range_meters *= 2;
+ step++;
+ }
+ step -= 2; /* Make sure range is included in most of the base cases. */
+
+ /* Wider range torwards the poles... Note: it is possible to do better
+ * than this approximation by computing the distance between meridians
+ * at this latitude, but this does the trick for now. */
+ if (lat > 66 || lat < -66) {
+ step--;
+ if (lat > 80 || lat < -80) step--;
+ }
+
+ /* Frame to valid range. */
+ if (step < 1) step = 1;
+ if (step > 26) step = 26;
+ return step;
+}
+
+/* Return the bounding box of the search area centered at latitude,longitude
+ * having a radius of radius_meter. bounds[0] - bounds[2] is the minimum
+ * and maxium longitude, while bounds[1] - bounds[3] is the minimum and
+ * maximum latitude.
+ *
+ * This function does not behave correctly with very large radius values, for
+ * instance for the coordinates 81.634948934258375 30.561509253718668 and a
+ * radius of 7083 kilometers, it reports as bounding boxes:
+ *
+ * min_lon 7.680495, min_lat -33.119473, max_lon 155.589402, max_lat 94.242491
+ *
+ * However, for instance, a min_lon of 7.680495 is not correct, because the
+ * point -1.27579540014266968 61.33421815228281559 is at less than 7000
+ * kilometers away.
+ *
+ * Since this function is currently only used as an optimization, the
+ * optimization is not used for very big radiuses, however the function
+ * should be fixed. */
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds) {
+ if (!bounds) return 0;
+
+ bounds[0] = longitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude)));
+ bounds[2] = longitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude)));
+ bounds[1] = latitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS);
+ bounds[3] = latitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS);
+ return 1;
+}
+
+/* Return a set of areas (center + 8) that are able to cover a range query
+ * for the specified position and radius. */
+GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) {
+ GeoHashRange long_range, lat_range;
+ GeoHashRadius radius;
+ GeoHashBits hash;
+ GeoHashNeighbors neighbors;
+ GeoHashArea area;
+ double min_lon, max_lon, min_lat, max_lat;
+ double bounds[4];
+ int steps;
+
+ geohashBoundingBox(longitude, latitude, radius_meters, bounds);
+ min_lon = bounds[0];
+ min_lat = bounds[1];
+ max_lon = bounds[2];
+ max_lat = bounds[3];
+
+ steps = geohashEstimateStepsByRadius(radius_meters,latitude);
+
+ geohashGetCoordRange(&long_range,&lat_range);
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+
+ /* Check if the step is enough at the limits of the covered area.
+ * Sometimes when the search area is near an edge of the
+ * area, the estimated step is not small enough, since one of the
+ * north / south / west / east square is too near to the search area
+ * to cover everything. */
+ int decrease_step = 0;
+ {
+ GeoHashArea north, south, east, west;
+
+ geohashDecode(long_range, lat_range, neighbors.north, &north);
+ geohashDecode(long_range, lat_range, neighbors.south, &south);
+ geohashDecode(long_range, lat_range, neighbors.east, &east);
+ geohashDecode(long_range, lat_range, neighbors.west, &west);
+
+ if (geohashGetDistance(longitude,latitude,longitude,north.latitude.max)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,longitude,south.latitude.min)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,east.longitude.max,latitude)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,west.longitude.min,latitude)
+ < radius_meters) decrease_step = 1;
+ }
+
+ if (steps > 1 && decrease_step) {
+ steps--;
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+ }
+
+ /* Exclude the search areas that are useless. */
+ if (steps >= 2) {
+ if (area.latitude.min < min_lat) {
+ GZERO(neighbors.south);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.south_east);
+ }
+ if (area.latitude.max > max_lat) {
+ GZERO(neighbors.north);
+ GZERO(neighbors.north_east);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.min < min_lon) {
+ GZERO(neighbors.west);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.max > max_lon) {
+ GZERO(neighbors.east);
+ GZERO(neighbors.south_east);
+ GZERO(neighbors.north_east);
+ }
+ }
+ radius.hash = hash;
+ radius.neighbors = neighbors;
+ radius.area = area;
+ return radius;
+}
+
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters) {
+ return geohashGetAreasByRadius(longitude, latitude, radius_meters);
+}
+
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash) {
+ uint64_t bits = hash.bits;
+ bits <<= (52 - hash.step * 2);
+ return bits;
+}
+
+/* Calculate distance using haversin great circle distance formula. */
+double geohashGetDistance(double lon1d, double lat1d, double lon2d, double lat2d) {
+ double lat1r, lon1r, lat2r, lon2r, u, v;
+ lat1r = deg_rad(lat1d);
+ lon1r = deg_rad(lon1d);
+ lat2r = deg_rad(lat2d);
+ lon2r = deg_rad(lon2d);
+ u = sin((lat2r - lat1r) / 2);
+ v = sin((lon2r - lon1r) / 2);
+ return 2.0 * EARTH_RADIUS_IN_METERS *
+ asin(sqrt(u * u + cos(lat1r) * cos(lat2r) * v * v));
+}
+
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance) {
+ *distance = geohashGetDistance(x1, y1, x2, y2);
+ if (*distance > radius) return 0;
+ return 1;
+}
+
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance) {
+ return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);
+}
diff --git a/src/geohash_helper.h b/src/geohash_helper.h
new file mode 100644
index 000000000..eb0dda38a
--- /dev/null
+++ b/src/geohash_helper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_HELPER_HPP_
+#define GEOHASH_HELPER_HPP_
+
+#include "geohash.h"
+
+#define GZERO(s) s.bits = s.step = 0;
+#define GISZERO(s) (!s.bits && !s.step)
+#define GISNOTZERO(s) (s.bits || s.step)
+
+typedef uint64_t GeoHashFix52Bits;
+typedef uint64_t GeoHashVarBits;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashArea area;
+ GeoHashNeighbors neighbors;
+} GeoHashRadius;
+
+int GeoHashBitsComparator(const GeoHashBits *a, const GeoHashBits *b);
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat);
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds);
+GeoHashRadius geohashGetAreasByRadius(double longitude,
+ double latitude, double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusMercator(double longitude, double latitude,
+ double radius_meters);
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash);
+double geohashGetDistance(double lon1d, double lat1d,
+ double lon2d, double lat2d);
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance);
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance);
+
+#endif /* GEOHASH_HELPER_HPP_ */
diff --git a/src/help.h b/src/help.h
index 8395c525b..5f927c303 100644
--- a/src/help.h
+++ b/src/help.h
@@ -15,7 +15,9 @@ static char *commandGroups[] = {
"connection",
"server",
"scripting",
- "hyperloglog"
+ "hyperloglog",
+ "cluster",
+ "geo"
};
struct commandHelp {
@@ -46,10 +48,15 @@ struct commandHelp {
9,
"1.0.0" },
{ "BITCOUNT",
- "key [start] [end]",
+ "key [start end]",
"Count set bits in a string",
1,
"2.6.0" },
+ { "BITFIELD",
+ "key [GET type offset] [SET type offset value] [INCRBY type offset increment] [OVERFLOW WRAP|SAT|FAIL]",
+ "Perform arbitrary bitfield integer operations on strings",
+ 1,
+ "3.2.0" },
{ "BITOP",
"operation destkey key [key ...]",
"Perform bitwise operations between strings",
@@ -81,7 +88,7 @@ struct commandHelp {
9,
"2.6.9" },
{ "CLIENT KILL",
- "ip:port",
+ "[ip:port] [ID client-id] [TYPE normal|master|slave|pubsub] [ADDR ip:port] [SKIPME yes/no]",
"Kill the connection of a client",
9,
"2.4.0" },
@@ -95,11 +102,126 @@ struct commandHelp {
"Stop processing commands from clients for some time",
9,
"2.9.50" },
+ { "CLIENT REPLY",
+ "ON|OFF|SKIP",
+ "Instruct the server whether to reply to commands",
+ 9,
+ "3.2" },
{ "CLIENT SETNAME",
"connection-name",
"Set the current connection name",
9,
"2.6.9" },
+ { "CLUSTER ADDSLOTS",
+ "slot [slot ...]",
+ "Assign new hash slots to receiving node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER COUNT-FAILURE-REPORTS",
+ "node-id",
+ "Return the number of failure reports active for a given node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER COUNTKEYSINSLOT",
+ "slot",
+ "Return the number of local keys in the specified hash slot",
+ 12,
+ "3.0.0" },
+ { "CLUSTER DELSLOTS",
+ "slot [slot ...]",
+ "Set hash slots as unbound in receiving node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER FAILOVER",
+ "[FORCE|TAKEOVER]",
+ "Forces a slave to perform a manual failover of its master.",
+ 12,
+ "3.0.0" },
+ { "CLUSTER FORGET",
+ "node-id",
+ "Remove a node from the nodes table",
+ 12,
+ "3.0.0" },
+ { "CLUSTER GETKEYSINSLOT",
+ "slot count",
+ "Return local key names in the specified hash slot",
+ 12,
+ "3.0.0" },
+ { "CLUSTER INFO",
+ "-",
+ "Provides info about Redis Cluster node state",
+ 12,
+ "3.0.0" },
+ { "CLUSTER KEYSLOT",
+ "key",
+ "Returns the hash slot of the specified key",
+ 12,
+ "3.0.0" },
+ { "CLUSTER MEET",
+ "ip port",
+ "Force a node cluster to handshake with another node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER NODES",
+ "-",
+ "Get Cluster config for the node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER REPLICATE",
+ "node-id",
+ "Reconfigure a node as a slave of the specified master node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER RESET",
+ "[HARD|SOFT]",
+ "Reset a Redis Cluster node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SAVECONFIG",
+ "-",
+ "Forces the node to save cluster state on disk",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SET-CONFIG-EPOCH",
+ "config-epoch",
+ "Set the configuration epoch in a new node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SETSLOT",
+ "slot IMPORTING|MIGRATING|STABLE|NODE [node-id]",
+ "Bind a hash slot to a specific node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SLAVES",
+ "node-id",
+ "List slave nodes of the specified master node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SLOTS",
+ "-",
+ "Get array of Cluster slot to node mappings",
+ 12,
+ "3.0.0" },
+ { "COMMAND",
+ "-",
+ "Get array of Redis command details",
+ 9,
+ "2.8.13" },
+ { "COMMAND COUNT",
+ "-",
+ "Get total number of Redis commands",
+ 9,
+ "2.8.13" },
+ { "COMMAND GETKEYS",
+ "-",
+ "Extract keys given a full Redis command",
+ 9,
+ "2.8.13" },
+ { "COMMAND INFO",
+ "command-name [command-name ...]",
+ "Get array of specific Redis command details",
+ 9,
+ "2.8.13" },
{ "CONFIG GET",
"parameter",
"Get the value of a configuration parameter",
@@ -181,7 +303,7 @@ struct commandHelp {
7,
"1.2.0" },
{ "EXISTS",
- "key",
+ "key [key ...]",
"Determine if a key exists",
0,
"1.0.0" },
@@ -205,6 +327,36 @@ struct commandHelp {
"Remove all keys from the current database",
9,
"1.0.0" },
+ { "GEOADD",
+ "key longitude latitude member [longitude latitude member ...]",
+ "Add one or more geospatial items in the geospatial index represented using a sorted set",
+ 13,
+ "3.2.0" },
+ { "GEODIST",
+ "key member1 member2 [unit]",
+ "Returns the distance between two members of a geospatial index",
+ 13,
+ "3.2.0" },
+ { "GEOHASH",
+ "key member [member ...]",
+ "Returns members of a geospatial index as standard geohash strings",
+ 13,
+ "3.2.0" },
+ { "GEOPOS",
+ "key member [member ...]",
+ "Returns longitude and latitude of members of a geospatial index",
+ 13,
+ "3.2.0" },
+ { "GEORADIUS",
+ "key longitude latitude radius m|km|ft|mi [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count] [ASC|DESC] [STORE key] [STOREDIST key]",
+ "Query a sorted set representing a geospatial index to fetch members matching a given maximum distance from a point",
+ 13,
+ "3.2.0" },
+ { "GEORADIUSBYMEMBER",
+ "key member radius m|km|ft|mi [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count] [ASC|DESC] [STORE key] [STOREDIST key]",
+ "Query a sorted set representing a geospatial index to fetch members matching a given maximum distance from a member",
+ 13,
+ "3.2.0" },
{ "GET",
"key",
"Get the value of a key",
@@ -290,6 +442,11 @@ struct commandHelp {
"Set the value of a hash field, only if the field does not exist",
5,
"2.0.0" },
+ { "HSTRLEN",
+ "key field",
+ "Get the length of the value of a hash field",
+ 5,
+ "3.2.0" },
{ "HVALS",
"key",
"Get all the values in a hash",
@@ -381,7 +538,7 @@ struct commandHelp {
1,
"1.0.0" },
{ "MIGRATE",
- "host port key destination-db timeout [COPY] [REPLACE]",
+ "host port key|"" destination-db timeout [COPY] [REPLACE] [KEYS key]",
"Atomically transfer a key from a Redis instance to another one.",
0,
"2.6.0" },
@@ -446,7 +603,7 @@ struct commandHelp {
11,
"2.8.9" },
{ "PING",
- "-",
+ "[message]",
"Ping the server",
8,
"1.0.0" },
@@ -490,6 +647,16 @@ struct commandHelp {
"Return a random key from the keyspace",
0,
"1.0.0" },
+ { "READONLY",
+ "-",
+ "Enables read queries for a connection to a cluster slave node",
+ 12,
+ "3.0.0" },
+ { "READWRITE",
+ "-",
+ "Disables read queries for a connection to a cluster slave node",
+ 12,
+ "3.0.0" },
{ "RENAME",
"key newkey",
"Rename a key",
@@ -501,10 +668,15 @@ struct commandHelp {
0,
"1.0.0" },
{ "RESTORE",
- "key ttl serialized-value",
+ "key ttl serialized-value [REPLACE]",
"Create a key using the provided serialized value, previously obtained using DUMP.",
0,
"2.6.0" },
+ { "ROLE",
+ "-",
+ "Return the role of the instance in the context of replication",
+ 9,
+ "2.8.12" },
{ "RPOP",
"key",
"Remove and get the last element in a list",
@@ -512,7 +684,7 @@ struct commandHelp {
"1.0.0" },
{ "RPOPLPUSH",
"source destination",
- "Remove the last element in a list, append it to another list and return it",
+ "Remove the last element in a list, prepend it to another list and return it",
2,
"1.2.0" },
{ "RPUSH",
@@ -545,6 +717,11 @@ struct commandHelp {
"Get the number of members in a set",
3,
"1.0.0" },
+ { "SCRIPT DEBUG",
+ "YES|SYNC|NO",
+ "Set the debug mode for executed scripts.",
+ 10,
+ "3.2.0" },
{ "SCRIPT EXISTS",
"script [script ...]",
"Check existence of scripts in the script cache.",
@@ -606,7 +783,7 @@ struct commandHelp {
1,
"2.2.0" },
{ "SHUTDOWN",
- "[NOSAVE] [SAVE]",
+ "[NOSAVE|SAVE]",
"Synchronously save the dataset to disk and then shut down the server",
9,
"1.0.0" },
@@ -651,8 +828,8 @@ struct commandHelp {
0,
"1.0.0" },
{ "SPOP",
- "key",
- "Remove and return a random member from a set",
+ "key [count]",
+ "Remove and return one or multiple random members from a set",
3,
"1.0.0" },
{ "SRANDMEMBER",
@@ -720,13 +897,18 @@ struct commandHelp {
"Forget about all watched keys",
7,
"2.2.0" },
+ { "WAIT",
+ "numslaves timeout",
+ "Wait for the synchronous replication of all the write commands sent in the context of the current connection",
+ 0,
+ "3.0.0" },
{ "WATCH",
"key [key ...]",
"Watch the given keys to determine execution of the MULTI/EXEC block",
7,
"2.2.0" },
{ "ZADD",
- "key score member [score member ...]",
+ "key [NX|XX] [CH] [INCR] score member [score member ...]",
"Add one or more members to a sorted set, or update its score if it already exists",
4,
"1.2.0" },
@@ -800,6 +982,11 @@ struct commandHelp {
"Return a range of members in a sorted set, by index, with scores ordered from high to low",
4,
"1.2.0" },
+ { "ZREVRANGEBYLEX",
+ "key max min [LIMIT offset count]",
+ "Return a range of members in a sorted set, by lexicographical range, ordered from higher to lower strings.",
+ 4,
+ "2.8.9" },
{ "ZREVRANGEBYSCORE",
"key max min [WITHSCORES] [LIMIT offset count]",
"Return a range of members in a sorted set, by score, with scores ordered from high to low",
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 63052a789..f4b5bd1c1 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -29,7 +29,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <stdint.h>
#include <math.h>
@@ -401,7 +401,11 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
uint64_t k;
#if (BYTE_ORDER == LITTLE_ENDIAN)
+ #ifdef USE_ALIGNED_ACCESS
+ memcpy(&k,data,sizeof(uint64_t));
+ #else
k = *((uint64_t*)data);
+ #endif
#else
k = (uint64_t) data[0];
k |= (uint64_t) data[1] << 8;
@@ -563,8 +567,8 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
* representation. Both representations are represented by SDS strings, and
* the input representation is freed as a side effect.
*
- * The function returns REDIS_OK if the sparse representation was valid,
- * otherwise REDIS_ERR is returned if the representation was corrupted. */
+ * The function returns C_OK if the sparse representation was valid,
+ * otherwise C_ERR is returned if the representation was corrupted. */
int hllSparseToDense(robj *o) {
sds sparse = o->ptr, dense;
struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
@@ -573,7 +577,7 @@ int hllSparseToDense(robj *o) {
/* If the representation is already the right one return ASAP. */
hdr = (struct hllhdr*) sparse;
- if (hdr->encoding == HLL_DENSE) return REDIS_OK;
+ if (hdr->encoding == HLL_DENSE) return C_OK;
/* Create a string of the right size filled with zero bytes.
* Note that the cached cardinality is set to 0 as a side effect
@@ -610,13 +614,13 @@ int hllSparseToDense(robj *o) {
* set to HLL_REGISTERS. */
if (idx != HLL_REGISTERS) {
sdsfree(dense);
- return REDIS_ERR;
+ return C_ERR;
}
/* Free the old representation and set the new one. */
sdsfree(o->ptr);
o->ptr = dense;
- return REDIS_OK;
+ return C_OK;
}
/* "Add" the element in the sparse hyperloglog data structure.
@@ -866,7 +870,7 @@ updated:
return 1;
promote: /* Promote to dense representation. */
- if (hllSparseToDense(o) == REDIS_ERR) return -1; /* Corrupted HLL. */
+ if (hllSparseToDense(o) == C_ERR) return -1; /* Corrupted HLL. */
hdr = o->ptr;
/* We need to call hllDenseAdd() to perform the operation after the
@@ -877,7 +881,7 @@ promote: /* Promote to dense representation. */
* is propagated to slaves / AOF, so if there is a sparse -> dense
* convertion, it will be performed in all the slaves as well. */
int dense_retval = hllDenseAdd(hdr->registers, ele, elesize);
- redisAssert(dense_retval == 1);
+ serverAssert(dense_retval == 1);
return dense_retval;
}
@@ -991,35 +995,24 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
} else if (hdr->encoding == HLL_RAW) {
E = hllRawSum(hdr->registers,PE,&ez);
} else {
- redisPanic("Unknown HyperLogLog encoding in hllCount()");
+ serverPanic("Unknown HyperLogLog encoding in hllCount()");
}
- /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */
- E = (1/E)*alpha*m*m;
-
- /* Use the LINEARCOUNTING algorithm for small cardinalities.
- * For larger values but up to 72000 HyperLogLog raw approximation is
- * used since linear counting error starts to increase. However HyperLogLog
- * shows a strong bias in the range 2.5*16384 - 72000, so we try to
- * compensate for it. */
- if (E < m*2.5 && ez != 0) {
- E = m*log(m/ez); /* LINEARCOUNTING() */
- } else if (m == 16384 && E < 72000) {
- /* We did polynomial regression of the bias for this range, this
- * way we can compute the bias for a given cardinality and correct
- * according to it. Only apply the correction for P=14 that's what
- * we use and the value the correction was verified with. */
- double bias = 5.9119*1.0e-18*(E*E*E*E)
- -1.4253*1.0e-12*(E*E*E)+
- 1.2940*1.0e-7*(E*E)
- -5.2921*1.0e-3*E+
- 83.3216;
- E -= E*(bias/100);
- }
- /* We don't apply the correction for E > 1/30 of 2^32 since we use
- * a 64 bit function and 6 bit counters. To apply the correction for
- * 1/30 of 2^64 is not needed since it would require a huge set
- * to approach such a value. */
+ /* Apply loglog-beta to the raw estimate. See:
+ * "LogLog-Beta and More: A New Algorithm for Cardinality Estimation
+ * Based on LogLog Counting" Jason Qin, Denys Kim, Yumei Tung
+ * arXiv:1612.02284 */
+ double zl = log(ez + 1);
+ double beta = -0.370393911*ez +
+ 0.070471823*zl +
+ 0.17393686*pow(zl,2) +
+ 0.16339839*pow(zl,3) +
+ -0.09237745*pow(zl,4) +
+ 0.03738027*pow(zl,5) +
+ -0.005384159*pow(zl,6) +
+ 0.00042419*pow(zl,7);
+
+ E = llroundl(alpha*m*(m-ez)*(1/(E+beta)));
return (uint64_t) E;
}
@@ -1039,7 +1032,7 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) {
* The hll object must be already validated via isHLLObjectOrReply()
* or in some other way.
*
- * If the HyperLogLog is sparse and is found to be invalid, REDIS_ERR
+ * If the HyperLogLog is sparse and is found to be invalid, C_ERR
* is returned, otherwise the function always succeeds. */
int hllMerge(uint8_t *max, robj *hll) {
struct hllhdr *hdr = hll->ptr;
@@ -1077,9 +1070,9 @@ int hllMerge(uint8_t *max, robj *hll) {
p++;
}
}
- if (i != HLL_REGISTERS) return REDIS_ERR;
+ if (i != HLL_REGISTERS) return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* ========================== HyperLogLog commands ========================== */
@@ -1108,10 +1101,10 @@ robj *createHLLObject(void) {
p += 2;
aux -= xzero;
}
- redisAssert((p-(uint8_t*)s) == sparselen);
+ serverAssert((p-(uint8_t*)s) == sparselen);
/* Create the actual object. */
- o = createObject(REDIS_STRING,s);
+ o = createObject(OBJ_STRING,s);
hdr = o->ptr;
memcpy(hdr->magic,"HYLL",4);
hdr->encoding = HLL_SPARSE;
@@ -1119,15 +1112,16 @@ robj *createHLLObject(void) {
}
/* Check if the object is a String with a valid HLL representation.
- * Return REDIS_OK if this is true, otherwise reply to the client
- * with an error and return REDIS_ERR. */
-int isHLLObjectOrReply(redisClient *c, robj *o) {
+ * Return C_OK if this is true, otherwise reply to the client
+ * with an error and return C_ERR. */
+int isHLLObjectOrReply(client *c, robj *o) {
struct hllhdr *hdr;
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
- return REDIS_ERR; /* Error already sent. */
+ if (checkType(c,o,OBJ_STRING))
+ return C_ERR; /* Error already sent. */
+ if (!sdsEncodedObject(o)) goto invalid;
if (stringObjectLen(o) < sizeof(*hdr)) goto invalid;
hdr = o->ptr;
@@ -1142,17 +1136,17 @@ int isHLLObjectOrReply(redisClient *c, robj *o) {
stringObjectLen(o) != HLL_DENSE_SIZE) goto invalid;
/* All tests passed. */
- return REDIS_OK;
+ return C_OK;
invalid:
addReplySds(c,
sdsnew("-WRONGTYPE Key is not a valid "
"HyperLogLog string value.\r\n"));
- return REDIS_ERR;
+ return C_ERR;
}
/* PFADD var ele ele ele ... ele => :0 or :1 */
-void pfaddCommand(redisClient *c) {
+void pfaddCommand(client *c) {
robj *o = lookupKeyWrite(c->db,c->argv[1]);
struct hllhdr *hdr;
int updated = 0, j;
@@ -1165,7 +1159,7 @@ void pfaddCommand(redisClient *c) {
dbAdd(c->db,c->argv[1],o);
updated++;
} else {
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[1],o);
}
/* Perform the low level ADD operation for every element. */
@@ -1184,7 +1178,7 @@ void pfaddCommand(redisClient *c) {
hdr = o->ptr;
if (updated) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
server.dirty++;
HLL_INVALIDATE_CACHE(hdr);
}
@@ -1192,7 +1186,7 @@ void pfaddCommand(redisClient *c) {
}
/* PFCOUNT var -> approximated cardinality of set. */
-void pfcountCommand(redisClient *c) {
+void pfcountCommand(client *c) {
robj *o;
struct hllhdr *hdr;
uint64_t card;
@@ -1213,12 +1207,12 @@ void pfcountCommand(redisClient *c) {
for (j = 1; j < c->argc; j++) {
/* Check type and size. */
robj *o = lookupKeyRead(c->db,c->argv[j]);
- if (o == NULL) continue; /* Assume empty HLL for non existing var. */
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (o == NULL) continue; /* Assume empty HLL for non existing var.*/
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
/* Merge with this HLL with our 'max' HHL by setting max[i]
* to MAX(max[i],hll[i]). */
- if (hllMerge(registers,o) == REDIS_ERR) {
+ if (hllMerge(registers,o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1233,13 +1227,13 @@ void pfcountCommand(redisClient *c) {
*
* The user specified a single key. Either return the cached value
* or compute one and update the cache. */
- o = lookupKeyRead(c->db,c->argv[1]);
+ o = lookupKeyWrite(c->db,c->argv[1]);
if (o == NULL) {
/* No key? Cardinality is zero since no element was added, otherwise
* we would have a key as HLLADD creates it as a side effect. */
addReply(c,shared.czero);
} else {
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[1],o);
/* Check if the cached cardinality is valid. */
@@ -1282,7 +1276,7 @@ void pfcountCommand(redisClient *c) {
}
/* PFMERGE dest src1 src2 src3 ... srcN => OK */
-void pfmergeCommand(redisClient *c) {
+void pfmergeCommand(client *c) {
uint8_t max[HLL_REGISTERS];
struct hllhdr *hdr;
int j;
@@ -1295,11 +1289,11 @@ void pfmergeCommand(redisClient *c) {
/* Check type and size. */
robj *o = lookupKeyRead(c->db,c->argv[j]);
if (o == NULL) continue; /* Assume empty HLL for non existing var. */
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
/* Merge with this HLL with our 'max' HHL by setting max[i]
* to MAX(max[i],hll[i]). */
- if (hllMerge(max,o) == REDIS_ERR) {
+ if (hllMerge(max,o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1321,7 +1315,7 @@ void pfmergeCommand(redisClient *c) {
}
/* Only support dense objects as destination. */
- if (hllSparseToDense(o) == REDIS_ERR) {
+ if (hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1337,7 +1331,7 @@ void pfmergeCommand(redisClient *c) {
signalModifiedKey(c->db,c->argv[1]);
/* We generate an PFADD event for PFMERGE for semantical simplicity
* since in theory this is a mass-add of elements. */
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
server.dirty++;
addReply(c,shared.ok);
}
@@ -1348,8 +1342,8 @@ void pfmergeCommand(redisClient *c) {
* This command performs a self-test of the HLL registers implementation.
* Something that is not easy to test from within the outside. */
#define HLL_TEST_CYCLES 1000
-void pfselftestCommand(redisClient *c) {
- int j, i;
+void pfselftestCommand(client *c) {
+ unsigned int j, i;
sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE);
struct hllhdr *hdr = (struct hllhdr*) bitcounters, *hdr2;
robj *o = NULL;
@@ -1431,7 +1425,7 @@ void pfselftestCommand(redisClient *c) {
if (j == 10) maxerr = 1;
if (abserr < 0) abserr = -abserr;
- if (abserr > maxerr) {
+ if (abserr > (int64_t)maxerr) {
addReplyErrorFormat(c,
"TESTFAILED Too big error. card:%llu abserr:%llu",
(unsigned long long) checkpoint,
@@ -1452,18 +1446,18 @@ cleanup:
/* PFDEBUG <subcommand> <key> ... args ...
* Different debugging related operations about the HLL implementation. */
-void pfdebugCommand(redisClient *c) {
+void pfdebugCommand(client *c) {
char *cmd = c->argv[1]->ptr;
struct hllhdr *hdr;
robj *o;
int j;
- o = lookupKeyRead(c->db,c->argv[2]);
+ o = lookupKeyWrite(c->db,c->argv[2]);
if (o == NULL) {
addReplyError(c,"The specified key does not exist");
return;
}
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[2],o);
hdr = o->ptr;
@@ -1472,7 +1466,7 @@ void pfdebugCommand(redisClient *c) {
if (c->argc != 3) goto arityerr;
if (hdr->encoding == HLL_SPARSE) {
- if (hllSparseToDense(o) == REDIS_ERR) {
+ if (hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1536,7 +1530,7 @@ void pfdebugCommand(redisClient *c) {
if (c->argc != 3) goto arityerr;
if (hdr->encoding == HLL_SPARSE) {
- if (hllSparseToDense(o) == REDIS_ERR) {
+ if (hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
diff --git a/src/intset.c b/src/intset.c
index b61530e45..198c90aa1 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -133,7 +133,7 @@ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
}
while(max >= min) {
- mid = (min+max)/2;
+ mid = ((unsigned int)min + (unsigned int)max) >> 1;
cur = _intsetGet(is,mid);
if (value > cur) {
min = mid+1;
@@ -261,7 +261,7 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%intrev32ifbe(is->length));
}
-/* Sets the value to the value at the given position. When this position is
+/* Get the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
if (pos < intrev32ifbe(is->length)) {
@@ -272,7 +272,7 @@ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
}
/* Return intset length */
-uint32_t intsetLen(intset *is) {
+uint32_t intsetLen(const intset *is) {
return intrev32ifbe(is->length);
}
@@ -281,44 +281,46 @@ size_t intsetBlobLen(intset *is) {
return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
}
-#ifdef INTSET_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
+#include <time.h>
-void intsetRepr(intset *is) {
- int i;
- for (i = 0; i < intrev32ifbe(is->length); i++) {
+#if 0
+static void intsetRepr(intset *is) {
+ for (uint32_t i = 0; i < intrev32ifbe(is->length); i++) {
printf("%lld\n", (uint64_t)_intsetGet(is,i));
}
printf("\n");
}
-void error(char *err) {
+static void error(char *err) {
printf("%s\n", err);
exit(1);
}
+#endif
-void ok(void) {
+static void ok(void) {
printf("OK\n");
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
-void _assert(char *estr, char *file, int line) {
+static void _assert(char *estr, char *file, int line) {
printf("\n\n=== ASSERTION FAILED ===\n");
printf("==> %s:%d '%s' is not true\n",file,line,estr);
}
-intset *createSet(int bits, int size) {
+static intset *createSet(int bits, int size) {
uint64_t mask = (1<<bits)-1;
- uint64_t i, value;
+ uint64_t value;
intset *is = intsetNew();
- for (i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
if (bits > 32) {
value = (rand()*rand()) & mask;
} else {
@@ -329,10 +331,8 @@ intset *createSet(int bits, int size) {
return is;
}
-void checkConsistency(intset *is) {
- int i;
-
- for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
+static void checkConsistency(intset *is) {
+ for (uint32_t i = 0; i < (intrev32ifbe(is->length)-1); i++) {
uint32_t encoding = intrev32ifbe(is->encoding);
if (encoding == INTSET_ENC_INT16) {
@@ -348,11 +348,15 @@ void checkConsistency(intset *is) {
}
}
-int main(int argc, char **argv) {
+#define UNUSED(x) (void)(x)
+int intsetTest(int argc, char **argv) {
uint8_t success;
int i;
intset *is;
- sranddev();
+ srand(time(NULL));
+
+ UNUSED(argc);
+ UNUSED(argv);
printf("Value encodings: "); {
assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
@@ -363,8 +367,10 @@ int main(int argc, char **argv) {
assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
- assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(-9223372036854775808ull) ==
+ INTSET_ENC_INT64);
+ assert(_intsetValueEncoding(+9223372036854775807ull) ==
+ INTSET_ENC_INT64);
ok();
}
@@ -378,7 +384,7 @@ int main(int argc, char **argv) {
}
printf("Large number of random adds: "); {
- int inserts = 0;
+ uint32_t inserts = 0;
is = intsetNew();
for (i = 0; i < 1024; i++) {
is = intsetAdd(is,rand()%0x800,&success);
@@ -461,7 +467,8 @@ int main(int argc, char **argv) {
start = usec();
for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
- printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
+ printf("%ld lookups, %ld element set, %lldusec\n",
+ num,size,usec()-start);
}
printf("Stress add+delete: "); {
@@ -479,5 +486,7 @@ int main(int argc, char **argv) {
checkConsistency(is);
ok();
}
+
+ return 0;
}
#endif
diff --git a/src/intset.h b/src/intset.h
index bd01ff22f..8119e6636 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -44,7 +44,11 @@ intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
-uint32_t intsetLen(intset *is);
+uint32_t intsetLen(const intset *is);
size_t intsetBlobLen(intset *is);
+#ifdef REDIS_TEST
+int intsetTest(int argc, char *argv[]);
+#endif
+
#endif // __INTSET_H
diff --git a/src/latency.c b/src/latency.c
index fdc88210e..9e9f1f13a 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -33,14 +33,15 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* Dictionary type for latency events. */
int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
+ UNUSED(privdata);
return strcmp(key1,key2) == 0;
}
-unsigned int dictStringHash(const void *key) {
+uint64_t dictStringHash(const void *key) {
return dictGenHashFunction(key, strlen(key));
}
@@ -55,6 +56,32 @@ dictType latencyTimeSeriesDictType = {
dictVanillaFree /* val destructor */
};
+/* ------------------------- Utility functions ------------------------------ */
+
+#ifdef __linux__
+/* Returns 1 if Transparent Huge Pages support is enabled in the kernel.
+ * Otherwise (or if we are unable to check) 0 is returned. */
+int THPIsEnabled(void) {
+ char buf[1024];
+
+ FILE *fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
+ if (!fp) return 0;
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ fclose(fp);
+ return 0;
+ }
+ fclose(fp);
+ return (strstr(buf,"[never]") == NULL) ? 1 : 0;
+}
+#endif
+
+/* Report the amount of AnonHugePages in smap, in bytes. If the return
+ * value of the function is non-zero, the process is being targeted by
+ * THP support, and is likely to have memory usage / latency issues. */
+int THPGetAnonHugePagesSize(void) {
+ return zmalloc_get_smap_bytes_by_field("AnonHugePages:",-1);
+}
+
/* ---------------------------- Latency API --------------------------------- */
/* Latency monitor initialization. We just need to create the dictionary
@@ -201,7 +228,9 @@ sds createLatencyReport(void) {
int advise_write_load_info = 0; /* Print info about AOF and write load. */
int advise_hz = 0; /* Use higher HZ. */
int advise_large_objects = 0; /* Deletion of large objects. */
+ int advise_mass_eviction = 0; /* Avoid mass eviction of keys. */
int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
+ int advise_disable_thp = 0; /* AnonHugePages detected. */
int advices = 0;
/* Return ASAP if the latency engine is disabled and it looks like it
@@ -219,7 +248,7 @@ sds createLatencyReport(void) {
dictEntry *de;
int eventnum = 0;
- di = dictGetIterator(server.latency_events);
+ di = dictGetSafeIterator(server.latency_events);
while((de = dictNext(di)) != NULL) {
char *event = dictGetKey(de);
struct latencyTimeSeries *ts = dictGetVal(de);
@@ -336,18 +365,29 @@ sds createLatencyReport(void) {
}
/* Eviction cycle. */
- if (!strcasecmp(event,"eviction-cycle")) {
+ if (!strcasecmp(event,"eviction-del")) {
advise_large_objects = 1;
advices++;
}
+ if (!strcasecmp(event,"eviction-cycle")) {
+ advise_mass_eviction = 1;
+ advices++;
+ }
+
report = sdscatlen(report,"\n",1);
}
dictReleaseIterator(di);
- if (eventnum == 0) {
+ /* Add non event based advices. */
+ if (THPGetAnonHugePagesSize() > 0) {
+ advise_disable_thp = 1;
+ advices++;
+ }
+
+ if (eventnum == 0 && advices == 0) {
report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
- } else if (advices == 0) {
+ } else if (eventnum > 0 && advices == 0) {
report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Redis community to get some help, providing this report in your help request.\n");
} else {
/* Add all the suggestions accumulated so far. */
@@ -417,6 +457,14 @@ sds createLatencyReport(void) {
if (advise_large_objects) {
report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
}
+
+ if (advise_mass_eviction) {
+ report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via sets or sorted sets intersections, STORE option of SORT, Redis Cluster large keys migrations (RESTORE command), may create sudden memory pressure forcing the server to block trying to evict keys. \n");
+ }
+
+ if (advise_disable_thp) {
+ report = sdscat(report,"- I detected a non zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
+ }
}
return report;
@@ -426,7 +474,7 @@ sds createLatencyReport(void) {
/* latencyCommand() helper to produce a time-delay reply for all the samples
* in memory for the specified time series. */
-void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts) {
+void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
void *replylen = addDeferredMultiBulkLength(c);
int samples = 0, j;
@@ -444,7 +492,7 @@ void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts
/* latencyCommand() helper to produce the reply for the LATEST subcommand,
* listing the last latency sample for every event type registered so far. */
-void latencyCommandReplyWithLatestEvents(redisClient *c) {
+void latencyCommandReplyWithLatestEvents(client *c) {
dictIterator *di;
dictEntry *de;
@@ -474,7 +522,6 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
for (j = 0; j < LATENCY_TS_LEN; j++) {
int i = (ts->idx + j) % LATENCY_TS_LEN;
int elapsed;
- char *label;
char buf[64];
if (ts->samples[i].time == 0) continue;
@@ -496,8 +543,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
else
snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
- label = zstrdup(buf);
- sparklineSequenceAddSample(seq,ts->samples[i].latency,label);
+ sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
}
graph = sdscatprintf(graph,
@@ -518,7 +564,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
* LATENCY DOCTOR: returns an human readable analysis of instance latency.
* LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
*/
-void latencyCommand(redisClient *c) {
+void latencyCommand(client *c) {
struct latencyTimeSeries *ts;
if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
diff --git a/src/latency.h b/src/latency.h
index 6ddbe0410..0fe26e0e4 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -63,6 +63,7 @@ struct latencyStats {
void latencyMonitorInit(void);
void latencyAddSample(char *event, mstime_t latency);
+int THPIsEnabled(void);
/* Latency monitoring macros. */
@@ -85,4 +86,8 @@ void latencyAddSample(char *event, mstime_t latency);
(var) >= server.latency_monitor_threshold) \
latencyAddSample((event),(var));
+/* Remove time from a nested event. */
+#define latencyRemoveNestedEvent(event_var,nested_var) \
+ event_var += nested_var;
+
#endif /* __LATENCY_H */
diff --git a/src/lazyfree.c b/src/lazyfree.c
new file mode 100644
index 000000000..809ebdb57
--- /dev/null
+++ b/src/lazyfree.c
@@ -0,0 +1,135 @@
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+#include "cluster.h"
+
+static size_t lazyfree_objects = 0;
+pthread_mutex_t lazyfree_objects_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Return the number of currently pending objects to free. */
+size_t lazyfreeGetPendingObjectsCount(void) {
+ size_t aux;
+ atomicGet(lazyfree_objects,aux);
+ return aux;
+}
+
+/* Return the amount of work needed in order to free an object.
+ * The return value is not always the actual number of allocations the
+ * object is composed of, but a number proportional to it.
+ *
+ * For strings the function always returns 1.
+ *
+ * For aggregated objects represented by hash tables or other data structures
+ * the function just returns the number of elements the object is composed of.
+ *
+ * Objects composed of single allocations are always reported as having a
+ * single item even if they are actually logically composed of multiple
+ * elements.
+ *
+ * For lists the function returns the number of elements in the quicklist
+ * representing the list. */
+size_t lazyfreeGetFreeEffort(robj *obj) {
+ if (obj->type == OBJ_LIST) {
+ quicklist *ql = obj->ptr;
+ return ql->len;
+ } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HT) {
+ dict *ht = obj->ptr;
+ return dictSize(ht);
+ } else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST){
+ zset *zs = obj->ptr;
+ return zs->zsl->length;
+ } else if (obj->type == OBJ_HASH && obj->encoding == OBJ_ENCODING_HT) {
+ dict *ht = obj->ptr;
+ return dictSize(ht);
+ } else {
+ return 1; /* Everything else is a single allocation. */
+ }
+}
+
+/* Delete a key, value, and associated expiration entry if any, from the DB.
+ * If there are enough allocations to free the value object may be put into
+ * a lazy free list instead of being freed synchronously. The lazy free list
+ * will be reclaimed in a different bio.c thread. */
+#define LAZYFREE_THRESHOLD 64
+int dbAsyncDelete(redisDb *db, robj *key) {
+ /* Deleting an entry from the expires dict will not free the sds of
+ * the key, because it is shared with the main dictionary. */
+ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
+
+ /* If the value is composed of a few allocations, to free in a lazy way
+ * is actually just slower... So under a certain limit we just free
+ * the object synchronously. */
+ dictEntry *de = dictUnlink(db->dict,key->ptr);
+ if (de) {
+ robj *val = dictGetVal(de);
+ size_t free_effort = lazyfreeGetFreeEffort(val);
+
+ /* If releasing the object is too much work, let's put it into the
+ * lazy free list. */
+ if (free_effort > LAZYFREE_THRESHOLD) {
+ atomicIncr(lazyfree_objects,1);
+ bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL);
+ dictSetVal(db->dict,de,NULL);
+ }
+ }
+
+ /* Release the key-val pair, or just the key if we set the val
+ * field to NULL in order to lazy free it later. */
+ if (de) {
+ dictFreeUnlinkedEntry(db->dict,de);
+ if (server.cluster_enabled) slotToKeyDel(key);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Empty a Redis DB asynchronously. What the function does actually is to
+ * create a new empty set of hash tables and scheduling the old ones for
+ * lazy freeing. */
+void emptyDbAsync(redisDb *db) {
+ dict *oldht1 = db->dict, *oldht2 = db->expires;
+ db->dict = dictCreate(&dbDictType,NULL);
+ db->expires = dictCreate(&keyptrDictType,NULL);
+ atomicIncr(lazyfree_objects,dictSize(oldht1));
+ bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,oldht1,oldht2);
+}
+
+/* Empty the slots-keys map of Redis Cluster by creating a new empty one
+ * and scheduling the old for lazy freeing. */
+void slotToKeyFlushAsync(void) {
+ rax *old = server.cluster->slots_to_keys;
+
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
+ atomicIncr(lazyfree_objects,old->numele);
+ bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,old);
+}
+
+/* Release objects from the lazyfree thread. It's just decrRefCount()
+ * updating the count of objects to release. */
+void lazyfreeFreeObjectFromBioThread(robj *o) {
+ decrRefCount(o);
+ atomicDecr(lazyfree_objects,1);
+}
+
+/* Release a database from the lazyfree thread. The 'db' pointer is the
+ * database which was substituted with a fresh one in the main thread
+ * when the database was logically deleted. 'sl' is a skiplist used by
+ * Redis Cluster in order to take the hash slots -> keys mapping. This
+ * may be NULL if Redis Cluster is disabled. */
+void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) {
+ size_t numkeys = dictSize(ht1);
+ dictRelease(ht1);
+ dictRelease(ht2);
+ atomicDecr(lazyfree_objects,numkeys);
+}
+
+/* Release the radix tree mapping Redis Cluster keys to slots in the
+ * lazyfree thread. */
+void lazyfreeFreeSlotsMapFromBioThread(rax *rt) {
+ size_t len = rt->numele;
+ raxFree(rt);
+ atomicDecr(lazyfree_objects,len);
+}
diff --git a/src/lzfP.h b/src/lzfP.h
index c9eae3f6a..c6d2e096c 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -49,7 +49,7 @@
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
- * For best compression, use 15 or 16 (or more, up to 23).
+ * For best compression, use 15 or 16 (or more, up to 22).
*/
#ifndef HLOG
# define HLOG 16
@@ -94,7 +94,7 @@
/*
* Avoid assigning values to errno variable? for some embedding purposes
* (linux kernel for example), this is necessary. NOTE: this breaks
- * the documentation in lzf.h.
+ * the documentation in lzf.h. Avoiding errno has no speed impact.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
@@ -121,16 +121,52 @@
# define CHECK_INPUT 1
#endif
+/*
+ * Whether to store pointers or offsets inside the hash table. On
+ * 64 bit architectures, pointers take up twice as much space,
+ * and might also be slower. Default is to autodetect.
+ */
+/*#define LZF_USER_OFFSETS autodetect */
+
/*****************************************************************************/
/* nothing should be changed below */
+#ifdef __cplusplus
+# include <cstring>
+# include <climits>
+using namespace std;
+#else
+# include <string.h>
+# include <limits.h>
+#endif
+
+#ifndef LZF_USE_OFFSETS
+# if defined (WIN32)
+# define LZF_USE_OFFSETS defined(_M_X64)
+# else
+# if __cplusplus > 199711L
+# include <cstdint>
+# else
+# include <stdint.h>
+# endif
+# define LZF_USE_OFFSETS (UINTPTR_MAX > 0xffffffffU)
+# endif
+#endif
+
typedef unsigned char u8;
-typedef const u8 *LZF_STATE[1 << (HLOG)];
+#if LZF_USE_OFFSETS
+# define LZF_HSLOT_BIAS ((const u8 *)in_data)
+ typedef unsigned int LZF_HSLOT;
+#else
+# define LZF_HSLOT_BIAS 0
+ typedef const u8 *LZF_HSLOT;
+#endif
+
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
-# include <limits.h>
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
@@ -142,17 +178,7 @@ typedef const u8 *LZF_STATE[1 << (HLOG)];
#endif
#if ULTRA_FAST
-# if defined(VERY_FAST)
-# undef VERY_FAST
-# endif
-#endif
-
-#if INIT_HTAB
-# ifdef __cplusplus
-# include <cstring>
-# else
-# include <string.h>
-# endif
+# undef VERY_FAST
#endif
#endif
diff --git a/src/lzf_c.c b/src/lzf_c.c
index 9e031ad0b..e9c69a0b8 100644
--- a/src/lzf_c.c
+++ b/src/lzf_c.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -40,8 +40,8 @@
/*
* don't play with this unless you benchmark!
- * decompression is not dependent on the hash function
- * the hashing function might seem strange, just believe me
+ * the data format is not dependent on the hash function.
+ * the hash function might seem strange, just believe me,
* it works ;)
*/
#ifndef FRST
@@ -89,9 +89,9 @@
/*
* compressed format
*
- * 000LLLLL <L+1> ; literal
- * LLLooooo oooooooo ; backref L
- * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ * 000LLLLL <L+1> ; literal, L+1=1..33 octets
+ * LLLooooo oooooooo ; backref L+1=1..7 octets, o+1=1..4096 offset
+ * 111ooooo LLLLLLLL oooooooo ; backref L+8 octets, o+1=1..4096 offset
*
*/
@@ -106,7 +106,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
- const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
@@ -133,10 +132,6 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
-# if 0
- for (hslot = htab; hslot < htab + HSIZE; hslot++)
- *hslot++ = ip;
-# endif
#endif
lit = 0; op++; /* start run */
@@ -144,24 +139,23 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
while (ip < in_end - 2)
{
+ LZF_HSLOT *hslot;
+
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
- ref = *hslot; *hslot = ip;
+ ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
&& ref < ip /* the next test will actually take care of this, but this is faster */
#endif
&& (off = ip - ref - 1) < MAX_OFF
- && ip + 4 < in_end
&& ref > (u8 *)in_data
-#if STRICT_ALIGN
- && ref[0] == ip[0]
- && ref[1] == ip[1]
&& ref[2] == ip[2]
+#if STRICT_ALIGN
+ && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])
#else
&& *(u16 *)ref == *(u16 *)ip
- && ref[2] == ip[2]
#endif
)
{
@@ -170,12 +164,13 @@ lzf_compress (const void *const in_data, unsigned int in_len,
unsigned int maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+ if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
+ if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
+ return 0;
+
op [- lit - 1] = lit - 1; /* stop run */
op -= !lit; /* undo run if length is zero */
- if (expect_false (op + 3 + 1 >= out_end))
- return 0;
-
for (;;)
{
if (expect_true (maxlen > 16))
@@ -222,6 +217,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
}
*op++ = off;
+
lit = 0; op++; /* start run */
ip += len + 1;
@@ -237,12 +233,12 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = FRST (ip);
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
# endif
#else
@@ -251,7 +247,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
do
{
hval = NEXT (hval, ip);
- htab[IDX (hval)] = ip;
+ htab[IDX (hval)] = ip - LZF_HSLOT_BIAS;
ip++;
}
while (len--);
diff --git a/src/lzf_d.c b/src/lzf_d.c
index 6c723f5e0..c32be8e87 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * Copyright (c) 2000-2010 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
@@ -43,14 +43,14 @@
# define SET_ERRNO(n) errno = (n)
#endif
-/*
+#if USE_REP_MOVSB /* small win on amd, big loss on intel */
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
-*/
+#endif
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
@@ -86,9 +86,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
- do
- *op++ = *ip++;
- while (--ctrl);
+ switch (ctrl)
+ {
+ case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
+ case 28: *op++ = *ip++; case 27: *op++ = *ip++; case 26: *op++ = *ip++; case 25: *op++ = *ip++;
+ case 24: *op++ = *ip++; case 23: *op++ = *ip++; case 22: *op++ = *ip++; case 21: *op++ = *ip++;
+ case 20: *op++ = *ip++; case 19: *op++ = *ip++; case 18: *op++ = *ip++; case 17: *op++ = *ip++;
+ case 16: *op++ = *ip++; case 15: *op++ = *ip++; case 14: *op++ = *ip++; case 13: *op++ = *ip++;
+ case 12: *op++ = *ip++; case 11: *op++ = *ip++; case 10: *op++ = *ip++; case 9: *op++ = *ip++;
+ case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
+ case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
+ }
#endif
}
else /* back reference */
@@ -134,12 +142,39 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
len += 2;
lzf_movsb (op, ref, len);
#else
- *op++ = *ref++;
- *op++ = *ref++;
-
- do
- *op++ = *ref++;
- while (--len);
+ switch (len)
+ {
+ default:
+ len += 2;
+
+ if (op >= ref + len)
+ {
+ /* disjunct areas */
+ memcpy (op, ref, len);
+ op += len;
+ }
+ else
+ {
+ /* overlapping, use octet by octet copying */
+ do
+ *op++ = *ref++;
+ while (--len);
+ }
+
+ break;
+
+ case 9: *op++ = *ref++;
+ case 8: *op++ = *ref++;
+ case 7: *op++ = *ref++;
+ case 6: *op++ = *ref++;
+ case 5: *op++ = *ref++;
+ case 4: *op++ = *ref++;
+ case 3: *op++ = *ref++;
+ case 2: *op++ = *ref++;
+ case 1: *op++ = *ref++;
+ case 0: *op++ = *ref++; /* two octets more */
+ *op++ = *ref++;
+ }
#endif
}
}
diff --git a/src/memtest.c b/src/memtest.c
index cabfb5c89..a455430f5 100644
--- a/src/memtest.c
+++ b/src/memtest.c
@@ -26,7 +26,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
-
+#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -35,6 +35,9 @@
#include <errno.h>
#include <termios.h>
#include <sys/ioctl.h>
+#if defined(__sun)
+#include <stropts.h>
+#endif
#include "config.h"
#if (ULONG_MAX == 4294967295UL)
@@ -87,7 +90,7 @@ void memtest_progress_step(size_t curr, size_t size, char c) {
/* Test that addressing is fine. Every location is populated with its own
* address, and finally verified. This test is very fast but may detect
* ASAP big issues with the memory subsystem. */
-void memtest_addressing(unsigned long *l, size_t bytes) {
+int memtest_addressing(unsigned long *l, size_t bytes, int interactive) {
unsigned long words = bytes/sizeof(unsigned long);
unsigned long j, *p;
@@ -96,48 +99,60 @@ void memtest_addressing(unsigned long *l, size_t bytes) {
for (j = 0; j < words; j++) {
*p = (unsigned long)p;
p++;
- if ((j & 0xffff) == 0) memtest_progress_step(j,words*2,'A');
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j,words*2,'A');
}
/* Test */
p = l;
for (j = 0; j < words; j++) {
if (*p != (unsigned long)p) {
- printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
- (void*) p, *p);
- exit(1);
+ if (interactive) {
+ printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
+ (void*) p, *p);
+ exit(1);
+ }
+ return 1;
}
p++;
- if ((j & 0xffff) == 0) memtest_progress_step(j+words,words*2,'A');
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j+words,words*2,'A');
}
+ return 0;
}
/* Fill words stepping a single page at every write, so we continue to
* touch all the pages in the smallest amount of time reducing the
* effectiveness of caches, and making it hard for the OS to transfer
- * pages on the swap. */
-void memtest_fill_random(unsigned long *l, size_t bytes) {
+ * pages on the swap.
+ *
+ * In this test we can't call rand() since the system may be completely
+ * unable to handle library calls, so we have to resort to our own
+ * PRNG that only uses local state. We use an xorshift* PRNG. */
+#define xorshift64star_next() do { \
+ rseed ^= rseed >> 12; \
+ rseed ^= rseed << 25; \
+ rseed ^= rseed >> 27; \
+ rout = rseed * UINT64_C(2685821657736338717); \
+} while(0)
+
+void memtest_fill_random(unsigned long *l, size_t bytes, int interactive) {
unsigned long step = 4096/sizeof(unsigned long);
unsigned long words = bytes/sizeof(unsigned long)/2;
unsigned long iwords = words/step; /* words per iteration */
unsigned long off, w, *l1, *l2;
+ uint64_t rseed = UINT64_C(0xd13133de9afdb566); /* Just a random seed. */
+ uint64_t rout = 0;
assert((bytes & 4095) == 0);
for (off = 0; off < step; off++) {
l1 = l+off;
l2 = l1+words;
for (w = 0; w < iwords; w++) {
-#ifdef MEMTEST_32BIT
- *l1 = *l2 = ((unsigned long) (rand()&0xffff)) |
- (((unsigned long) (rand()&0xffff)) << 16);
-#else
- *l1 = *l2 = ((unsigned long) (rand()&0xffff)) |
- (((unsigned long) (rand()&0xffff)) << 16) |
- (((unsigned long) (rand()&0xffff)) << 32) |
- (((unsigned long) (rand()&0xffff)) << 48);
-#endif
+ xorshift64star_next();
+ *l1 = *l2 = (unsigned long) rout;
l1 += step;
l2 += step;
- if ((w & 0xffff) == 0)
+ if ((w & 0xffff) == 0 && interactive)
memtest_progress_step(w+iwords*off,words,'R');
}
}
@@ -146,7 +161,7 @@ void memtest_fill_random(unsigned long *l, size_t bytes) {
/* Like memtest_fill_random() but uses the two specified values to fill
* memory, in an alternated way (v1|v2|v1|v2|...) */
void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
- unsigned long v2, char sym)
+ unsigned long v2, char sym, int interactive)
{
unsigned long step = 4096/sizeof(unsigned long);
unsigned long words = bytes/sizeof(unsigned long)/2;
@@ -170,13 +185,13 @@ void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
#endif
l1 += step;
l2 += step;
- if ((w & 0xffff) == 0)
+ if ((w & 0xffff) == 0 && interactive)
memtest_progress_step(w+iwords*off,words,sym);
}
}
}
-void memtest_compare(unsigned long *l, size_t bytes) {
+int memtest_compare(unsigned long *l, size_t bytes, int interactive) {
unsigned long words = bytes/sizeof(unsigned long)/2;
unsigned long w, *l1, *l2;
@@ -185,84 +200,150 @@ void memtest_compare(unsigned long *l, size_t bytes) {
l2 = l1+words;
for (w = 0; w < words; w++) {
if (*l1 != *l2) {
- printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
- (void*)l1, (void*)l2, *l1, *l2);
- exit(1);
+ if (interactive) {
+ printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
+ (void*)l1, (void*)l2, *l1, *l2);
+ exit(1);
+ }
+ return 1;
}
l1 ++;
l2 ++;
- if ((w & 0xffff) == 0) memtest_progress_step(w,words,'=');
+ if ((w & 0xffff) == 0 && interactive)
+ memtest_progress_step(w,words,'=');
}
+ return 0;
}
-void memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times) {
+int memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times,
+ int interactive)
+{
int j;
+ int errors = 0;
for (j = 0; j < times; j++) {
- memtest_progress_start("Compare",pass);
- memtest_compare(m,bytes);
- memtest_progress_end();
+ if (interactive) memtest_progress_start("Compare",pass);
+ errors += memtest_compare(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
}
+ return errors;
}
-void memtest_test(size_t megabytes, int passes) {
- size_t bytes = megabytes*1024*1024;
- unsigned long *m = malloc(bytes);
+/* Test the specified memory. The number of bytes must be multiple of 4096.
+ * If interactive is true the program exits with an error and prints
+ * ASCII art to show progress. Instead when interactive is 0, it can
+ * be used as an API call, and returns 1 if memory errors were found or
+ * 0 if there were no errors detected. */
+int memtest_test(unsigned long *m, size_t bytes, int passes, int interactive) {
int pass = 0;
+ int errors = 0;
- if (m == NULL) {
- fprintf(stderr,"Unable to allocate %zu megabytes: %s",
- megabytes, strerror(errno));
- exit(1);
- }
while (pass != passes) {
pass++;
- memtest_progress_start("Addressing test",pass);
- memtest_addressing(m,bytes);
- memtest_progress_end();
+ if (interactive) memtest_progress_start("Addressing test",pass);
+ errors += memtest_addressing(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
- memtest_progress_start("Random fill",pass);
- memtest_fill_random(m,bytes);
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Random fill",pass);
+ memtest_fill_random(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
- memtest_progress_start("Solid fill",pass);
- memtest_fill_value(m,bytes,0,(unsigned long)-1,'S');
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Solid fill",pass);
+ memtest_fill_value(m,bytes,0,(unsigned long)-1,'S',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
- memtest_progress_start("Checkerboard fill",pass);
- memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C');
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Checkerboard fill",pass);
+ memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
}
+ return errors;
}
-void memtest_non_destructive_invert(void *addr, size_t size) {
- volatile unsigned long *p = addr;
- size_t words = size / sizeof(unsigned long);
- size_t j;
+/* A version of memtest_test() that tests memory in small pieces
+ * in order to restore the memory content at exit.
+ *
+ * One problem we have with this approach, is that the cache can avoid
+ * real memory accesses, and we can't test big chunks of memory at the
+ * same time, because we need to backup them on the stack (the allocator
+ * may not be usable or we may be already in an out of memory condition).
+ * So what we do is to try to trash the cache with useless memory accesses
+ * between the fill and compare cycles. */
+#define MEMTEST_BACKUP_WORDS (1024*(1024/sizeof(long)))
+/* Random accesses of MEMTEST_DECACHE_SIZE are performed at the start and
+ * end of the region between fill and compare cycles in order to trash
+ * the cache. */
+#define MEMTEST_DECACHE_SIZE (1024*8)
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes) {
+ unsigned long backup[MEMTEST_BACKUP_WORDS];
+ unsigned long *p = m;
+ unsigned long *end = (unsigned long*) (((unsigned char*)m)+(bytes-MEMTEST_DECACHE_SIZE));
+ size_t left = bytes;
+ int errors = 0;
- /* Invert */
- for (j = 0; j < words; j++)
- p[j] = ~p[j];
-}
+ if (bytes & 4095) return 0; /* Can't test across 4k page boundaries. */
+ if (bytes < 4096*2) return 0; /* Can't test a single page. */
+
+ while(left) {
+ /* If we have to test a single final page, go back a single page
+ * so that we can test two pages, since the code can't test a single
+ * page but at least two. */
+ if (left == 4096) {
+ left += 4096;
+ p -= 4096/sizeof(unsigned long);
+ }
-void memtest_non_destructive_swap(void *addr, size_t size) {
- volatile unsigned long *p = addr;
- size_t words = size / sizeof(unsigned long);
- size_t j;
+ int pass = 0;
+ size_t len = (left > sizeof(backup)) ? sizeof(backup) : left;
- /* Swap */
- for (j = 0; j < words; j += 2) {
- unsigned long a, b;
+ /* Always test an even number of pages. */
+ if (len/4096 % 2) len -= 4096;
+
+ memcpy(backup,p,len); /* Backup. */
+ while(pass != passes) {
+ pass++;
+ errors += memtest_addressing(p,len,0);
+ memtest_fill_random(p,len,0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,0,(unsigned long)-1,'S',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,ULONG_ONEZERO,ULONG_ZEROONE,'C',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ }
+ memcpy(p,backup,len); /* Restore. */
+ left -= len;
+ p += len/sizeof(unsigned long);
+ }
+ return errors;
+}
- a = p[j];
- b = p[j+1];
- p[j] = b;
- p[j+1] = a;
+/* Perform an interactive test allocating the specified number of megabytes. */
+void memtest_alloc_and_test(size_t megabytes, int passes) {
+ size_t bytes = megabytes*1024*1024;
+ unsigned long *m = malloc(bytes);
+
+ if (m == NULL) {
+ fprintf(stderr,"Unable to allocate %zu megabytes: %s",
+ megabytes, strerror(errno));
+ exit(1);
}
+ memtest_test(m,bytes,passes,1);
+ free(m);
}
void memtest(size_t megabytes, int passes) {
@@ -270,7 +351,7 @@ void memtest(size_t megabytes, int passes) {
ws.ws_col = 80;
ws.ws_row = 20;
}
- memtest_test(megabytes,passes);
+ memtest_alloc_and_test(megabytes,passes);
printf("\nYour memory passed this test.\n");
printf("Please if you are still in doubt use the following two tools:\n");
printf("1) memtest86: http://www.memtest86.com/\n");
diff --git a/src/module.c b/src/module.c
new file mode 100644
index 000000000..fda68b273
--- /dev/null
+++ b/src/module.c
@@ -0,0 +1,3932 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include <dlfcn.h>
+
+#define REDISMODULE_CORE 1
+#include "redismodule.h"
+
+/* --------------------------------------------------------------------------
+ * Private data structures used by the modules system. Those are data
+ * structures that are never exposed to Redis Modules, if not as void
+ * pointers that have an API the module can call with them)
+ * -------------------------------------------------------------------------- */
+
+/* This structure represents a module inside the system. */
+struct RedisModule {
+ void *handle; /* Module dlopen() handle. */
+ char *name; /* Module name. */
+ int ver; /* Module version. We use just progressive integers. */
+ int apiver; /* Module API version as requested during initialization.*/
+ list *types; /* Module data types. */
+};
+typedef struct RedisModule RedisModule;
+
+static dict *modules; /* Hash table of modules. SDS -> RedisModule ptr.*/
+
+/* Entries in the context->amqueue array, representing objects to free
+ * when the callback returns. */
+struct AutoMemEntry {
+ void *ptr;
+ int type;
+};
+
+/* AutMemEntry type field values. */
+#define REDISMODULE_AM_KEY 0
+#define REDISMODULE_AM_STRING 1
+#define REDISMODULE_AM_REPLY 2
+#define REDISMODULE_AM_FREED 3 /* Explicitly freed by user already. */
+
+/* The pool allocator block. Redis Modules can allocate memory via this special
+ * allocator that will automatically release it all once the callback returns.
+ * This means that it can only be used for ephemeral allocations. However
+ * there are two advantages for modules to use this API:
+ *
+ * 1) The memory is automatically released when the callback returns.
+ * 2) This allocator is faster for many small allocations since whole blocks
+ * are allocated, and small pieces returned to the caller just advancing
+ * the index of the allocation.
+ *
+ * Allocations are always rounded to the size of the void pointer in order
+ * to always return aligned memory chunks. */
+
+#define REDISMODULE_POOL_ALLOC_MIN_SIZE (1024*8)
+#define REDISMODULE_POOL_ALLOC_ALIGN (sizeof(void*))
+
+typedef struct RedisModulePoolAllocBlock {
+ uint32_t size;
+ uint32_t used;
+ struct RedisModulePoolAllocBlock *next;
+ char memory[];
+} RedisModulePoolAllocBlock;
+
+/* This structure represents the context in which Redis modules operate.
+ * Most APIs module can access, get a pointer to the context, so that the API
+ * implementation can hold state across calls, or remember what to free after
+ * the call and so forth.
+ *
+ * Note that not all the context structure is always filled with actual values
+ * but only the fields needed in a given context. */
+
+struct RedisModuleBlockedClient;
+
+struct RedisModuleCtx {
+ void *getapifuncptr; /* NOTE: Must be the first field. */
+ struct RedisModule *module; /* Module reference. */
+ client *client; /* Client calling a command. */
+ struct RedisModuleBlockedClient *blocked_client; /* Blocked client for
+ thread safe context. */
+ struct AutoMemEntry *amqueue; /* Auto memory queue of objects to free. */
+ int amqueue_len; /* Number of slots in amqueue. */
+ int amqueue_used; /* Number of used slots in amqueue. */
+ int flags; /* REDISMODULE_CTX_... flags. */
+ void **postponed_arrays; /* To set with RM_ReplySetArrayLength(). */
+ int postponed_arrays_count; /* Number of entries in postponed_arrays. */
+ void *blocked_privdata; /* Privdata set when unblocking a client. */
+
+ /* Used if there is the REDISMODULE_CTX_KEYS_POS_REQUEST flag set. */
+ int *keys_pos;
+ int keys_count;
+
+ struct RedisModulePoolAllocBlock *pa_head;
+};
+typedef struct RedisModuleCtx RedisModuleCtx;
+
+#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL, 0, NULL}
+#define REDISMODULE_CTX_MULTI_EMITTED (1<<0)
+#define REDISMODULE_CTX_AUTO_MEMORY (1<<1)
+#define REDISMODULE_CTX_KEYS_POS_REQUEST (1<<2)
+#define REDISMODULE_CTX_BLOCKED_REPLY (1<<3)
+#define REDISMODULE_CTX_BLOCKED_TIMEOUT (1<<4)
+#define REDISMODULE_CTX_THREAD_SAFE (1<<5)
+
+/* This represents a Redis key opened with RM_OpenKey(). */
+struct RedisModuleKey {
+ RedisModuleCtx *ctx;
+ redisDb *db;
+ robj *key; /* Key name object. */
+ robj *value; /* Value object, or NULL if the key was not found. */
+ void *iter; /* Iterator. */
+ int mode; /* Opening mode. */
+
+ /* Zset iterator. */
+ uint32_t ztype; /* REDISMODULE_ZSET_RANGE_* */
+ zrangespec zrs; /* Score range. */
+ zlexrangespec zlrs; /* Lex range. */
+ uint32_t zstart; /* Start pos for positional ranges. */
+ uint32_t zend; /* End pos for positional ranges. */
+ void *zcurrent; /* Zset iterator current node. */
+ int zer; /* Zset iterator end reached flag
+ (true if end was reached). */
+};
+typedef struct RedisModuleKey RedisModuleKey;
+
+/* RedisModuleKey 'ztype' values. */
+#define REDISMODULE_ZSET_RANGE_NONE 0 /* This must always be 0. */
+#define REDISMODULE_ZSET_RANGE_LEX 1
+#define REDISMODULE_ZSET_RANGE_SCORE 2
+#define REDISMODULE_ZSET_RANGE_POS 3
+
+/* Function pointer type of a function representing a command inside
+ * a Redis module. */
+typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, void **argv, int argc);
+
+/* This struct holds the information about a command registered by a module.*/
+struct RedisModuleCommandProxy {
+ struct RedisModule *module;
+ RedisModuleCmdFunc func;
+ struct redisCommand *rediscmd;
+};
+typedef struct RedisModuleCommandProxy RedisModuleCommandProxy;
+
+#define REDISMODULE_REPLYFLAG_NONE 0
+#define REDISMODULE_REPLYFLAG_TOPARSE (1<<0) /* Protocol must be parsed. */
+#define REDISMODULE_REPLYFLAG_NESTED (1<<1) /* Nested reply object. No proto
+ or struct free. */
+
+/* Reply of RM_Call() function. The function is filled in a lazy
+ * way depending on the function called on the reply structure. By default
+ * only the type, proto and protolen are filled. */
+typedef struct RedisModuleCallReply {
+ RedisModuleCtx *ctx;
+ int type; /* REDISMODULE_REPLY_... */
+ int flags; /* REDISMODULE_REPLYFLAG_... */
+ size_t len; /* Len of strings or num of elements of arrays. */
+ char *proto; /* Raw reply protocol. An SDS string at top-level object. */
+ size_t protolen;/* Length of protocol. */
+ union {
+ const char *str; /* String pointer for string and error replies. This
+ does not need to be freed, always points inside
+ a reply->proto buffer of the reply object or, in
+ case of array elements, of parent reply objects. */
+ long long ll; /* Reply value for integer reply. */
+ struct RedisModuleCallReply *array; /* Array of sub-reply elements. */
+ } val;
+} RedisModuleCallReply;
+
+/* Structure representing a blocked client. We get a pointer to such
+ * an object when blocking from modules. */
+typedef struct RedisModuleBlockedClient {
+ client *client; /* Pointer to the blocked client. or NULL if the client
+ was destroyed during the life of this object. */
+ RedisModule *module; /* Module blocking the client. */
+ RedisModuleCmdFunc reply_callback; /* Reply callback on normal completion.*/
+ RedisModuleCmdFunc timeout_callback; /* Reply callback on timeout. */
+ void (*free_privdata)(void *); /* privdata cleanup callback. */
+ void *privdata; /* Module private data that may be used by the reply
+ or timeout callback. It is set via the
+ RedisModule_UnblockClient() API. */
+ client *reply_client; /* Fake client used to accumulate replies
+ in thread safe contexts. */
+ int dbid; /* Database number selected by the original client. */
+} RedisModuleBlockedClient;
+
+static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER;
+static list *moduleUnblockedClients;
+
+/* We need a mutex that is unlocked / relocked in beforeSleep() in order to
+ * allow thread safe contexts to execute commands at a safe moment. */
+static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER;
+
+/* --------------------------------------------------------------------------
+ * Prototypes
+ * -------------------------------------------------------------------------- */
+
+void RM_FreeCallReply(RedisModuleCallReply *reply);
+void RM_CloseKey(RedisModuleKey *key);
+void autoMemoryCollect(RedisModuleCtx *ctx);
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap);
+void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx);
+void RM_ZsetRangeStop(RedisModuleKey *kp);
+static void zsetKeyReset(RedisModuleKey *key);
+
+/* --------------------------------------------------------------------------
+ * Heap allocation raw functions
+ * -------------------------------------------------------------------------- */
+
+/* Use like malloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using malloc(). */
+void *RM_Alloc(size_t bytes) {
+ return zmalloc(bytes);
+}
+
+/* Use like calloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using calloc() directly. */
+void *RM_Calloc(size_t nmemb, size_t size) {
+ return zcalloc(nmemb*size);
+}
+
+/* Use like realloc() for memory obtained with RedisModule_Alloc(). */
+void* RM_Realloc(void *ptr, size_t bytes) {
+ return zrealloc(ptr,bytes);
+}
+
+/* Use like free() for memory obtained by RedisModule_Alloc() and
+ * RedisModule_Realloc(). However you should never try to free with
+ * RedisModule_Free() memory allocated with malloc() inside your module. */
+void RM_Free(void *ptr) {
+ zfree(ptr);
+}
+
+/* Like strdup() but returns memory allocated with RedisModule_Alloc(). */
+char *RM_Strdup(const char *str) {
+ return zstrdup(str);
+}
+
+/* --------------------------------------------------------------------------
+ * Pool allocator
+ * -------------------------------------------------------------------------- */
+
+/* Release the chain of blocks used for pool allocations. */
+void poolAllocRelease(RedisModuleCtx *ctx) {
+ RedisModulePoolAllocBlock *head = ctx->pa_head, *next;
+
+ while(head != NULL) {
+ next = head->next;
+ zfree(head);
+ head = next;
+ }
+ ctx->pa_head = NULL;
+}
+
+/* Return heap allocated memory that will be freed automatically when the
+ * module callback function returns. Mostly suitable for small allocations
+ * that are short living and must be released when the callback returns
+ * anyway. The returned memory is aligned to the architecture word size
+ * if at least word size bytes are requested, otherwise it is just
+ * aligned to the next power of two, so for example a 3 bytes request is
+ * 4 bytes aligned while a 2 bytes request is 2 bytes aligned.
+ *
+ * There is no realloc style function since when this is needed to use the
+ * pool allocator is not a good idea.
+ *
+ * The function returns NULL if `bytes` is 0. */
+void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes) {
+ if (bytes == 0) return NULL;
+ RedisModulePoolAllocBlock *b = ctx->pa_head;
+ size_t left = b ? b->size - b->used : 0;
+
+ /* Fix alignment. */
+ if (left >= bytes) {
+ size_t alignment = REDISMODULE_POOL_ALLOC_ALIGN;
+ while (bytes < alignment && alignment/2 >= bytes) alignment /= 2;
+ if (b->used % alignment)
+ b->used += alignment - (b->used % alignment);
+ left = (b->used > b->size) ? 0 : b->size - b->used;
+ }
+
+ /* Create a new block if needed. */
+ if (left < bytes) {
+ size_t blocksize = REDISMODULE_POOL_ALLOC_MIN_SIZE;
+ if (blocksize < bytes) blocksize = bytes;
+ b = zmalloc(sizeof(*b) + blocksize);
+ b->size = blocksize;
+ b->used = 0;
+ b->next = ctx->pa_head;
+ ctx->pa_head = b;
+ }
+
+ char *retval = b->memory + b->used;
+ b->used += bytes;
+ return retval;
+}
+
+/* --------------------------------------------------------------------------
+ * Helpers for modules API implementation
+ * -------------------------------------------------------------------------- */
+
+/* Create an empty key of the specified type. 'kp' must point to a key object
+ * opened for writing where the .value member is set to NULL because the
+ * key was found to be non existing.
+ *
+ * On success REDISMODULE_OK is returned and the key is populated with
+ * the value of the specified type. The function fails and returns
+ * REDISMODULE_ERR if:
+ *
+ * 1) The key is not open for writing.
+ * 2) The key is not empty.
+ * 3) The specified type is unknown.
+ */
+int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
+ robj *obj;
+
+ /* The key must be open for writing and non existing to proceed. */
+ if (!(key->mode & REDISMODULE_WRITE) || key->value)
+ return REDISMODULE_ERR;
+
+ switch(type) {
+ case REDISMODULE_KEYTYPE_LIST:
+ obj = createQuicklistObject();
+ quicklistSetOptions(obj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+ break;
+ case REDISMODULE_KEYTYPE_ZSET:
+ obj = createZsetZiplistObject();
+ break;
+ case REDISMODULE_KEYTYPE_HASH:
+ obj = createHashObject();
+ break;
+ default: return REDISMODULE_ERR;
+ }
+ dbAdd(key->db,key->key,obj);
+ key->value = obj;
+ return REDISMODULE_OK;
+}
+
+/* This function is called in low-level API implementation functions in order
+ * to check if the value associated with the key remained empty after an
+ * operation that removed elements from an aggregate data type.
+ *
+ * If this happens, the key is deleted from the DB and the key object state
+ * is set to the right one in order to be targeted again by write operations
+ * possibly recreating the key if needed.
+ *
+ * The function returns 1 if the key value object is found empty and is
+ * deleted, otherwise 0 is returned. */
+int moduleDelKeyIfEmpty(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL) return 0;
+ int isempty;
+ robj *o = key->value;
+
+ switch(o->type) {
+ case OBJ_LIST: isempty = listTypeLength(o) == 0; break;
+ case OBJ_SET: isempty = setTypeSize(o) == 0; break;
+ case OBJ_ZSET: isempty = zsetLength(o) == 0; break;
+ case OBJ_HASH : isempty = hashTypeLength(o) == 0; break;
+ default: isempty = 0;
+ }
+
+ if (isempty) {
+ dbDelete(key->db,key->key);
+ key->value = NULL;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Service API exported to modules
+ *
+ * Note that all the exported APIs are called RM_<funcname> in the core
+ * and RedisModule_<funcname> in the module side (defined as function
+ * pointers in redismodule.h). In this way the dynamic linker does not
+ * mess with our global function pointers, overriding it with the symbols
+ * defined in the main executable having the same names.
+ * -------------------------------------------------------------------------- */
+
+/* Lookup the requested module API and store the function pointer into the
+ * target pointer. The function returns REDISMODULE_ERR if there is no such
+ * named API, otherwise REDISMODULE_OK.
+ *
+ * This function is not meant to be used by modules developer, it is only
+ * used implicitly by including redismodule.h. */
+int RM_GetApi(const char *funcname, void **targetPtrPtr) {
+ dictEntry *he = dictFind(server.moduleapi, funcname);
+ if (!he) return REDISMODULE_ERR;
+ *targetPtrPtr = dictGetVal(he);
+ return REDISMODULE_OK;
+}
+
+/* Free the context after the user function was called. */
+void moduleFreeContext(RedisModuleCtx *ctx) {
+ autoMemoryCollect(ctx);
+ poolAllocRelease(ctx);
+ if (ctx->postponed_arrays) {
+ zfree(ctx->postponed_arrays);
+ ctx->postponed_arrays_count = 0;
+ serverLog(LL_WARNING,
+ "API misuse detected in module %s: "
+ "RedisModule_ReplyWithArray(REDISMODULE_POSTPONED_ARRAY_LEN) "
+ "not matched by the same number of RedisModule_SetReplyArrayLen() "
+ "calls.",
+ ctx->module->name);
+ }
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) freeClient(ctx->client);
+}
+
+/* Helper function for when a command callback is called, in order to handle
+ * details needed to correctly replicate commands. */
+void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) {
+ client *c = ctx->client;
+
+ /* We don't want any automatic propagation here since in modules we handle
+ * replication / AOF propagation in explicit ways. */
+ preventCommandPropagation(c);
+
+ /* Handle the replication of the final EXEC, since whatever a command
+ * emits is always wrappered around MULTI/EXEC. */
+ if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) {
+ robj *propargv[1];
+ propargv[0] = createStringObject("EXEC",4);
+ alsoPropagate(server.execCommand,c->db->id,propargv,1,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(propargv[0]);
+ }
+}
+
+/* This Redis command binds the normal Redis command invocation with commands
+ * exported by modules. */
+void RedisModuleCommandDispatcher(client *c) {
+ RedisModuleCommandProxy *cp = (void*)(unsigned long)c->cmd->getkeys_proc;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ ctx.module = cp->module;
+ ctx.client = c;
+ cp->func(&ctx,(void**)c->argv,c->argc);
+ moduleHandlePropagationAfterCommandCallback(&ctx);
+ moduleFreeContext(&ctx);
+}
+
+/* This function returns the list of keys, with the same interface as the
+ * 'getkeys' function of the native commands, for module commands that exported
+ * the "getkeys-api" flag during the registration. This is done when the
+ * list of keys are not at fixed positions, so that first/last/step cannot
+ * be used.
+ *
+ * In order to accomplish its work, the module command is called, flagging
+ * the context in a way that the command can recognize this is a special
+ * "get keys" call by calling RedisModule_IsKeysPositionRequest(ctx). */
+int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ RedisModuleCommandProxy *cp = (void*)(unsigned long)cmd->getkeys_proc;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ ctx.module = cp->module;
+ ctx.client = NULL;
+ ctx.flags |= REDISMODULE_CTX_KEYS_POS_REQUEST;
+ cp->func(&ctx,(void**)argv,argc);
+ int *res = ctx.keys_pos;
+ if (numkeys) *numkeys = ctx.keys_count;
+ moduleFreeContext(&ctx);
+ return res;
+}
+
+/* Return non-zero if a module command, that was declared with the
+ * flag "getkeys-api", is called in a special way to get the keys positions
+ * and not to get executed. Otherwise zero is returned. */
+int RM_IsKeysPositionRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST) != 0;
+}
+
+/* When a module command is called in order to obtain the position of
+ * keys, since it was flagged as "getkeys-api" during the registration,
+ * the command implementation checks for this special call using the
+ * RedisModule_IsKeysPositionRequest() API and uses this function in
+ * order to report keys, like in the following example:
+ *
+ * if (RedisModule_IsKeysPositionRequest(ctx)) {
+ * RedisModule_KeyAtPos(ctx,1);
+ * RedisModule_KeyAtPos(ctx,2);
+ * }
+ *
+ * Note: in the example below the get keys API would not be needed since
+ * keys are at fixed positions. This interface is only used for commands
+ * with a more complex structure. */
+void RM_KeyAtPos(RedisModuleCtx *ctx, int pos) {
+ if (!(ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST)) return;
+ if (pos <= 0) return;
+ ctx->keys_pos = zrealloc(ctx->keys_pos,sizeof(int)*(ctx->keys_count+1));
+ ctx->keys_pos[ctx->keys_count++] = pos;
+}
+
+/* Helper for RM_CreateCommand(). Truns a string representing command
+ * flags into the command flags used by the Redis core.
+ *
+ * It returns the set of flags, or -1 if unknown flags are found. */
+int commandFlagsFromString(char *s) {
+ int count, j;
+ int flags = 0;
+ sds *tokens = sdssplitlen(s,strlen(s)," ",1,&count);
+ for (j = 0; j < count; j++) {
+ char *t = tokens[j];
+ if (!strcasecmp(t,"write")) flags |= CMD_WRITE;
+ else if (!strcasecmp(t,"readonly")) flags |= CMD_READONLY;
+ else if (!strcasecmp(t,"admin")) flags |= CMD_ADMIN;
+ else if (!strcasecmp(t,"deny-oom")) flags |= CMD_DENYOOM;
+ else if (!strcasecmp(t,"deny-script")) flags |= CMD_NOSCRIPT;
+ else if (!strcasecmp(t,"allow-loading")) flags |= CMD_LOADING;
+ else if (!strcasecmp(t,"pubsub")) flags |= CMD_PUBSUB;
+ else if (!strcasecmp(t,"random")) flags |= CMD_RANDOM;
+ else if (!strcasecmp(t,"allow-stale")) flags |= CMD_STALE;
+ else if (!strcasecmp(t,"no-monitor")) flags |= CMD_SKIP_MONITOR;
+ else if (!strcasecmp(t,"fast")) flags |= CMD_FAST;
+ else if (!strcasecmp(t,"getkeys-api")) flags |= CMD_MODULE_GETKEYS;
+ else if (!strcasecmp(t,"no-cluster")) flags |= CMD_MODULE_NO_CLUSTER;
+ else break;
+ }
+ sdsfreesplitres(tokens,count);
+ if (j != count) return -1; /* Some token not processed correctly. */
+ return flags;
+}
+
+/* Register a new command in the Redis server, that will be handled by
+ * calling the function pointer 'func' using the RedisModule calling
+ * convention. The function returns REDISMODULE_ERR if the specified command
+ * name is already busy or a set of invalid flags were passed, otherwise
+ * REDISMODULE_OK is returned and the new command is registered.
+ *
+ * This function must be called during the initialization of the module
+ * inside the RedisModule_OnLoad() function. Calling this function outside
+ * of the initialization function is not defined.
+ *
+ * The command function type is the following:
+ *
+ * int MyCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+ *
+ * And is supposed to always return REDISMODULE_OK.
+ *
+ * The set of flags 'strflags' specify the behavior of the command, and should
+ * be passed as a C string compoesd of space separated words, like for
+ * example "write deny-oom". The set of flags are:
+ *
+ * * **"write"**: The command may modify the data set (it may also read
+ * from it).
+ * * **"readonly"**: The command returns data from keys but never writes.
+ * * **"admin"**: The command is an administrative command (may change
+ * replication or perform similar tasks).
+ * * **"deny-oom"**: The command may use additional memory and should be
+ * denied during out of memory conditions.
+ * * **"deny-script"**: Don't allow this command in Lua scripts.
+ * * **"allow-loading"**: Allow this command while the server is loading data.
+ * Only commands not interacting with the data set
+ * should be allowed to run in this mode. If not sure
+ * don't use this flag.
+ * * **"pubsub"**: The command publishes things on Pub/Sub channels.
+ * * **"random"**: The command may have different outputs even starting
+ * from the same input arguments and key values.
+ * * **"allow-stale"**: The command is allowed to run on slaves that don't
+ * serve stale data. Don't use if you don't know what
+ * this means.
+ * * **"no-monitor"**: Don't propoagate the command on monitor. Use this if
+ * the command has sensible data among the arguments.
+ * * **"fast"**: The command time complexity is not greater
+ * than O(log(N)) where N is the size of the collection or
+ * anything else representing the normal scalability
+ * issue with the command.
+ * * **"getkeys-api"**: The command implements the interface to return
+ * the arguments that are keys. Used when start/stop/step
+ * is not enough because of the command syntax.
+ * * **"no-cluster"**: The command should not register in Redis Cluster
+ * since is not designed to work with it because, for
+ * example, is unable to report the position of the
+ * keys, programmatically creates key names, or any
+ * other reason.
+ */
+int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) {
+    int flags = strflags ? commandFlagsFromString((char*)strflags) : 0;
+    if (flags == -1) return REDISMODULE_ERR;
+    /* "no-cluster" commands are refused at registration time when
+     * cluster mode is enabled. */
+    if ((flags & CMD_MODULE_NO_CLUSTER) && server.cluster_enabled)
+        return REDISMODULE_ERR;
+
+    struct redisCommand *rediscmd;
+    RedisModuleCommandProxy *cp;
+    sds cmdname = sdsnew(name);
+
+    /* Check if the command name is already taken. */
+    if (lookupCommand((char*)name) != NULL) {
+        sdsfree(cmdname);
+        return REDISMODULE_ERR;
+    }
+
+    /* Create a command "proxy", which is a structure that is referenced
+     * in the command table, so that the generic command that works as
+     * binding between modules and Redis, can know what function to call
+     * and what the module is.
+     *
+     * Note that we use the Redis command table 'getkeys_proc' in order to
+     * pass a reference to the command proxy structure. */
+    cp = zmalloc(sizeof(*cp));
+    cp->module = ctx->module;
+    cp->func = cmdfunc;
+    cp->rediscmd = zmalloc(sizeof(*rediscmd));
+    cp->rediscmd->name = cmdname;
+    cp->rediscmd->proc = RedisModuleCommandDispatcher;
+    cp->rediscmd->arity = -1;
+    cp->rediscmd->flags = flags | CMD_MODULE;
+    cp->rediscmd->getkeys_proc = (redisGetKeysProc*)(unsigned long)cp;
+    cp->rediscmd->firstkey = firstkey;
+    cp->rediscmd->lastkey = lastkey;
+    cp->rediscmd->keystep = keystep;
+    cp->rediscmd->microseconds = 0;
+    cp->rediscmd->calls = 0;
+    dictAdd(server.commands,sdsdup(cmdname),cp->rediscmd);
+    dictAdd(server.orig_commands,sdsdup(cmdname),cp->rediscmd);
+    return REDISMODULE_OK;
+}
+
+/* Called by RM_Init() to set up the `ctx->module` structure.
+ *
+ * This is an internal function, Redis modules developers don't need
+ * to use it. */
+void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver){
+    RedisModule *module;
+
+    /* Only the first call has any effect: the module structure is
+     * created once per context. */
+    if (ctx->module != NULL) return;
+    module = zmalloc(sizeof(*module));
+    module->name = sdsnew((char*)name);
+    module->ver = ver;
+    module->apiver = apiver;
+    module->types = listCreate();
+    ctx->module = module;
+}
+
+/* Return the current UNIX time in milliseconds. */
+long long RM_Milliseconds(void) {
+    return mstime();
+}
+
+/* --------------------------------------------------------------------------
+ * Automatic memory management for modules
+ * -------------------------------------------------------------------------- */
+
+/* Enable automatic memory management. See API.md for more information.
+ *
+ * The function must be called as the first function of a command implementation
+ * that wants to use automatic memory. */
+void RM_AutoMemory(RedisModuleCtx *ctx) {
+    ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
+}
+
+/* Add a new object to release automatically when the callback returns. */
+void autoMemoryAdd(RedisModuleCtx *ctx, int type, void *ptr) {
+    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
+    /* Grow the queue geometrically, with a minimum size of 16 entries. */
+    if (ctx->amqueue_used == ctx->amqueue_len) {
+        ctx->amqueue_len *= 2;
+        if (ctx->amqueue_len < 16) ctx->amqueue_len = 16;
+        ctx->amqueue = zrealloc(ctx->amqueue,sizeof(struct AutoMemEntry)*ctx->amqueue_len);
+    }
+    ctx->amqueue[ctx->amqueue_used].type = type;
+    ctx->amqueue[ctx->amqueue_used].ptr = ptr;
+    ctx->amqueue_used++;
+}
+
+/* Mark an object as freed in the auto release queue, so that users can still
+ * free things manually if they want.
+ *
+ * The function returns 1 if the object was actually found in the auto memory
+ * pool, otherwise 0 is returned. */
+int autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) {
+    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return 0;
+
+    int count = (ctx->amqueue_used+1)/2;
+    for (int j = 0; j < count; j++) {
+        for (int side = 0; side < 2; side++) {
+            /* For side = 0 check right side of the array, for
+             * side = 1 check the left side instead (zig-zag scanning). */
+            int i = (side == 0) ? (ctx->amqueue_used - 1 - j) : j;
+            if (ctx->amqueue[i].type == type &&
+                ctx->amqueue[i].ptr == ptr)
+            {
+                ctx->amqueue[i].type = REDISMODULE_AM_FREED;
+
+                /* Switch the freed element and the last element, to avoid growing
+                 * the queue unnecessarily if we allocate/free in a loop */
+                if (i != ctx->amqueue_used-1) {
+                    ctx->amqueue[i] = ctx->amqueue[ctx->amqueue_used-1];
+                }
+
+                /* Reduce the size of the queue because we either moved the top
+                 * element elsewhere or freed it */
+                ctx->amqueue_used--;
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Release all the objects in the queue. */
+void autoMemoryCollect(RedisModuleCtx *ctx) {
+    if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
+    /* Clear the AUTO_MEMORY flag from the context, otherwise the functions
+     * we call to free the resources, will try to scan the auto release
+     * queue to mark the entries as freed. */
+    ctx->flags &= ~REDISMODULE_CTX_AUTO_MEMORY;
+    int j;
+    for (j = 0; j < ctx->amqueue_used; j++) {
+        void *ptr = ctx->amqueue[j].ptr;
+        switch(ctx->amqueue[j].type) {
+        case REDISMODULE_AM_STRING: decrRefCount(ptr); break;
+        case REDISMODULE_AM_REPLY: RM_FreeCallReply(ptr); break;
+        case REDISMODULE_AM_KEY: RM_CloseKey(ptr); break;
+        }
+    }
+    /* Restore the flag and reset the queue to its empty state. */
+    ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
+    zfree(ctx->amqueue);
+    ctx->amqueue = NULL;
+    ctx->amqueue_len = 0;
+    ctx->amqueue_used = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * String objects APIs
+ * -------------------------------------------------------------------------- */
+
+/* Create a new module string object. The returned string must be freed
+ * with RedisModule_FreeString(), unless automatic memory is enabled.
+ *
+ * The string is created by copying the `len` bytes starting
+ * at `ptr`. No reference is retained to the passed buffer. */
+RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len) {
+    RedisModuleString *o = createStringObject(ptr,len);
+    autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+    return o;
+}
+
+
+/* Create a new module string object from a printf format and arguments.
+ * The returned string must be freed with RedisModule_FreeString(), unless
+ * automatic memory is enabled.
+ *
+ * The string is created using the sds formatter function sdscatvprintf(). */
+RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...) {
+    sds s = sdsempty();
+
+    va_list ap;
+    va_start(ap, fmt);
+    s = sdscatvprintf(s, fmt, ap);
+    va_end(ap);
+
+    /* The object takes ownership of the sds string: no copy is made. */
+    RedisModuleString *o = createObject(OBJ_STRING, s);
+    autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+
+    return o;
+}
+
+
+/* Like RedisModule_CreateString(), but creates a string starting from a long long
+ * integer instead of taking a buffer and its length.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll) {
+    char buf[LONG_STR_SIZE];
+    size_t len = ll2string(buf,sizeof(buf),ll);
+    return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from another
+ * RedisModuleString.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str) {
+    RedisModuleString *o = dupStringObject(str);
+    autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+    return o;
+}
+
+/* Free a module string object obtained with one of the Redis modules API calls
+ * that return new string objects.
+ *
+ * It is possible to call this function even when automatic memory management
+ * is enabled. In that case the string will be released ASAP and removed
+ * from the pool of string to release at the end. */
+void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
+    decrRefCount(str);
+    autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str);
+}
+
+/* Every call to this function will make the string 'str' require
+ * an additional call to RedisModule_FreeString() in order to really
+ * free the string. Note that the automatic freeing of the string obtained
+ * enabling modules automatic memory management counts for one
+ * RedisModule_FreeString() call (it is just executed automatically).
+ *
+ * Normally you want to call this function when, at the same time
+ * the following conditions are true:
+ *
+ * 1) You have automatic memory management enabled.
+ * 2) You want to create string objects.
+ * 3) Those string objects you create need to live *after* the callback
+ *    function (for example a command implementation) creating them returns.
+ *
+ * Usually you want this in order to store the created string object
+ * into your own data structure, for example when implementing a new data
+ * type.
+ *
+ * Note that when memory management is turned off, you don't need
+ * any call to RetainString() since creating a string will always result
+ * into a string that lives after the callback function returns, if
+ * no FreeString() call is performed. */
+void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
+    if (!autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) {
+        /* Increment the string reference counting only if we can't
+         * just remove the object from the list of objects that should
+         * be reclaimed. Why we do that, instead of just incrementing
+         * the refcount in any case, and let the automatic FreeString()
+         * call at the end to bring the refcount back at the desired
+         * value? Because this way we ensure that the object refcount
+         * value is 1 (instead of going to 2 to be dropped later to 1)
+         * after the call to this function. This is needed for functions
+         * like RedisModule_StringAppendBuffer() to work. */
+        incrRefCount(str);
+    }
+}
+
+/* Given a string module object, this function returns the string pointer
+ * and length of the string. The returned pointer and length should only
+ * be used for read only accesses and never modified. */
+const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) {
+    /* NULL is tolerated: return a placeholder message so that buggy
+     * modules dereferencing a missing reply string fail gracefully. */
+    if (str == NULL) {
+        const char *errmsg = "(NULL string reply referenced in module)";
+        if (len) *len = strlen(errmsg);
+        return errmsg;
+    }
+    if (len) *len = sdslen(str->ptr);
+    return str->ptr;
+}
+
+/* --------------------------------------------------------------------------
+ * Higher level string operations
+ * ------------------------------------------------------------------------- */
+
+/* Convert the string into a long long integer, storing it at `*ll`.
+ * Returns REDISMODULE_OK on success. If the string can't be parsed
+ * as a valid, strict long long (no spaces before/after), REDISMODULE_ERR
+ * is returned. */
+int RM_StringToLongLong(const RedisModuleString *str, long long *ll) {
+    return string2ll(str->ptr,sdslen(str->ptr),ll) ? REDISMODULE_OK :
+                                                     REDISMODULE_ERR;
+}
+
+/* Convert the string into a double, storing it at `*d`.
+ * Returns REDISMODULE_OK on success or REDISMODULE_ERR if the string is
+ * not a valid string representation of a double value. */
+int RM_StringToDouble(const RedisModuleString *str, double *d) {
+    int retval = getDoubleFromObject(str,d);
+    return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Compare two string objects, returning -1, 0 or 1 respectively if
+ * a < b, a == b, a > b. Strings are compared byte by byte as two
+ * binary blobs without any encoding care / collation attempt. */
+int RM_StringCompare(RedisModuleString *a, RedisModuleString *b) {
+    return compareStringObjects(a,b);
+}
+
+/* Return the (possibly modified in encoding) input 'str' object if
+ * the string is unshared, otherwise NULL is returned. */
+RedisModuleString *moduleAssertUnsharedString(RedisModuleString *str) {
+    if (str->refcount != 1) {
+        serverLog(LL_WARNING,
+            "Module attempted to use an in-place string modify operation "
+            "with a string referenced multiple times. Please check the code "
+            "for API usage correctness.");
+        return NULL;
+    }
+    if (str->encoding == OBJ_ENCODING_EMBSTR) {
+        /* Note: here we "leak" the additional allocation that was
+         * used in order to store the embedded string in the object. */
+        str->ptr = sdsnewlen(str->ptr,sdslen(str->ptr));
+        str->encoding = OBJ_ENCODING_RAW;
+    } else if (str->encoding == OBJ_ENCODING_INT) {
+        /* Convert the string from integer to raw encoding. */
+        str->ptr = sdsfromlonglong((long)str->ptr);
+        str->encoding = OBJ_ENCODING_RAW;
+    }
+    return str;
+}
+
+/* Append the specified buffer to the string 'str'. The string must be a
+ * string created by the user that is referenced only a single time, otherwise
+ * REDISMODULE_ERR is returned and the operation is not performed. */
+int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) {
+    UNUSED(ctx);
+    str = moduleAssertUnsharedString(str);
+    if (str == NULL) return REDISMODULE_ERR;
+    str->ptr = sdscatlen(str->ptr,buf,len);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Reply APIs
+ *
+ * Most functions always return REDISMODULE_OK so you can use it with
+ * 'return' in order to return from the command implementation with:
+ *
+ * if (... some condition ...)
+ * return RM_ReplyWithLongLong(ctx,mycount);
+ * -------------------------------------------------------------------------- */
+
+/* Send an error about the number of arguments given to the command,
+ * citing the command name in the error message.
+ *
+ * Example:
+ *
+ *     if (argc != 3) return RedisModule_WrongArity(ctx);
+ */
+int RM_WrongArity(RedisModuleCtx *ctx) {
+    addReplyErrorFormat(ctx->client,
+        "wrong number of arguments for '%s' command",
+        (char*)ctx->client->argv[0]->ptr);
+    return REDISMODULE_OK;
+}
+
+/* Return the client object the `RM_Reply*` functions should target.
+ * Normally this is just `ctx->client`, that is the client that called
+ * the module command, however in the case of thread safe contexts there
+ * is no directly associated client (since it would not be safe to access
+ * the client from a thread), so instead the blocked client object referenced
+ * in the thread safe context, has a fake client that we just use to accumulate
+ * the replies. Later, when the client is unblocked, the accumulated replies
+ * are appended to the actual client.
+ *
+ * The function returns the client pointer depending on the context, or
+ * NULL if there is no potential client. This happens when we are in the
+ * context of a thread safe context that was not initialized with a blocked
+ * client object. */
+client *moduleGetReplyClient(RedisModuleCtx *ctx) {
+    if (!(ctx->flags & REDISMODULE_CTX_THREAD_SAFE) && ctx->client)
+        return ctx->client;
+    if (ctx->blocked_client)
+        return ctx->blocked_client->reply_client;
+    return NULL;
+}
+
+/* Send an integer reply to the client, with the specified long long value.
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    addReplyLongLong(c,ll);
+    return REDISMODULE_OK;
+}
+
+/* Reply with an error or simple string (status message). Used to implement
+ * ReplyWithSimpleString() and ReplyWithError().
+ * The 'prefix' is the single protocol type byte ("-" or "+").
+ * The function always returns REDISMODULE_OK. */
+int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    sds strmsg = sdsnewlen(prefix,1);
+    strmsg = sdscat(strmsg,msg);
+    strmsg = sdscatlen(strmsg,"\r\n",2);
+    addReplySds(c,strmsg);
+    return REDISMODULE_OK;
+}
+
+/* Reply with the error 'err'.
+ *
+ * Note that 'err' must contain all the error, including
+ * the initial error code. The function only provides the initial "-", so
+ * the usage is, for example:
+ *
+ *     RedisModule_ReplyWithError(ctx,"ERR Wrong Type");
+ *
+ * and not just:
+ *
+ *     RedisModule_ReplyWithError(ctx,"Wrong Type");
+ *
+ * The function always returns REDISMODULE_OK.
+ */
+int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
+    return replyWithStatus(ctx,err,"-");
+}
+
+/* Reply with a simple string (+... \r\n in RESP protocol). These replies
+ * are suitable only when sending a small non-binary string with small
+ * overhead, like "OK" or similar replies.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg) {
+    return replyWithStatus(ctx,msg,"+");
+}
+
+/* Reply with an array type of 'len' elements. However 'len' other calls
+ * to `ReplyWith*` style functions must follow in order to emit the elements
+ * of the array.
+ *
+ * When producing arrays with a number of elements that is not known beforehand
+ * the function can be called with the special count
+ * REDISMODULE_POSTPONED_ARRAY_LEN, and the actual number of elements can be
+ * later set with RedisModule_ReplySetArrayLength() (which will set the
+ * latest "open" count if there are multiple ones).
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    if (len == REDISMODULE_POSTPONED_ARRAY_LEN) {
+        /* Push a deferred length node on the stack of postponed arrays. */
+        ctx->postponed_arrays = zrealloc(ctx->postponed_arrays,sizeof(void*)*
+            (ctx->postponed_arrays_count+1));
+        ctx->postponed_arrays[ctx->postponed_arrays_count] =
+            addDeferredMultiBulkLength(c);
+        ctx->postponed_arrays_count++;
+    } else {
+        addReplyMultiBulkLen(c,len);
+    }
+    return REDISMODULE_OK;
+}
+
+/* When RedisModule_ReplyWithArray() is used with the argument
+ * REDISMODULE_POSTPONED_ARRAY_LEN, because we don't know beforehand the number
+ * of items we are going to output as elements of the array, this function
+ * will take care to set the array length.
+ *
+ * Since it is possible to have multiple array replies pending with unknown
+ * length, this function guarantees to always set the latest array length
+ * that was created in a postponed way.
+ *
+ * For example in order to output an array like [1,[10,20,30]] we
+ * could write:
+ *
+ *     RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ *     RedisModule_ReplyWithLongLong(ctx,1);
+ *     RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ *     RedisModule_ReplyWithLongLong(ctx,10);
+ *     RedisModule_ReplyWithLongLong(ctx,20);
+ *     RedisModule_ReplyWithLongLong(ctx,30);
+ *     RedisModule_ReplySetArrayLength(ctx,3); // Set len of 10,20,30 array.
+ *     RedisModule_ReplySetArrayLength(ctx,2); // Set len of top array
+ *
+ * Note that in the above example there is no reason to postpone the array
+ * length, since we produce a fixed number of elements, but in the practice
+ * the code may use an iterator or other ways of creating the output so
+ * that is not easy to calculate in advance the number of elements.
+ */
+void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return;
+    if (ctx->postponed_arrays_count == 0) {
+        serverLog(LL_WARNING,
+            "API misuse detected in module %s: "
+            "RedisModule_ReplySetArrayLength() called without previous "
+            "RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN) "
+            "call.", ctx->module->name);
+            return;
+    }
+    ctx->postponed_arrays_count--;
+    setDeferredMultiBulkLength(c,
+            ctx->postponed_arrays[ctx->postponed_arrays_count],
+            len);
+    /* Once the stack is empty the array itself can be released. */
+    if (ctx->postponed_arrays_count == 0) {
+        zfree(ctx->postponed_arrays);
+        ctx->postponed_arrays = NULL;
+    }
+}
+
+/* Reply with a bulk string, taking in input a C buffer pointer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    addReplyBulkCBuffer(c,(char*)buf,len);
+    return REDISMODULE_OK;
+}
+
+/* Reply with a bulk string, taking in input a RedisModuleString object.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    addReplyBulk(c,str);
+    return REDISMODULE_OK;
+}
+
+/* Reply to the client with a NULL. In the RESP protocol a NULL is encoded
+ * as the string "$-1\r\n".
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithNull(RedisModuleCtx *ctx) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    addReply(c,shared.nullbulk);
+    return REDISMODULE_OK;
+}
+
+/* Reply exactly what a Redis command returned us with RedisModule_Call().
+ * This function is useful when we use RedisModule_Call() in order to
+ * execute some command, as we want to reply to the client exactly the
+ * same reply we obtained by the command.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    /* The raw protocol of the reply is emitted verbatim. */
+    sds proto = sdsnewlen(reply->proto, reply->protolen);
+    addReplySds(c,proto);
+    return REDISMODULE_OK;
+}
+
+/* Send a string reply obtained converting the double 'd' into a bulk string.
+ * This function is basically equivalent to converting a double into
+ * a string in a C buffer, and then calling the function
+ * RedisModule_ReplyWithStringBuffer() with the buffer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) {
+    client *c = moduleGetReplyClient(ctx);
+    if (c == NULL) return REDISMODULE_OK;
+    addReplyDouble(c,d);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Commands replication API
+ * -------------------------------------------------------------------------- */
+
+/* Helper function to replicate MULTI the first time we replicate something
+ * in the context of a command execution. EXEC will be handled by the
+ * RedisModuleCommandDispatcher() function. */
+void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx) {
+    /* If we already emitted MULTI return ASAP. */
+    if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) return;
+    /* If this is a thread safe context, we do not want to wrap commands
+     * executed into MULTI/EXEC, they are executed as single commands
+     * from an external client in essence. */
+    if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) return;
+    execCommandPropagateMulti(ctx->client);
+    ctx->flags |= REDISMODULE_CTX_MULTI_EMITTED;
+}
+
+/* Replicate the specified command and arguments to slaves and AOF, as effect
+ * of execution of the calling command implementation.
+ *
+ * The replicated commands are always wrapped into the MULTI/EXEC that
+ * contains all the commands replicated in a given module command
+ * execution. However the commands replicated with RedisModule_Call()
+ * are the first items, the ones replicated with RedisModule_Replicate()
+ * will all follow before the EXEC.
+ *
+ * Modules should try to use one interface or the other.
+ *
+ * This command follows exactly the same interface of RedisModule_Call(),
+ * so a set of format specifiers must be passed, followed by arguments
+ * matching the provided format specifiers.
+ *
+ * Please refer to RedisModule_Call() for more information.
+ *
+ * The command returns REDISMODULE_ERR if the format specifiers are invalid
+ * or the command name does not belong to a known command. */
+int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+    struct redisCommand *cmd;
+    robj **argv = NULL;
+    int argc = 0, flags = 0, j;
+    va_list ap;
+
+    cmd = lookupCommandByCString((char*)cmdname);
+    if (!cmd) return REDISMODULE_ERR;
+
+    /* Create the client and dispatch the command. */
+    va_start(ap, fmt);
+    argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+    va_end(ap);
+    if (argv == NULL) return REDISMODULE_ERR;
+
+    /* Replicate! */
+    moduleReplicateMultiIfNeeded(ctx);
+    alsoPropagate(cmd,ctx->client->db->id,argv,argc,
+        PROPAGATE_AOF|PROPAGATE_REPL);
+
+    /* Release the argv. */
+    for (j = 0; j < argc; j++) decrRefCount(argv[j]);
+    zfree(argv);
+    return REDISMODULE_OK;
+}
+
+/* This function will replicate the command exactly as it was invoked
+ * by the client. Note that this function will not wrap the command into
+ * a MULTI/EXEC stanza, so it should not be mixed with other replication
+ * commands.
+ *
+ * Basically this form of replication is useful when you want to propagate
+ * the command to the slaves and AOF file exactly as it was called, since
+ * the command can just be re-executed to deterministically re-create the
+ * new state starting from the old one.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplicateVerbatim(RedisModuleCtx *ctx) {
+    alsoPropagate(ctx->client->cmd,ctx->client->db->id,
+        ctx->client->argv,ctx->client->argc,
+        PROPAGATE_AOF|PROPAGATE_REPL);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * DB and Key APIs -- Generic API
+ * -------------------------------------------------------------------------- */
+
+/* Return the ID of the current client calling the currently active module
+ * command. The returned ID has a few guarantees:
+ *
+ * 1. The ID is different for each different client, so if the same client
+ *    executes a module command multiple times, it can be recognized as
+ *    having the same ID, otherwise the ID will be different.
+ * 2. The ID increases monotonically. Clients connecting to the server later
+ *    are guaranteed to get IDs greater than any past ID previously seen.
+ *
+ * Valid IDs are from 1 to 2^64-1. If 0 is returned it means there is no way
+ * to fetch the ID in the context the function was currently called. */
+unsigned long long RM_GetClientId(RedisModuleCtx *ctx) {
+    if (ctx->client == NULL) return 0;
+    return ctx->client->id;
+}
+
+/* Return the currently selected DB. */
+int RM_GetSelectedDb(RedisModuleCtx *ctx) {
+    return ctx->client->db->id;
+}
+
+/* Change the currently selected DB. Returns an error if the id
+ * is out of range.
+ *
+ * Note that the client will retain the currently selected DB even after
+ * the Redis command implemented by the module calling this function
+ * returns.
+ *
+ * If the module command wishes to change something in a different DB and
+ * returns back to the original one, it should call RedisModule_GetSelectedDb()
+ * first, in order to restore the old DB number before returning. */
+int RM_SelectDb(RedisModuleCtx *ctx, int newid) {
+    int retval = selectDb(ctx->client,newid);
+    return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Return a handle representing a Redis key, so that it is possible
+ * to call other APIs with the key handle as argument to perform
+ * operations on the key.
+ *
+ * The return value is the handle representing the key, that must be
+ * closed with RM_CloseKey().
+ *
+ * If the key does not exist and WRITE mode is requested, the handle
+ * is still returned, since it is possible to perform operations on
+ * a yet not existing key (that will be created, for example, after
+ * a list push operation). If the mode is just READ instead, and the
+ * key does not exist, NULL is returned. However it is still safe to
+ * call RedisModule_CloseKey() and RedisModule_KeyType() on a NULL
+ * value. */
+void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) {
+    RedisModuleKey *kp;
+    robj *value;
+
+    if (mode & REDISMODULE_WRITE) {
+        value = lookupKeyWrite(ctx->client->db,keyname);
+    } else {
+        value = lookupKeyRead(ctx->client->db,keyname);
+        if (value == NULL) {
+            return NULL;
+        }
+    }
+
+    /* Setup the key handle. */
+    kp = zmalloc(sizeof(*kp));
+    kp->ctx = ctx;
+    kp->db = ctx->client->db;
+    kp->key = keyname;
+    incrRefCount(keyname);
+    kp->value = value;
+    kp->iter = NULL;
+    kp->mode = mode;
+    zsetKeyReset(kp);
+    autoMemoryAdd(ctx,REDISMODULE_AM_KEY,kp);
+    return (void*)kp;
+}
+
+/* Close a key handle. */
+void RM_CloseKey(RedisModuleKey *key) {
+    if (key == NULL) return;
+    /* Keys opened for writing may have been modified: signal it for
+     * keyspace notifications / WATCH semantics. */
+    if (key->mode & REDISMODULE_WRITE) signalModifiedKey(key->db,key->key);
+    /* TODO: if (key->iter) RM_KeyIteratorStop(kp); */
+    RM_ZsetRangeStop(key);
+    decrRefCount(key->key);
+    autoMemoryFreed(key->ctx,REDISMODULE_AM_KEY,key);
+    zfree(key);
+}
+
+/* Return the type of the key. If the key pointer is NULL then
+ * REDISMODULE_KEYTYPE_EMPTY is returned. */
+int RM_KeyType(RedisModuleKey *key) {
+    if (key == NULL || key->value == NULL) return REDISMODULE_KEYTYPE_EMPTY;
+    /* We map between defines so that we are free to change the internal
+     * defines as desired. */
+    switch(key->value->type) {
+    case OBJ_STRING: return REDISMODULE_KEYTYPE_STRING;
+    case OBJ_LIST: return REDISMODULE_KEYTYPE_LIST;
+    case OBJ_SET: return REDISMODULE_KEYTYPE_SET;
+    case OBJ_ZSET: return REDISMODULE_KEYTYPE_ZSET;
+    case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH;
+    case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE;
+    default: return 0;
+    }
+}
+
+/* Return the length of the value associated with the key.
+ * For strings this is the length of the string. For all the other types
+ * it is the number of elements (just counting keys for hashes).
+ *
+ * If the key pointer is NULL or the key is empty, zero is returned. */
+size_t RM_ValueLength(RedisModuleKey *key) {
+    if (key == NULL || key->value == NULL) return 0;
+    switch(key->value->type) {
+    case OBJ_STRING: return stringObjectLen(key->value);
+    case OBJ_LIST: return listTypeLength(key->value);
+    case OBJ_SET: return setTypeSize(key->value);
+    case OBJ_ZSET: return zsetLength(key->value);
+    case OBJ_HASH: return hashTypeLength(key->value);
+    default: return 0;
+    }
+}
+
+/* If the key is open for writing, remove it, and setup the key to
+ * accept new writes as an empty key (that will be created on demand).
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing REDISMODULE_ERR is returned. */
+int RM_DeleteKey(RedisModuleKey *key) {
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value) {
+        dbDelete(key->db,key->key);
+        key->value = NULL;
+    }
+    return REDISMODULE_OK;
+}
+
+/* Return the key expire value, as milliseconds of remaining TTL.
+ * If no TTL is associated with the key or if the key is empty,
+ * REDISMODULE_NO_EXPIRE is returned. */
+mstime_t RM_GetExpire(RedisModuleKey *key) {
+    mstime_t expire = getExpire(key->db,key->key);
+    /* NOTE(review): returns the literal -1, assumed to equal
+     * REDISMODULE_NO_EXPIRE as the doc comment states — confirm in the
+     * header where the constant is defined. */
+    if (expire == -1 || key->value == NULL) return -1;
+    expire -= mstime();
+    return expire >= 0 ? expire : 0;
+}
+
+/* Set a new expire for the key. If the special expire
+ * REDISMODULE_NO_EXPIRE is set, the expire is cancelled if there was
+ * one (the same as the PERSIST command).
+ *
+ * Note that the expire must be provided as a positive integer representing
+ * the number of milliseconds of TTL the key should have.
+ *
+ * The function returns REDISMODULE_OK on success or REDISMODULE_ERR if
+ * the key was not open for writing or is an empty key. */
+int RM_SetExpire(RedisModuleKey *key, mstime_t expire) {
+    if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL)
+        return REDISMODULE_ERR;
+    if (expire != REDISMODULE_NO_EXPIRE) {
+        /* The caller passes a relative TTL: convert to an absolute time. */
+        expire += mstime();
+        setExpire(key->ctx->client,key->db,key->key,expire);
+    } else {
+        removeExpire(key->db,key->key);
+    }
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for String type
+ * -------------------------------------------------------------------------- */
+
+/* If the key is open for writing, set the specified string 'str' as the
+ * value of the key, deleting the old value if any.
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing or there is an active iterator, REDISMODULE_ERR is returned. */
+int RM_StringSet(RedisModuleKey *key, RedisModuleString *str) {
+    if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
+    RM_DeleteKey(key);
+    setKey(key->db,key->key,str);
+    key->value = str;
+    return REDISMODULE_OK;
+}
+
+/* Prepare the key associated string value for DMA access, and returns
+ * a pointer and size (by reference), that the user can use to read or
+ * modify the string in-place accessing it directly via pointer.
+ *
+ * The 'mode' is composed by bitwise OR-ing the following flags:
+ *
+ *     REDISMODULE_READ -- Read access
+ *     REDISMODULE_WRITE -- Write access
+ *
+ * If the DMA is not requested for writing, the pointer returned should
+ * only be accessed in a read-only fashion.
+ *
+ * On error (wrong type) NULL is returned.
+ *
+ * DMA access rules:
+ *
+ * 1. No other key writing function should be called since the moment
+ * the pointer is obtained, for all the time we want to use DMA access
+ * to read or modify the string.
+ *
+ * 2. Each time RM_StringTruncate() is called, to continue with the DMA
+ * access, RM_StringDMA() should be called again to re-obtain
+ * a new pointer and length.
+ *
+ * 3. If the returned pointer is not NULL, but the length is zero, no
+ * byte can be touched (the string is empty, or the key itself is empty)
+ * so a RM_StringTruncate() call should be used if there is a need to enlarge
+ * the string, and later call StringDMA() again to get the pointer.
+ */
+char *RM_StringDMA(RedisModuleKey *key, size_t *len, int mode) {
+    /* We need to return *some* pointer for empty keys, we just return
+     * a string literal pointer, which has the advantage of being mapped
+     * into a read only memory page, so the module will segfault if a write
+     * attempt is performed. */
+    char *emptystring = "<dma-empty-string>";
+    if (key->value == NULL) {
+        *len = 0;
+        return emptystring;
+    }
+
+    if (key->value->type != OBJ_STRING) return NULL;
+
+    /* For write access, and even for read access if the object is encoded,
+     * we unshare the string (that has the side effect of decoding it). */
+    if ((mode & REDISMODULE_WRITE) || key->value->encoding != OBJ_ENCODING_RAW)
+        key->value = dbUnshareStringValue(key->db, key->key, key->value);
+
+    *len = sdslen(key->value->ptr);
+    return key->value->ptr;
+}
+
+/* If the string is open for writing and is of string type, resize it, padding
+ * with zero bytes if the new length is greater than the old one.
+ *
+ * After this call, RM_StringDMA() must be called again to continue
+ * DMA access with the new pointer.
+ *
+ * The function returns REDISMODULE_OK on success, and REDISMODULE_ERR on
+ * error, that is, the key is not open for writing, is not a string
+ * or resizing for more than 512 MB is requested.
+ *
+ * If the key is empty, a string key is created with the new string value
+ * unless the new length value requested is zero. */
+int RM_StringTruncate(RedisModuleKey *key, size_t newlen) {
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_STRING) return REDISMODULE_ERR;
+    if (newlen > 512*1024*1024) return REDISMODULE_ERR;
+
+    /* Empty key and new len set to 0. Just return REDISMODULE_OK without
+     * doing anything. */
+    if (key->value == NULL && newlen == 0) return REDISMODULE_OK;
+
+    if (key->value == NULL) {
+        /* Empty key: create it with the new size. */
+        robj *o = createObject(OBJ_STRING,sdsnewlen(NULL, newlen));
+        setKey(key->db,key->key,o);
+        key->value = o;
+        decrRefCount(o);
+    } else {
+        /* Unshare and resize. */
+        key->value = dbUnshareStringValue(key->db, key->key, key->value);
+        size_t curlen = sdslen(key->value->ptr);
+        if (newlen > curlen) {
+            key->value->ptr = sdsgrowzero(key->value->ptr,newlen);
+        } else if (newlen < curlen) {
+            /* Fix: with newlen == 0 the previous code called
+             * sdsrange(ptr,0,newlen-1), where the unsigned 'newlen-1'
+             * underflows and reaches sdsrange() as -1, i.e. "keep the
+             * whole string": truncating to zero silently did nothing.
+             * Handle the zero length case explicitly. */
+            if (newlen == 0)
+                sdsclear(key->value->ptr);
+            else
+                sdsrange(key->value->ptr,0,newlen-1);
+            /* If the string is too wasteful, reallocate it. */
+            if (sdslen(key->value->ptr) < sdsavail(key->value->ptr))
+                key->value->ptr = sdsRemoveFreeSpace(key->value->ptr);
+        }
+    }
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for List type
+ * -------------------------------------------------------------------------- */
+
+/* Push an element into a list, on head or tail depending on 'where' argument.
+ * If the key pointer is about an empty key opened for writing, the key
+ * is created. On error (key opened for read-only operations or of the wrong
+ * type) REDISMODULE_ERR is returned, otherwise REDISMODULE_OK is returned. */
+int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele) {
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_LIST) return REDISMODULE_ERR;
+    /* Auto-create the list value for empty keys opened for writing. */
+    if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_LIST);
+    /* Map the module-level 'where' flag to the quicklist constant. */
+    listTypePush(key->value, ele,
+        (where == REDISMODULE_LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL);
+    return REDISMODULE_OK;
+}
+
+/* Pop an element from the list, and returns it as a module string object
+ * that the user should be free with RM_FreeString() or by enabling
+ * automatic memory. 'where' specifies if the element should be popped from
+ * head or tail. The command returns NULL if:
+ * 1) The list is empty.
+ * 2) The key was not open for writing.
+ * 3) The key is not a list. */
+RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) {
+    if (!(key->mode & REDISMODULE_WRITE) ||
+        key->value == NULL ||
+        key->value->type != OBJ_LIST) return NULL;
+    robj *ele = listTypePop(key->value,
+        (where == REDISMODULE_LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL);
+    /* Return a plain (decoded) string object to the module; the possibly
+     * encoded object popped from the list is no longer needed. */
+    robj *decoded = getDecodedObject(ele);
+    decrRefCount(ele);
+    /* Popping the last element must also remove the key from the DB. */
+    moduleDelKeyIfEmpty(key);
+    autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,decoded);
+    return decoded;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Sorted Set type
+ * -------------------------------------------------------------------------- */
+
+/* Conversion from/to public flags of the Modules API and our private flags,
+ * so that we have everything decoupled. */
+int RM_ZsetAddFlagsToCoreFlags(int flags) {
+    /* Translate each public REDISMODULE_ZADD_* input bit into the
+     * corresponding core ZADD_* bit, ignoring everything else. */
+    int coreflags = 0;
+    coreflags |= (flags & REDISMODULE_ZADD_XX) ? ZADD_XX : 0;
+    coreflags |= (flags & REDISMODULE_ZADD_NX) ? ZADD_NX : 0;
+    return coreflags;
+}
+
+/* See previous function comment. */
+int RM_ZsetAddFlagsFromCoreFlags(int flags) {
+    /* Translate each core ZADD_* output bit back into its public
+     * REDISMODULE_ZADD_* counterpart. */
+    int pubflags = 0;
+    pubflags |= (flags & ZADD_ADDED)   ? REDISMODULE_ZADD_ADDED   : 0;
+    pubflags |= (flags & ZADD_UPDATED) ? REDISMODULE_ZADD_UPDATED : 0;
+    pubflags |= (flags & ZADD_NOP)     ? REDISMODULE_ZADD_NOP     : 0;
+    return pubflags;
+}
+
+/* Add a new element into a sorted set, with the specified 'score'.
+ * If the element already exists, the score is updated.
+ *
+ * A new sorted set is created at value if the key is an empty open key
+ * setup for writing.
+ *
+ * Additional flags can be passed to the function via a pointer, the flags
+ * are both used to receive input and to communicate state when the function
+ * returns. 'flagsptr' can be NULL if no special flags are used.
+ *
+ * The input flags are:
+ *
+ * REDISMODULE_ZADD_XX: Element must already exist. Do nothing otherwise.
+ * REDISMODULE_ZADD_NX: Element must not exist. Do nothing otherwise.
+ *
+ * The output flags are:
+ *
+ * REDISMODULE_ZADD_ADDED: The new element was added to the sorted set.
+ * REDISMODULE_ZADD_UPDATED: The score of the element was updated.
+ * REDISMODULE_ZADD_NOP: No operation was performed because XX or NX flags.
+ *
+ * On success the function returns REDISMODULE_OK. On the following errors
+ * REDISMODULE_ERR is returned:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ * * 'score' double value is not a number (NaN).
+ */
+int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr) {
+    int flags = 0;
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+    /* Translate the public input flags into the core ZADD_* flags. */
+    if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+    if (zsetAdd(key->value,score,ele->ptr,&flags,NULL) == 0) {
+        if (flagsptr) *flagsptr = 0;
+        return REDISMODULE_ERR;
+    }
+    /* Report the core output flags back in the public representation. */
+    if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+    return REDISMODULE_OK;
+}
+
+/* This function works exactly like RM_ZsetAdd(), but instead of setting
+ * a new score, the score of the existing element is incremented, or if the
+ * element does not already exist, it is added assuming the old score was
+ * zero.
+ *
+ * The input and output flags, and the return value, have the same exact
+ * meaning, with the only difference that this function will return
+ * REDISMODULE_ERR even when 'score' is a valid double number, but adding it
+ * to the existing score results into a NaN (not a number) condition.
+ *
+ * This function has an additional field 'newscore', if not NULL is filled
+ * with the new score of the element after the increment, if no error
+ * is returned. */
+int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore) {
+    int flags = 0;
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+    if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+    flags |= ZADD_INCR;
+    if (zsetAdd(key->value,score,ele->ptr,&flags,newscore) == 0) {
+        if (flagsptr) *flagsptr = 0;
+        return REDISMODULE_ERR;
+    }
+    /* zsetAdd() may signal back that the resulting score is not a number.
+     * Fix: test the core output 'flags', not '*flagsptr' -- at this point
+     * '*flagsptr' still holds the caller's public input flags, so the
+     * core-private ZADD_NAN bit could never be observed there and the
+     * check was dead code. */
+    if (flags & ZADD_NAN) {
+        if (flagsptr) *flagsptr = 0;
+        return REDISMODULE_ERR;
+    }
+    if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+    return REDISMODULE_OK;
+}
+
+/* Remove the specified element from the sorted set.
+ * The function returns REDISMODULE_OK on success, and REDISMODULE_ERR
+ * on one of the following conditions:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ *
+ * The return value does NOT indicate the fact the element was really
+ * removed (since it existed) or not, just if the function was executed
+ * with success.
+ *
+ * In order to know if the element was removed, the additional argument
+ * 'deleted' must be passed, that populates the integer by reference
+ * setting it to 1 or 0 depending on the outcome of the operation.
+ * The 'deleted' argument can be NULL if the caller is not interested
+ * to know if the element was really removed.
+ *
+ * Empty keys will be handled correctly by doing nothing. */
+int RM_ZsetRem(RedisModuleKey *key, RedisModuleString *ele, int *deleted) {
+    if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+    if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    /* An empty (NULL value) key is not an error: simply nothing to delete. */
+    if (key->value != NULL && zsetDel(key->value,ele->ptr)) {
+        if (deleted) *deleted = 1;
+    } else {
+        if (deleted) *deleted = 0;
+    }
+    return REDISMODULE_OK;
+}
+
+/* On success retrieve the double score associated at the sorted set element
+ * 'ele' and returns REDISMODULE_OK. Otherwise REDISMODULE_ERR is returned
+ * to signal one of the following conditions:
+ *
+ * * There is no such element 'ele' in the sorted set.
+ * * The key is not a sorted set.
+ * * The key is an open empty key.
+ */
+int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) {
+    if (key->value == NULL) return REDISMODULE_ERR;
+    if (key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+    /* zsetScore() fills '*score' by reference on success. */
+    if (zsetScore(key->value,ele->ptr,score) == C_ERR) return REDISMODULE_ERR;
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Sorted Set iterator
+ * -------------------------------------------------------------------------- */
+
+/* Reset the sorted set iterator state embedded into the key handle to the
+ * "no active iteration" defaults: no range type, no current element, and
+ * end-of-range flag set. */
+void zsetKeyReset(RedisModuleKey *key) {
+    key->ztype = REDISMODULE_ZSET_RANGE_NONE;
+    key->zcurrent = NULL;
+    key->zer = 1;
+}
+
+/* Stop a sorted set iteration. */
+void RM_ZsetRangeStop(RedisModuleKey *key) {
+    /* Free resources if needed: only the lex range owns heap allocated
+     * state (the parsed zlexrangespec). */
+    if (key->ztype == REDISMODULE_ZSET_RANGE_LEX)
+        zslFreeLexRange(&key->zlrs);
+    /* Setup sensible values so that misused iteration API calls when an
+     * iterator is not active will result into something more sensible
+     * than crashing. */
+    zsetKeyReset(key);
+}
+
+/* Return the "End of range" flag value to signal the end of the iteration. */
+int RM_ZsetRangeEndReached(RedisModuleKey *key) {
+    return key->zer;
+}
+
+/* Helper function for RM_ZsetFirstInScoreRange() and RM_ZsetLastInScoreRange().
+ * Setup the sorted set iteration according to the specified score range
+ * (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR. */
+int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex, int first) {
+    if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+    /* Release any previous iterator state before starting a new one. */
+    RM_ZsetRangeStop(key);
+    key->ztype = REDISMODULE_ZSET_RANGE_SCORE;
+    key->zer = 0;
+
+    /* Setup the range structure used by the sorted set core implementation
+     * in order to seek at the specified element. */
+    zrangespec *zrs = &key->zrs;
+    zrs->min = min;
+    zrs->max = max;
+    zrs->minex = minex;
+    zrs->maxex = maxex;
+
+    /* Seek the starting element with the encoding-specific primitive:
+     * the current element is a ziplist pointer or a skiplist node. */
+    if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+        key->zcurrent = first ? zzlFirstInRange(key->value->ptr,zrs) :
+                                zzlLastInRange(key->value->ptr,zrs);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = key->value->ptr;
+        zskiplist *zsl = zs->zsl;
+        key->zcurrent = first ? zslFirstInRange(zsl,zrs) :
+                                zslLastInRange(zsl,zrs);
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    /* No element inside the range: signal end-of-range right away. */
+    if (key->zcurrent == NULL) key->zer = 1;
+    return REDISMODULE_OK;
+}
+
+/* Setup a sorted set iterator seeking the first element in the specified
+ * range. Returns REDISMODULE_OK if the iterator was correctly initialized
+ * otherwise REDISMODULE_ERR is returned in the following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ *
+ * The range is specified according to the two double values 'min' and 'max'.
+ * Both can be infinite using the following two macros:
+ *
+ * REDISMODULE_POSITIVE_INFINITE for positive infinite value
+ * REDISMODULE_NEGATIVE_INFINITE for negative infinite value
+ *
+ * 'minex' and 'maxex' parameters, if true, respectively setup a range
+ * where the min and max value are exclusive (not included) instead of
+ * inclusive. */
+int RM_ZsetFirstInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+    return zsetInitScoreRange(key,min,max,minex,maxex,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInScoreRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+    return zsetInitScoreRange(key,min,max,minex,maxex,0);
+}
+
+/* Helper function for RM_ZsetFirstInLexRange() and RM_ZsetLastInLexRange().
+ * Setup the sorted set iteration according to the specified lexicographical
+ * range (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR.
+ *
+ * Note that this function takes 'min' and 'max' in the same form of the
+ * Redis ZRANGEBYLEX command. */
+int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max, int first) {
+    if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+    /* Release any previous iterator state before starting a new one. */
+    RM_ZsetRangeStop(key);
+    key->zer = 0;
+
+    /* Setup the range structure used by the sorted set core implementation
+     * in order to seek at the specified element. */
+    zlexrangespec *zlrs = &key->zlrs;
+    if (zslParseLexRange(min, max, zlrs) == C_ERR) return REDISMODULE_ERR;
+
+    /* Set the range type to lex only after successfully parsing the range,
+     * otherwise we don't want the zlexrangespec to be freed. */
+    key->ztype = REDISMODULE_ZSET_RANGE_LEX;
+
+    /* Seek the starting element with the encoding-specific primitive. */
+    if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+        key->zcurrent = first ? zzlFirstInLexRange(key->value->ptr,zlrs) :
+                                zzlLastInLexRange(key->value->ptr,zlrs);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        zset *zs = key->value->ptr;
+        zskiplist *zsl = zs->zsl;
+        key->zcurrent = first ? zslFirstInLexRange(zsl,zlrs) :
+                                zslLastInLexRange(zsl,zlrs);
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    /* No element inside the range: signal end-of-range right away. */
+    if (key->zcurrent == NULL) key->zer = 1;
+
+    return REDISMODULE_OK;
+}
+
+/* Setup a sorted set iterator seeking the first element in the specified
+ * lexicographical range. Returns REDISMODULE_OK if the iterator was correctly
+ * initialized otherwise REDISMODULE_ERR is returned in the
+ * following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ * 2. The lexicographical range 'min' and 'max' format is invalid.
+ *
+ * 'min' and 'max' should be provided as two RedisModuleString objects
+ * in the same format as the parameters passed to the ZRANGEBYLEX command.
+ * The function does not take ownership of the objects, so they can be released
+ * ASAP after the iterator is setup. */
+int RM_ZsetFirstInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+    return zsetInitLexRange(key,min,max,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInLexRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+    return zsetInitLexRange(key,min,max,0);
+}
+
+/* Return the current sorted set element of an active sorted set iterator
+ * or NULL if the range specified in the iterator does not include any
+ * element. */
+RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score) {
+    RedisModuleString *str;
+
+    if (key->zcurrent == NULL) return NULL;
+    if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+        /* In ziplist encoding 'zcurrent' points at the element entry; the
+         * score is stored in the entry right after it. */
+        unsigned char *eptr, *sptr;
+        eptr = key->zcurrent;
+        sds ele = ziplistGetObject(eptr);
+        if (score) {
+            sptr = ziplistNext(key->value->ptr,eptr);
+            *score = zzlGetScore(sptr);
+        }
+        /* 'ele' is a freshly allocated SDS: hand its ownership to the
+         * string object we return. */
+        str = createObject(OBJ_STRING,ele);
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        /* In skiplist encoding 'zcurrent' is a zskiplistNode pointer:
+         * copy its element, since the node remains owned by the zset. */
+        zskiplistNode *ln = key->zcurrent;
+        if (score) *score = ln->score;
+        str = createStringObject(ln->ele,sdslen(ln->ele));
+    } else {
+        serverPanic("Unsupported zset encoding");
+    }
+    autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,str);
+    return str;
+}
+
+/* Go to the next element of the sorted set iterator. Returns 1 if there was
+ * a next element, 0 if we are already at the latest element or the range
+ * does not include any item at all. */
+int RM_ZsetRangeNext(RedisModuleKey *key) {
+    if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+
+    if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+        /* Ziplist layout alternates element and score entries, so moving
+         * to the next element means skipping two entries. */
+        unsigned char *zl = key->value->ptr;
+        unsigned char *eptr = key->zcurrent;
+        unsigned char *next;
+        next = ziplistNext(zl,eptr); /* Skip element. */
+        if (next) next = ziplistNext(zl,next); /* Skip score. */
+        if (next == NULL) {
+            key->zer = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+                /* Fetch the next element score for the
+                 * range check. */
+                unsigned char *saved_next = next;
+                next = ziplistNext(zl,next); /* Skip next element. */
+                double score = zzlGetScore(next); /* Obtain the next score. */
+                if (!zslValueLteMax(score,&key->zrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+                /* Restore the element pointer: 'zcurrent' must point at
+                 * the element entry, not at its score. */
+                next = saved_next;
+            } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zzlLexValueLteMax(next,&key->zlrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+            }
+            key->zcurrent = next;
+            return 1;
+        }
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        /* Skiplist: the next element is the level-0 forward link. */
+        zskiplistNode *ln = key->zcurrent, *next = ln->level[0].forward;
+        if (next == NULL) {
+            key->zer = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
+                !zslValueLteMax(next->score,&key->zrs))
+            {
+                key->zer = 1;
+                return 0;
+            } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zslLexValueLteMax(next->ele,&key->zlrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+            }
+            key->zcurrent = next;
+            return 1;
+        }
+    } else {
+        /* serverPanic() never returns, so no return value is needed here. */
+        serverPanic("Unsupported zset encoding");
+    }
+}
+
+/* Go to the previous element of the sorted set iterator. Returns 1 if there was
+ * a previous element, 0 if we are already at the first element or the range
+ * does not include any item at all. */
+int RM_ZsetRangePrev(RedisModuleKey *key) {
+    if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+
+    if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+        /* Ziplist layout alternates element and score entries, so moving
+         * to the previous element means stepping back two entries. */
+        unsigned char *zl = key->value->ptr;
+        unsigned char *eptr = key->zcurrent;
+        unsigned char *prev;
+        prev = ziplistPrev(zl,eptr); /* Go back to previous score. */
+        if (prev) prev = ziplistPrev(zl,prev); /* Back to previous ele. */
+        if (prev == NULL) {
+            key->zer = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+                /* Fetch the previous element score for the
+                 * range check. */
+                unsigned char *saved_prev = prev;
+                prev = ziplistNext(zl,prev); /* Skip element to get the score.*/
+                double score = zzlGetScore(prev); /* Obtain the prev score. */
+                if (!zslValueGteMin(score,&key->zrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+                /* Restore the element pointer: 'zcurrent' must point at
+                 * the element entry, not at its score. */
+                prev = saved_prev;
+            } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zzlLexValueGteMin(prev,&key->zlrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+            }
+            key->zcurrent = prev;
+            return 1;
+        }
+    } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+        /* Skiplist: the previous element is the backward link. */
+        zskiplistNode *ln = key->zcurrent, *prev = ln->backward;
+        if (prev == NULL) {
+            key->zer = 1;
+            return 0;
+        } else {
+            /* Are we still within the range? */
+            if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
+                !zslValueGteMin(prev->score,&key->zrs))
+            {
+                key->zer = 1;
+                return 0;
+            } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+                if (!zslLexValueGteMin(prev->ele,&key->zlrs)) {
+                    key->zer = 1;
+                    return 0;
+                }
+            }
+            key->zcurrent = prev;
+            return 1;
+        }
+    } else {
+        /* serverPanic() never returns, so no return value is needed here. */
+        serverPanic("Unsupported zset encoding");
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Hash type
+ * -------------------------------------------------------------------------- */
+
+/* Set the field of the specified hash field to the specified value.
+ * If the key is an empty key open for writing, it is created with an empty
+ * hash value, in order to set the specified field.
+ *
+ * The function is variadic and the user must specify pairs of field
+ * names and values, both as RedisModuleString pointers (unless the
+ * CFIELD option is set, see later).
+ *
+ * Example to set the hash argv[1] to the value argv[2]:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],argv[2],NULL);
+ *
+ * The function can also be used in order to delete fields (if they exist)
+ * by setting them to the specified value of REDISMODULE_HASH_DELETE:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * The behavior of the command changes with the specified flags, that can be
+ * set to REDISMODULE_HASH_NONE if no special behavior is needed.
+ *
+ * REDISMODULE_HASH_NX: The operation is performed only if the field was not
+ * already existing in the hash.
+ * REDISMODULE_HASH_XX: The operation is performed only if the field was
+ * already existing, so that a new value could be
+ * associated to an existing field, but no new fields
+ * are created.
+ * REDISMODULE_HASH_CFIELDS: The field names passed are null terminated C
+ * strings instead of RedisModuleString objects.
+ *
+ * Unless NX is specified, the command overwrites the old field value with
+ * the new one.
+ *
+ * When using REDISMODULE_HASH_CFIELDS, field names are reported using
+ * normal C strings, so for example to delete the field "foo" the following
+ * code can be used:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_CFIELDS,"foo",
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * Return value:
+ *
+ * The number of fields updated (that may be less than the number of fields
+ * specified because of the XX or NX options).
+ *
+ * In the following case the return value is always zero:
+ *
+ * * The key was not open for writing.
+ * * The key was associated with a non Hash value.
+ */
+int RM_HashSet(RedisModuleKey *key, int flags, ...) {
+    va_list ap;
+    if (!(key->mode & REDISMODULE_WRITE)) return 0;
+    if (key->value && key->value->type != OBJ_HASH) return 0;
+    if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_HASH);
+
+    int updated = 0;
+    va_start(ap, flags);
+    /* Consume (field,value) pairs until the NULL field terminator. */
+    while(1) {
+        RedisModuleString *field, *value;
+        /* Get the field and value objects. With CFIELDS the field arrives
+         * as a C string and we wrap it into a temporary string object. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            char *cfield = va_arg(ap,char*);
+            if (cfield == NULL) break;
+            field = createRawStringObject(cfield,strlen(cfield));
+        } else {
+            field = va_arg(ap,RedisModuleString*);
+            if (field == NULL) break;
+        }
+        value = va_arg(ap,RedisModuleString*);
+
+        /* Handle XX and NX */
+        if (flags & (REDISMODULE_HASH_XX|REDISMODULE_HASH_NX)) {
+            int exists = hashTypeExists(key->value, field->ptr);
+            if (((flags & REDISMODULE_HASH_XX) && !exists) ||
+                ((flags & REDISMODULE_HASH_NX) && exists))
+            {
+                /* Skip this pair, releasing the temporary field object
+                 * if we created it above. */
+                if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+                continue;
+            }
+        }
+
+        /* Handle deletion if value is REDISMODULE_HASH_DELETE. */
+        if (value == REDISMODULE_HASH_DELETE) {
+            updated += hashTypeDelete(key->value, field->ptr);
+            if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+            continue;
+        }
+
+        int low_flags = HASH_SET_COPY;
+        /* If CFIELDS is active, we can pass the ownership of the
+         * SDS object to the low level function that sets the field
+         * to avoid a useless copy. */
+        if (flags & REDISMODULE_HASH_CFIELDS)
+            low_flags |= HASH_SET_TAKE_FIELD;
+        updated += hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
+
+        /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
+         * however we still have to release the 'field' object shell. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            field->ptr = NULL; /* Prevent the SDS string from being freed. */
+            decrRefCount(field);
+        }
+    }
+    va_end(ap);
+    /* Deleting the last field must also remove the key from the DB. */
+    moduleDelKeyIfEmpty(key);
+    return updated;
+}
+
+/* Get fields from an hash value. This function is called using a variable
+ * number of arguments, alternating a field name (as a StringRedisModule
+ * pointer) with a pointer to a StringRedisModule pointer, that is set to the
+ * value of the field if the field exist, or NULL if the field did not exist.
+ * At the end of the field/value-ptr pairs, NULL must be specified as last
+ * argument to signal the end of the arguments in the variadic function.
+ *
+ * This is an example usage:
+ *
+ * RedisModuleString *first, *second;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_NONE,argv[1],&first,
+ * argv[2],&second,NULL);
+ *
+ * As with RedisModule_HashSet() the behavior of the command can be specified
+ * passing flags different than REDISMODULE_HASH_NONE:
+ *
+ * REDISMODULE_HASH_CFIELDS: field names as null terminated C strings.
+ *
+ * REDISMODULE_HASH_EXISTS: instead of setting the value of the field
+ * expecting a RedisModuleString pointer to pointer, the function just
+ * reports if the field exists or not and expects an integer pointer
+ * as the second element of each pair.
+ *
+ * Example of REDISMODULE_HASH_CFIELDS:
+ *
+ * RedisModuleString *username, *hashedpass;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_CFIELDS,
+ * "username",&username,"hp",&hashedpass,NULL);
+ *
+ * Example of REDISMODULE_HASH_EXISTS:
+ *
+ * int exists;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,argv[1],&exists,NULL);
+ *
+ * The function returns REDISMODULE_OK on success and REDISMODULE_ERR if
+ * the key is not an hash value.
+ *
+ * Memory management:
+ *
+ * The returned RedisModuleString objects should be released with
+ * RedisModule_FreeString(), or by enabling automatic memory management.
+ */
+int RM_HashGet(RedisModuleKey *key, int flags, ...) {
+    va_list ap;
+    if (key->value && key->value->type != OBJ_HASH) return REDISMODULE_ERR;
+
+    va_start(ap, flags);
+    /* Consume (field, out-pointer) pairs until the NULL field terminator. */
+    while(1) {
+        RedisModuleString *field, **valueptr;
+        int *existsptr;
+        /* Get the field object and the value pointer to pointer. */
+        if (flags & REDISMODULE_HASH_CFIELDS) {
+            char *cfield = va_arg(ap,char*);
+            if (cfield == NULL) break;
+            field = createRawStringObject(cfield,strlen(cfield));
+        } else {
+            field = va_arg(ap,RedisModuleString*);
+            if (field == NULL) break;
+        }
+
+        /* Query the hash for existence or value object. An empty key
+         * (NULL value) is treated as "field missing" rather than error. */
+        if (flags & REDISMODULE_HASH_EXISTS) {
+            existsptr = va_arg(ap,int*);
+            if (key->value)
+                *existsptr = hashTypeExists(key->value,field->ptr);
+            else
+                *existsptr = 0;
+        } else {
+            valueptr = va_arg(ap,RedisModuleString**);
+            if (key->value) {
+                *valueptr = hashTypeGetValueObject(key->value,field->ptr);
+                if (*valueptr) {
+                    /* Always hand a plain (decoded) string object back
+                     * to the module. */
+                    robj *decoded = getDecodedObject(*valueptr);
+                    decrRefCount(*valueptr);
+                    *valueptr = decoded;
+                }
+                if (*valueptr)
+                    autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,*valueptr);
+            } else {
+                *valueptr = NULL;
+            }
+        }
+
+        /* Cleanup: release the temporary field object created for CFIELDS. */
+        if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+    }
+    va_end(ap);
+    return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Redis <-> Modules generic Call() API
+ * -------------------------------------------------------------------------- */
+
+/* Create a new RedisModuleCallReply object. The processing of the reply
+ * is lazy, the object is just populated with the raw protocol and later
+ * is processed as needed. Initially we just make sure to set the right
+ * reply type, which is extremely cheap to do. */
+RedisModuleCallReply *moduleCreateCallReplyFromProto(RedisModuleCtx *ctx, sds proto) {
+    RedisModuleCallReply *reply = zmalloc(sizeof(*reply));
+    reply->ctx = ctx;
+    reply->proto = proto;
+    reply->protolen = sdslen(proto);
+    reply->flags = REDISMODULE_REPLYFLAG_TOPARSE; /* Lazy parsing. */
+    /* The first protocol byte is enough to classify the reply type. */
+    switch(proto[0]) {
+    case '$':
+    case '+': reply->type = REDISMODULE_REPLY_STRING; break;
+    case '-': reply->type = REDISMODULE_REPLY_ERROR; break;
+    case ':': reply->type = REDISMODULE_REPLY_INTEGER; break;
+    case '*': reply->type = REDISMODULE_REPLY_ARRAY; break;
+    default: reply->type = REDISMODULE_REPLY_UNKNOWN; break;
+    }
+    /* "$-1" and "*-1" are the protocol encodings of the NULL reply. */
+    if ((proto[0] == '*' || proto[0] == '$') && proto[1] == '-')
+        reply->type = REDISMODULE_REPLY_NULL;
+    return reply;
+}
+
+void moduleParseCallReply_Int(RedisModuleCallReply *reply);
+void moduleParseCallReply_BulkString(RedisModuleCallReply *reply);
+void moduleParseCallReply_SimpleString(RedisModuleCallReply *reply);
+void moduleParseCallReply_Array(RedisModuleCallReply *reply);
+
+/* Do nothing if REDISMODULE_REPLYFLAG_TOPARSE is false, otherwise
+ * use the protocol of the reply in reply->proto in order to fill the
+ * reply with parsed data according to the reply type. */
+void moduleParseCallReply(RedisModuleCallReply *reply) {
+    if (!(reply->flags & REDISMODULE_REPLYFLAG_TOPARSE)) return;
+    /* Clear the flag first: the reply is parsed at most once. */
+    reply->flags &= ~REDISMODULE_REPLYFLAG_TOPARSE;
+
+    /* Dispatch on the protocol type byte. */
+    switch(reply->proto[0]) {
+    case ':': moduleParseCallReply_Int(reply); break;
+    case '$': moduleParseCallReply_BulkString(reply); break;
+    case '-': /* handled by next item. */
+    case '+': moduleParseCallReply_SimpleString(reply); break;
+    case '*': moduleParseCallReply_Array(reply); break;
+    }
+}
+
+/* Parse an integer reply (":<number>\r\n") into reply->val.ll and set
+ * protolen to the number of protocol bytes consumed. */
+void moduleParseCallReply_Int(RedisModuleCallReply *reply) {
+    char *proto = reply->proto;
+    char *p = strchr(proto+1,'\r');
+
+    string2ll(proto+1,p-proto-1,&reply->val.ll);
+    reply->protolen = p-proto+2; /* Includes the trailing CRLF. */
+    reply->type = REDISMODULE_REPLY_INTEGER;
+}
+
+/* Parse a bulk string reply ("$<len>\r\n<bytes>\r\n"). A length of -1 is
+ * the protocol encoding of the NULL reply. The string payload is not
+ * copied: val.str points into the reply->proto buffer. */
+void moduleParseCallReply_BulkString(RedisModuleCallReply *reply) {
+    char *proto = reply->proto;
+    char *p = strchr(proto+1,'\r');
+    long long bulklen;
+
+    string2ll(proto+1,p-proto-1,&bulklen);
+    if (bulklen == -1) {
+        reply->protolen = p-proto+2;
+        reply->type = REDISMODULE_REPLY_NULL;
+    } else {
+        reply->val.str = p+2;
+        reply->len = bulklen;
+        /* Header + CRLF + payload + trailing CRLF. */
+        reply->protolen = p-proto+2+bulklen+2;
+        reply->type = REDISMODULE_REPLY_STRING;
+    }
+}
+
+/* Parse a status ("+...\r\n") or error ("-...\r\n") reply. The payload is
+ * not copied: val.str points into the reply->proto buffer. */
+void moduleParseCallReply_SimpleString(RedisModuleCallReply *reply) {
+    char *proto = reply->proto;
+    char *p = strchr(proto+1,'\r');
+
+    reply->val.str = proto+1;
+    reply->len = p-proto-1;
+    reply->protolen = p-proto+2; /* Includes the trailing CRLF. */
+    reply->type = proto[0] == '+' ? REDISMODULE_REPLY_STRING :
+                                    REDISMODULE_REPLY_ERROR;
+}
+
+/* Parse a multi bulk reply ("*<count>\r\n" followed by nested replies).
+ * Each nested element is eagerly parsed here (it shares the parent's
+ * protocol buffer and is flagged NESTED so it is never freed on its own).
+ * A count of -1 is the protocol encoding of the NULL reply. */
+void moduleParseCallReply_Array(RedisModuleCallReply *reply) {
+    char *proto = reply->proto;
+    char *p = strchr(proto+1,'\r');
+    long long arraylen, j;
+
+    string2ll(proto+1,p-proto-1,&arraylen);
+    p += 2; /* Skip the CRLF after the count. */
+
+    if (arraylen == -1) {
+        reply->protolen = p-proto;
+        reply->type = REDISMODULE_REPLY_NULL;
+        return;
+    }
+
+    reply->val.array = zmalloc(sizeof(RedisModuleCallReply)*arraylen);
+    reply->len = arraylen;
+    for (j = 0; j < arraylen; j++) {
+        RedisModuleCallReply *ele = reply->val.array+j;
+        ele->flags = REDISMODULE_REPLYFLAG_NESTED |
+                     REDISMODULE_REPLYFLAG_TOPARSE;
+        ele->proto = p;
+        ele->ctx = reply->ctx;
+        moduleParseCallReply(ele);
+        /* Advance past the bytes consumed by the nested element. */
+        p += ele->protolen;
+    }
+    reply->protolen = p-proto;
+    reply->type = REDISMODULE_REPLY_ARRAY;
+}
+
+/* Free a Call reply and all the nested replies it contains if it's an
+ * array. */
+void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){
+    /* Don't free nested replies by default: the user must always free the
+     * toplevel reply. However be gentle and don't crash if the module
+     * misuses the API. */
+    if (!freenested && reply->flags & REDISMODULE_REPLYFLAG_NESTED) return;
+
+    /* A still-to-parse reply owns no parsed sub-objects, so there is
+     * nothing to recurse into. */
+    if (!(reply->flags & REDISMODULE_REPLYFLAG_TOPARSE)) {
+        if (reply->type == REDISMODULE_REPLY_ARRAY) {
+            size_t j;
+            for (j = 0; j < reply->len; j++)
+                RM_FreeCallReply_Rec(reply->val.array+j,1);
+            zfree(reply->val.array);
+        }
+    }
+
+    /* For nested replies, we don't free reply->proto (which if not NULL
+     * references the parent reply->proto buffer), nor the structure
+     * itself which is allocated as an array of structures, and is freed
+     * when the array value is released. */
+    if (!(reply->flags & REDISMODULE_REPLYFLAG_NESTED)) {
+        if (reply->proto) sdsfree(reply->proto);
+        zfree(reply);
+    }
+}
+
+/* Wrapper for the recursive free reply function. This is needed in order
+ * to have the first level function to return on nested replies, but only
+ * if called by the module API. */
+void RM_FreeCallReply(RedisModuleCallReply *reply) {
+
+    /* Save the context first: 'reply' is invalid after the recursive
+     * free. NOTE(review): autoMemoryFreed() receives the already-freed
+     * pointer — presumably it only compares the pointer value for auto
+     * memory bookkeeping; confirm it never dereferences it. */
+    RedisModuleCtx *ctx = reply->ctx;
+    RM_FreeCallReply_Rec(reply,0);
+    autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply);
+}
+
+/* Return the reply type, or REDISMODULE_REPLY_UNKNOWN for a NULL reply. */
+int RM_CallReplyType(RedisModuleCallReply *reply) {
+    return reply ? reply->type : REDISMODULE_REPLY_UNKNOWN;
+}
+
+/* Return the reply length: number of elements for arrays, number of
+ * bytes for strings and errors, zero for every other reply type. */
+size_t RM_CallReplyLength(RedisModuleCallReply *reply) {
+    moduleParseCallReply(reply);
+    int t = reply->type;
+    if (t == REDISMODULE_REPLY_STRING ||
+        t == REDISMODULE_REPLY_ERROR ||
+        t == REDISMODULE_REPLY_ARRAY)
+    {
+        return reply->len;
+    }
+    return 0;
+}
+
+/* Return the 'idx'-th nested call reply element of an array reply, or NULL
+ * if the reply type is wrong or the index is out of range. */
+RedisModuleCallReply *RM_CallReplyArrayElement(RedisModuleCallReply *reply, size_t idx) {
+    moduleParseCallReply(reply); /* Lazily parse the reply if needed. */
+    if (reply->type != REDISMODULE_REPLY_ARRAY) return NULL;
+    if (idx >= reply->len) return NULL; /* Out of range. */
+    return reply->val.array+idx;
+}
+
+/* Return the long long of an integer reply. */
+long long RM_CallReplyInteger(RedisModuleCallReply *reply) {
+    moduleParseCallReply(reply); /* Lazily parse the reply if needed. */
+    /* LLONG_MIN doubles as the out-of-band error value for replies that
+     * are not integers. */
+    if (reply->type != REDISMODULE_REPLY_INTEGER) return LLONG_MIN;
+    return reply->val.ll;
+}
+
+/* Return the pointer and length of a string or error reply. */
+const char *RM_CallReplyStringPtr(RedisModuleCallReply *reply, size_t *len) {
+    moduleParseCallReply(reply); /* Lazily parse the reply if needed. */
+    if (reply->type != REDISMODULE_REPLY_STRING &&
+        reply->type != REDISMODULE_REPLY_ERROR) return NULL;
+    if (len) *len = reply->len; /* 'len' is optional and may be NULL. */
+    return reply->val.str;
+}
+
+/* Return a new string object from a call reply of type string, error or
+ * integer. Otherwise (wrong reply type) return NULL. */
+RedisModuleString *RM_CreateStringFromCallReply(RedisModuleCallReply *reply) {
+    moduleParseCallReply(reply); /* Lazily parse the reply if needed. */
+    switch(reply->type) {
+    case REDISMODULE_REPLY_STRING:
+    case REDISMODULE_REPLY_ERROR:
+        return RM_CreateString(reply->ctx,reply->val.str,reply->len);
+    case REDISMODULE_REPLY_INTEGER: {
+        /* Integers are converted to their decimal representation. */
+        char buf[64];
+        int len = ll2string(buf,sizeof(buf),reply->val.ll);
+        return RM_CreateString(reply->ctx,buf,len);
+    }
+    default: return NULL; /* Wrong reply type (NULL, array, ...). */
+    }
+}
+
+/* Returns an array of robj pointers, and populates *argc with the number
+ * of items, by parsing the format specifier "fmt" as described for
+ * the RM_Call(), RM_Replicate() and other module APIs.
+ *
+ * The integer pointed by 'flags' is populated with flags according
+ * to special modifiers in "fmt". For now only one exists:
+ *
+ * "!" -> REDISMODULE_ARGV_REPLICATE
+ *
+ * On error (format specifier error) NULL is returned and nothing is
+ * allocated. On success the argument vector is returned. */
+
+#define REDISMODULE_ARGV_REPLICATE (1<<0)
+
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap) {
+    int argc = 0, argv_size, j;
+    robj **argv = NULL;
+
+    /* As a first guess to avoid useless reallocations, size argv to
+     * hold one argument for each char specifier in 'fmt'. */
+    argv_size = strlen(fmt)+1; /* +1 because of the command name. */
+    argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+    /* Build the arguments vector based on the format specifier. */
+    argv[0] = createStringObject(cmdname,strlen(cmdname));
+    argc++;
+
+    /* Walk the format specifier, consuming one (or two) varargs per
+     * specifier char. */
+    const char *p = fmt;
+    while(*p) {
+        if (*p == 'c') {
+            /* 'c': NULL terminated C string. */
+            char *cstr = va_arg(ap,char*);
+            argv[argc++] = createStringObject(cstr,strlen(cstr));
+        } else if (*p == 's') {
+            /* 's': RedisModuleString, retained by reference. */
+            robj *obj = va_arg(ap,void*);
+            argv[argc++] = obj;
+            incrRefCount(obj);
+        } else if (*p == 'b') {
+            /* 'b': binary buffer + length (two varargs). */
+            char *buf = va_arg(ap,char*);
+            size_t len = va_arg(ap,size_t);
+            argv[argc++] = createStringObject(buf,len);
+        } else if (*p == 'l') {
+            /* 'l': long long integer. The local MUST be 'long long':
+             * reading va_arg(ap,long long) into a plain 'long' truncated
+             * the value on platforms where long is 32 bits. */
+            long long ll = va_arg(ap,long long);
+            argv[argc++] = createObject(OBJ_STRING,sdsfromlonglong(ll));
+        } else if (*p == 'v') {
+            /* 'v': vector of robj pointers + count (two varargs). */
+            robj **v = va_arg(ap, void*);
+            size_t vlen = va_arg(ap, size_t);
+
+            /* We need to grow argv to hold the vector's elements.
+             * We resize by vector_len-1 elements, because we held
+             * one element in argv for the vector already */
+            argv_size += vlen-1;
+            argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+            size_t i = 0;
+            for (i = 0; i < vlen; i++) {
+                incrRefCount(v[i]);
+                argv[argc++] = v[i];
+            }
+        } else if (*p == '!') {
+            /* '!': modifier, consumes no vararg: request replication. */
+            if (flags) (*flags) |= REDISMODULE_ARGV_REPLICATE;
+        } else {
+            goto fmterr;
+        }
+        p++;
+    }
+    *argcp = argc;
+    return argv;
+
+fmterr:
+    /* Bad specifier: release everything built so far, the caller gets
+     * NULL and must not free anything. */
+    for (j = 0; j < argc; j++)
+        decrRefCount(argv[j]);
+    zfree(argv);
+    return NULL;
+}
+
+/* Exported API to call any Redis command from modules.
+ * On success a RedisModuleCallReply object is returned, otherwise
+ * NULL is returned and errno is set to the following values:
+ *
+ * EINVAL: command non existing, wrong arity, wrong format specifier.
+ * EPERM: operation in Cluster instance with key in non local slot. */
+RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+    struct redisCommand *cmd;
+    client *c = NULL;
+    robj **argv = NULL;
+    int argc = 0, flags = 0;
+    va_list ap;
+    RedisModuleCallReply *reply = NULL;
+    int replicate = 0; /* Replicate this command? */
+
+    cmd = lookupCommandByCString((char*)cmdname);
+    if (!cmd) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    /* Create the client and dispatch the command. */
+    va_start(ap, fmt);
+    c = createClient(-1);
+    argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+    replicate = flags & REDISMODULE_ARGV_REPLICATE;
+    va_end(ap);
+
+    /* Setup our fake client for command execution. */
+    c->flags |= CLIENT_MODULE;
+    c->db = ctx->client->db;
+    c->argv = argv;
+    c->argc = argc;
+    c->cmd = c->lastcmd = cmd;
+    /* We handle the above format error only when the client is setup so
+     * that we can free it normally. Set errno as documented: the function
+     * contract promises EINVAL on a format specifier error, but this path
+     * previously returned NULL with errno untouched. */
+    if (argv == NULL) {
+        errno = EINVAL;
+        goto cleanup;
+    }
+
+    /* Basic arity checks. */
+    if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) {
+        errno = EINVAL;
+        goto cleanup;
+    }
+
+    /* If this is a Redis Cluster node, we need to make sure the module is not
+     * trying to access non-local keys, with the exception of commands
+     * received from our master. */
+    if (server.cluster_enabled && !(ctx->client->flags & CLIENT_MASTER)) {
+        /* Duplicate relevant flags in the module client. */
+        c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
+        c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING);
+        if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+            server.cluster->myself)
+        {
+            errno = EPERM;
+            goto cleanup;
+        }
+    }
+
+    /* If we are using single commands replication, we need to wrap what
+     * we propagate into a MULTI/EXEC block, so that it will be atomic like
+     * a Lua script in the context of AOF and slaves. */
+    if (replicate) moduleReplicateMultiIfNeeded(ctx);
+
+    /* Run the command */
+    int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
+    if (replicate) {
+        call_flags |= CMD_CALL_PROPAGATE_AOF;
+        call_flags |= CMD_CALL_PROPAGATE_REPL;
+    }
+    call(c,call_flags);
+
+    /* Glue together the client static buffer and the reply list into a
+     * single protocol blob, and build the module reply object from it. */
+    sds proto = sdsnewlen(c->buf,c->bufpos);
+    c->bufpos = 0;
+    while(listLength(c->reply)) {
+        sds o = listNodeValue(listFirst(c->reply));
+
+        proto = sdscatsds(proto,o);
+        listDelNode(c->reply,listFirst(c->reply));
+    }
+    reply = moduleCreateCallReplyFromProto(ctx,proto);
+    autoMemoryAdd(ctx,REDISMODULE_AM_REPLY,reply);
+
+cleanup:
+    freeClient(c);
+    return reply;
+}
+
+/* Return a pointer, and a length, to the protocol returned by the command
+ * that returned the reply object. If the reply has no attached protocol
+ * buffer, NULL is returned and *len is set to 0 (previously *len was left
+ * untouched in that case, exposing an uninitialized value to callers). */
+const char *RM_CallReplyProto(RedisModuleCallReply *reply, size_t *len) {
+    if (reply->proto) {
+        *len = sdslen(reply->proto);
+    } else {
+        *len = 0;
+    }
+    return reply->proto;
+}
+
+/* --------------------------------------------------------------------------
+ * Modules data types
+ *
+ * When String DMA or using existing data structures is not enough, it is
+ * possible to create new data types from scratch and export them to
+ * Redis. The module must provide a set of callbacks for handling the
+ * new values exported (for example in order to provide RDB saving/loading,
+ * AOF rewrite, and so forth). In this section we define this API.
+ * -------------------------------------------------------------------------- */
+
+/* Turn a 9 chars name in the specified charset and a 10 bit encver into
+ * a single 64 bit unsigned integer that represents this exact module name
+ * and version. This final number is called a "type ID" and is used when
+ * writing module exported values to RDB files, in order to re-associate the
+ * value to the right module to load them during RDB loading.
+ *
+ * If the string is not of the right length or the charset is wrong, or
+ * if encver is outside the unsigned 10 bit integer range, 0 is returned,
+ * otherwise the function returns the right type ID.
+ *
+ * The resulting 64 bit integer is composed as follows:
+ *
+ * (high order bits) 6|6|6|6|6|6|6|6|6|10 (low order bits)
+ *
+ * The first 6 bits value is the first character, name[0], while the last
+ * 6 bits value, immediately before the 10 bits integer, is name[8].
+ * The last 10 bits are the encoding version.
+ *
+ * Note that a name and encver combo of "AAAAAAAAA" and 0, will produce
+ * zero as return value, that is the same we use to signal errors, thus
+ * this combination is invalid, and also useless since type names should
+ * try to vary in order to avoid collisions. */
+
+/* Charset used to encode module type names: exactly 64 symbols, so every
+ * character of the 9 chars name maps to 6 bits of the 64 bit type ID. */
+const char *ModuleTypeNameCharSet =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789-_";
+
+uint64_t moduleTypeEncodeId(const char *name, int encver) {
+    /* We use 64 symbols so that we can map each character into 6 bits
+     * of the final output. */
+    const char *cset = ModuleTypeNameCharSet;
+    if (strlen(name) != 9) return 0; /* Name must be exactly 9 chars. */
+    if (encver < 0 || encver > 1023) return 0; /* Encver is 10 bits. */
+
+    uint64_t id = 0;
+    for (int j = 0; j < 9; j++) {
+        char *p = strchr(cset,name[j]);
+        if (!p) return 0; /* Character outside the allowed charset. */
+        unsigned long pos = p-cset;
+        id = (id << 6) | pos;
+    }
+    id = (id << 10) | encver; /* Low 10 bits hold the encoding version. */
+    return id;
+}
+
+/* Search, in the list of exported data types of all the modules registered,
+ * a type with the same name as the one given. Returns the moduleType
+ * structure pointer if such a module is found, or NULL otherwise. */
+moduleType *moduleTypeLookupModuleByName(const char *name) {
+    dictIterator *di = dictGetIterator(modules);
+    dictEntry *de;
+
+    /* Linear scan: every registered module, every type it exports. */
+    while ((de = dictNext(di)) != NULL) {
+        struct RedisModule *module = dictGetVal(de);
+        listIter li;
+        listNode *ln;
+
+        listRewind(module->types,&li);
+        while((ln = listNext(&li))) {
+            moduleType *mt = ln->value;
+            /* Type names have a fixed size, so memcmp() is enough. */
+            if (memcmp(name,mt->name,sizeof(mt->name)) == 0) {
+                dictReleaseIterator(di);
+                return mt;
+            }
+        }
+    }
+    dictReleaseIterator(di);
+    return NULL;
+}
+
+/* Lookup a module by ID, with caching. This function is used during RDB
+ * loading. Modules exporting data types should never be able to unload, so
+ * our cache does not need to expire. */
+#define MODULE_LOOKUP_CACHE_SIZE 3
+
+moduleType *moduleTypeLookupModuleByID(uint64_t id) {
+    /* Static cache: zero initialized, filled in order, entries never
+     * expire since module types cannot be unregistered. */
+    static struct {
+        uint64_t id;
+        moduleType *mt;
+    } cache[MODULE_LOOKUP_CACHE_SIZE];
+
+    /* Search in cache to start. */
+    int j;
+    for (j = 0; j < MODULE_LOOKUP_CACHE_SIZE && cache[j].mt != NULL; j++)
+        if (cache[j].id == id) return cache[j].mt;
+
+    /* Slow module by module lookup. */
+    moduleType *mt = NULL;
+    dictIterator *di = dictGetIterator(modules);
+    dictEntry *de;
+
+    while ((de = dictNext(di)) != NULL && mt == NULL) {
+        struct RedisModule *module = dictGetVal(de);
+        listIter li;
+        listNode *ln;
+
+        listRewind(module->types,&li);
+        while((ln = listNext(&li))) {
+            moduleType *this_mt = ln->value;
+            /* Compare only the 54 bit module identifier and not the
+             * encoding version. */
+            if (this_mt->id >> 10 == id >> 10) {
+                mt = this_mt;
+                break;
+            }
+        }
+    }
+    dictReleaseIterator(di);
+
+    /* Add to cache if possible: after the scan above, 'j' is either the
+     * first free slot or MODULE_LOOKUP_CACHE_SIZE if the cache is full. */
+    if (mt && j < MODULE_LOOKUP_CACHE_SIZE) {
+        cache[j].id = id;
+        cache[j].mt = mt;
+    }
+    return mt;
+}
+
+/* Turn an (unresolved) module ID into a type name, to show the user an
+ * error when RDB files contain module data we can't load.
+ * The buffer pointed by 'name' must be 10 bytes at least. The function will
+ * fill it with a null terminated module name. */
+void moduleTypeNameByID(char *name, uint64_t moduleid) {
+    const char *cset = ModuleTypeNameCharSet;
+
+    name[9] = '\0';
+    /* Decode backward, 6 bits per character, after discarding the low
+     * 10 bits that hold the encoding version. */
+    char *p = name+8;
+    moduleid >>= 10;
+    for (int j = 0; j < 9; j++) {
+        *p-- = cset[moduleid & 63];
+        moduleid >>= 6;
+    }
+}
+
+/* Register a new data type exported by the module. The parameters are the
+ * following. Please for in depth documentation check the modules API
+ * documentation, especially the TYPES.md file.
+ *
+ * * **name**: A 9 characters data type name that MUST be unique in the Redis
+ * Modules ecosystem. Be creative... and there will be no collisions. Use
+ * the charset A-Z a-z 9-0, plus the two "-_" characters. A good
+ * idea is to use, for example `<typename>-<vendor>`. For example
+ * "tree-AntZ" may mean "Tree data structure by @antirez". To use both
+ * lower case and upper case letters helps in order to prevent collisions.
+ * * **encver**: Encoding version, which is, the version of the serialization
+ * that a module used in order to persist data. As long as the "name"
+ * matches, the RDB loading will be dispatched to the type callbacks
+ * whatever 'encver' is used, however the module can understand if
+ * the encoding it must load are of an older version of the module.
+ * For example the module "tree-AntZ" initially used encver=0. Later
+ * after an upgrade, it started to serialize data in a different format
+ * and to register the type with encver=1. However this module may
+ * still load old data produced by an older version if the rdb_load
+ * callback is able to check the encver value and act accordingly.
+ * The encver must be a positive value between 0 and 1023.
+ * * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure
+ * that should be populated with the methods callbacks and structure
+ * version, like in the following example:
+ *
+ * RedisModuleTypeMethods tm = {
+ * .version = REDISMODULE_TYPE_METHOD_VERSION,
+ * .rdb_load = myType_RDBLoadCallBack,
+ * .rdb_save = myType_RDBSaveCallBack,
+ * .aof_rewrite = myType_AOFRewriteCallBack,
+ * .free = myType_FreeCallBack,
+ *
+ * // Optional fields
+ * .digest = myType_DigestCallBack,
+ * .mem_usage = myType_MemUsageCallBack,
+ * }
+ *
+ * * **rdb_load**: A callback function pointer that loads data from RDB files.
+ * * **rdb_save**: A callback function pointer that saves data to RDB files.
+ * * **aof_rewrite**: A callback function pointer that rewrites data as commands.
+ * * **digest**: A callback function pointer that is used for `DEBUG DIGEST`.
+ * * **free**: A callback function pointer that can free a type value.
+ *
+ * The **digest** and **mem_usage** methods should currently be omitted since
+ * they are not yet implemented inside the Redis modules core.
+ *
+ * Note: the module name "AAAAAAAAA" is reserved and produces an error, it
+ * happens to be pretty lame as well.
+ *
+ * If there is already a module registering a type with the same name,
+ * and if the module name or encver is invalid, NULL is returned.
+ * Otherwise the new type is registered into Redis, and a reference of
+ * type RedisModuleType is returned: the caller of the function should store
+ * this reference into a global variable to make future use of it in the
+ * modules type API, since a single module may register multiple types.
+ * Example code fragment:
+ *
+ * static RedisModuleType *BalancedTreeType;
+ *
+ * int RedisModule_OnLoad(RedisModuleCtx *ctx) {
+ * // some code here ...
+ * BalancedTreeType = RM_CreateDataType(...);
+ * }
+ */
+moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr) {
+    uint64_t id = moduleTypeEncodeId(name,encver);
+    if (id == 0) return NULL; /* Bad name length/charset or encver. */
+    if (moduleTypeLookupModuleByName(name) != NULL) return NULL; /* Dup. */
+
+    /* The struct version is the first field of the methods structure.
+     * NOTE(review): it is read as 'long' while the field below is
+     * 'uint64_t'; on 32 bit platforms only part of the value is read —
+     * confirm this is safe for all expected version values. */
+    long typemethods_version = ((long*)typemethods_ptr)[0];
+    if (typemethods_version == 0) return NULL;
+
+    /* Expected layout of the caller-provided RedisModuleTypeMethods. */
+    struct typemethods {
+        uint64_t version;
+        moduleTypeLoadFunc rdb_load;
+        moduleTypeSaveFunc rdb_save;
+        moduleTypeRewriteFunc aof_rewrite;
+        moduleTypeMemUsageFunc mem_usage;
+        moduleTypeDigestFunc digest;
+        moduleTypeFreeFunc free;
+    } *tms = (struct typemethods*) typemethods_ptr;
+
+    moduleType *mt = zcalloc(sizeof(*mt));
+    mt->id = id;
+    mt->module = ctx->module;
+    mt->rdb_load = tms->rdb_load;
+    mt->rdb_save = tms->rdb_save;
+    mt->aof_rewrite = tms->aof_rewrite;
+    mt->mem_usage = tms->mem_usage;
+    mt->digest = tms->digest;
+    mt->free = tms->free;
+    memcpy(mt->name,name,sizeof(mt->name));
+    listAddNodeTail(ctx->module->types,mt);
+    return mt;
+}
+
+/* If the key is open for writing, set the specified module type object
+ * as the value of the key, deleting the old value if any.
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing or there is an active iterator, REDISMODULE_ERR is returned. */
+int RM_ModuleTypeSetValue(RedisModuleKey *key, moduleType *mt, void *value) {
+    if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
+    RM_DeleteKey(key); /* Replace semantics: drop any previous value. */
+    robj *o = createModuleObject(mt,value);
+    setKey(key->db,key->key,o);
+    /* Drop our reference: presumably the DB retained its own inside
+     * setKey() — confirm against setKey() refcount behavior. */
+    decrRefCount(o);
+    key->value = o;
+    return REDISMODULE_OK;
+}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type pointer of the value stored at key.
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+moduleType *RM_ModuleTypeGetType(RedisModuleKey *key) {
+    /* NULL for a missing key or a key not holding a module type value. */
+    if (key == NULL ||
+        key->value == NULL ||
+        RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+    moduleValue *mv = key->value->ptr;
+    return mv->type;
+}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type low-level value stored at key, as
+ * it was set by the user via RedisModule_ModuleTypeSet().
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+void *RM_ModuleTypeGetValue(RedisModuleKey *key) {
+    if (key == NULL) return NULL;
+    if (key->value == NULL) return NULL;
+    if (RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+    moduleValue *mv = key->value->ptr;
+    return mv->value;
+}
+
+/* --------------------------------------------------------------------------
+ * RDB loading and saving functions
+ * -------------------------------------------------------------------------- */
+
+/* Called when there is a load error in the context of a module. This cannot
+ * be recovered like for the built-in types. */
+void moduleRDBLoadError(RedisModuleIO *io) {
+    serverLog(LL_WARNING,
+        "Error loading data from RDB (short read or EOF). "
+        "Read performed by module '%s' about type '%s' "
+        "after reading '%llu' bytes of a value.",
+        io->type->module->name,
+        io->type->name,
+        (unsigned long long)io->bytes);
+    exit(1); /* Unrecoverable: abort the whole loading process. */
+}
+
+/* Save an unsigned 64 bit value into the RDB file. This function should only
+ * be called in the context of the rdb_save method of modules implementing new
+ * data types. */
+void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    /* Save opcode. */
+    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval; /* Account written bytes for error reporting. */
+    /* Save value. */
+    retval = rdbSaveLen(io->rio, value);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    return;
+
+saveerr:
+    io->error = 1;
+}
+
+/* Load an unsigned 64 bit value from the RDB file. This function should only
+ * be called in the context of the rdb_load method of modules implementing
+ * new data types. */
+uint64_t RM_LoadUnsigned(RedisModuleIO *io) {
+    /* Version 2 of the module serialization prefixes every value with a
+     * type opcode that must be validated. */
+    if (io->ver == 2) {
+        uint64_t opcode = rdbLoadLen(io->rio,NULL);
+        if (opcode != RDB_MODULE_OPCODE_UINT) goto loaderr;
+    }
+    uint64_t value;
+    int retval = rdbLoadLenByRef(io->rio, NULL, &value);
+    if (retval == -1) goto loaderr;
+    return value;
+
+loaderr:
+    moduleRDBLoadError(io); /* Logs and exits, does not return. */
+    return 0; /* Never reached. */
+}
+
+/* Like RedisModule_SaveUnsigned() but for signed 64 bit values. */
+void RM_SaveSigned(RedisModuleIO *io, int64_t value) {
+    /* Type-pun through a union: same bits, unsigned view. */
+    union {uint64_t u; int64_t i;} pun = {.i = value};
+    RM_SaveUnsigned(io,pun.u);
+}
+
+/* Like RedisModule_LoadUnsigned() but for signed 64 bit values. */
+int64_t RM_LoadSigned(RedisModuleIO *io) {
+    /* Reinterpret the stored unsigned bits as a signed value. */
+    union {uint64_t u; int64_t i;} pun;
+    pun.u = RM_LoadUnsigned(io);
+    return pun.i;
+}
+
+/* In the context of the rdb_save method of a module type, saves a
+ * string into the RDB file taking as input a RedisModuleString.
+ *
+ * The string can be later loaded with RedisModule_LoadString() or
+ * other Load family functions expecting a serialized string inside
+ * the RDB file. */
+void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    /* Save opcode. */
+    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    /* Save value. */
+    retval = rdbSaveStringObject(io->rio, s);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    return;
+
+saveerr:
+    io->error = 1;
+}
+
+/* Like RedisModule_SaveString() but takes a raw C pointer and length
+ * as input. */
+void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    /* Save opcode. */
+    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    /* Save value. */
+    retval = rdbSaveRawString(io->rio, (unsigned char*)str,len);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    return;
+
+saveerr:
+    io->error = 1;
+}
+
+/* Implements RM_LoadString() and RM_LoadStringBuffer() */
+void *moduleLoadString(RedisModuleIO *io, int plain, size_t *lenptr) {
+    /* Version 2 serialization prefixes values with a type opcode. */
+    if (io->ver == 2) {
+        uint64_t opcode = rdbLoadLen(io->rio,NULL);
+        if (opcode != RDB_MODULE_OPCODE_STRING) goto loaderr;
+    }
+    /* 'plain' selects a raw heap buffer instead of a string object. */
+    void *s = rdbGenericLoadStringObject(io->rio,
+              plain ? RDB_LOAD_PLAIN : RDB_LOAD_NONE, lenptr);
+    if (s == NULL) goto loaderr;
+    return s;
+
+loaderr:
+    moduleRDBLoadError(io); /* Logs and exits, does not return. */
+    return NULL; /* Never reached. */
+}
+
+/* In the context of the rdb_load method of a module data type, loads a string
+ * from the RDB file, that was previously saved with RedisModule_SaveString()
+ * functions family.
+ *
+ * The returned string is a newly allocated RedisModuleString object, and
+ * the user should at some point free it with a call to RedisModule_FreeString().
+ *
+ * If the data structure does not store strings as RedisModuleString objects,
+ * the similar function RedisModule_LoadStringBuffer() could be used instead. */
+RedisModuleString *RM_LoadString(RedisModuleIO *io) {
+    return moduleLoadString(io,0,NULL); /* plain=0: string object form. */
+}
+
+/* Like RedisModule_LoadString() but returns an heap allocated string that
+ * was allocated with RedisModule_Alloc(), and can be resized or freed with
+ * RedisModule_Realloc() or RedisModule_Free().
+ *
+ * The size of the string is stored at '*lenptr' if not NULL.
+ * The returned string is not automatically NULL terminated, it is loaded
+ * exactly as it was stored inside the RDB file. */
+char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) {
+    return moduleLoadString(io,1,lenptr); /* plain=1: raw heap buffer. */
+}
+
+/* In the context of the rdb_save method of a module data type, saves a double
+ * value to the RDB file. The double can be a valid number, a NaN or infinity.
+ * It is possible to load back the value with RedisModule_LoadDouble(). */
+void RM_SaveDouble(RedisModuleIO *io, double value) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    /* Save opcode. */
+    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    /* Save value. */
+    retval = rdbSaveBinaryDoubleValue(io->rio, value);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    return;
+
+saveerr:
+    io->error = 1;
+}
+
+/* In the context of the rdb_load method of a module data type, loads back the
+ * double value saved by RedisModule_SaveDouble(). */
+double RM_LoadDouble(RedisModuleIO *io) {
+    /* Version 2 serialization prefixes values with a type opcode. */
+    if (io->ver == 2) {
+        uint64_t opcode = rdbLoadLen(io->rio,NULL);
+        if (opcode != RDB_MODULE_OPCODE_DOUBLE) goto loaderr;
+    }
+    double value;
+    int retval = rdbLoadBinaryDoubleValue(io->rio, &value);
+    if (retval == -1) goto loaderr;
+    return value;
+
+loaderr:
+    moduleRDBLoadError(io); /* Logs and exits, does not return. */
+    return 0; /* Never reached. */
+}
+
+/* In the context of the rdb_save method of a module data type, saves a float
+ * value to the RDB file. The float can be a valid number, a NaN or infinity.
+ * It is possible to load back the value with RedisModule_LoadFloat(). */
+void RM_SaveFloat(RedisModuleIO *io, float value) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    /* Save opcode. */
+    int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    /* Save value. */
+    retval = rdbSaveBinaryFloatValue(io->rio, value);
+    if (retval == -1) goto saveerr;
+    io->bytes += retval;
+    return;
+
+saveerr:
+    io->error = 1;
+}
+
+/* In the context of the rdb_load method of a module data type, loads back the
+ * float value saved by RedisModule_SaveFloat(). */
+float RM_LoadFloat(RedisModuleIO *io) {
+    /* Version 2 serialization prefixes values with a type opcode. */
+    if (io->ver == 2) {
+        uint64_t opcode = rdbLoadLen(io->rio,NULL);
+        if (opcode != RDB_MODULE_OPCODE_FLOAT) goto loaderr;
+    }
+    float value;
+    int retval = rdbLoadBinaryFloatValue(io->rio, &value);
+    if (retval == -1) goto loaderr;
+    return value;
+
+loaderr:
+    moduleRDBLoadError(io); /* Logs and exits, does not return. */
+    return 0; /* Never reached. */
+}
+
+/* --------------------------------------------------------------------------
+ * Key digest API (DEBUG DIGEST interface for modules types)
+ * -------------------------------------------------------------------------- */
+
+/* Add a new element to the digest. This function can be called multiple times
+ * one element after the other, for all the elements that constitute a given
+ * data structure. The function call must be followed by the call to
+ * `RedisModule_DigestEndSequence` eventually, when all the elements that are
+ * always in a given order are added. See the Redis Modules data types
+ * documentation for more info. However this is a quick example that uses Redis
+ * data types as an example.
+ *
+ * To add a sequence of unordered elements (for example in the case of a Redis
+ * Set), the pattern to use is:
+ *
+ * foreach element {
+ * AddElement(element);
+ * EndSequence();
+ * }
+ *
+ * Because Sets are not ordered, so every element added has a position that
+ * does not depend from the other. However if instead our elements are
+ * ordered in pairs, like field-value pairs of an Hash, then one should
+ * use:
+ *
+ * foreach key,value {
+ * AddElement(key);
+ * AddElement(value);
+ * EndSequence();
+ * }
+ *
+ * Because the key and value will be always in the above order, while instead
+ * the single key-value pairs, can appear in any position into a Redis hash.
+ *
+ * A list of ordered elements would be implemented with:
+ *
+ * foreach element {
+ * AddElement(element);
+ * }
+ * EndSequence();
+ *
+ */
+void RM_DigestAddStringBuffer(RedisModuleDigest *md, unsigned char *ele, size_t len) {
+    /* Mix the element into the current (ordered) digest accumulator. */
+    mixDigest(md->o,ele,len);
+}
+
+/* Like `RedisModule_DigestAddStringBuffer()` but takes a long long as input
+ * that gets converted into a string before adding it to the digest. */
+void RM_DigestAddLongLong(RedisModuleDigest *md, long long ll) {
+    char buf[LONG_STR_SIZE];
+    size_t len = ll2string(buf,sizeof(buf),ll); /* Decimal representation. */
+    mixDigest(md->o,buf,len);
+}
+
+/* See the documentation for `RedisModule_DigestAddElement()`. */
+void RM_DigestEndSequence(RedisModuleDigest *md) {
+    /* XOR-fold the ordered accumulator into the order-independent
+     * digest, then reset it for the next sequence. */
+    xorDigest(md->x,md->o,sizeof(md->o));
+    memset(md->o,0,sizeof(md->o));
+}
+
+/* --------------------------------------------------------------------------
+ * AOF API for modules data types
+ * -------------------------------------------------------------------------- */
+
+/* Emits a command into the AOF during the AOF rewriting process. This function
+ * is only called in the context of the aof_rewrite method of data types exported
+ * by a module. The command works exactly like RedisModule_Call() in the way
+ * the parameters are passed, but it does not return anything as the error
+ * handling is performed by Redis itself. */
+void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) {
+    if (io->error) return; /* Sticky error: skip further writes. */
+    struct redisCommand *cmd;
+    robj **argv = NULL;
+    int argc = 0, flags = 0, j;
+    va_list ap;
+
+    cmd = lookupCommandByCString((char*)cmdname);
+    if (!cmd) {
+        serverLog(LL_WARNING,
+            "Fatal: AOF method for module data type '%s' tried to "
+            "emit unknown command '%s'",
+            io->type->name, cmdname);
+        io->error = 1;
+        errno = EINVAL;
+        return;
+    }
+
+    /* Emit the arguments into the AOF in Redis protocol format. */
+    va_start(ap, fmt);
+    argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+    va_end(ap);
+    if (argv == NULL) {
+        serverLog(LL_WARNING,
+            "Fatal: AOF method for module data type '%s' tried to "
+            "call RedisModule_EmitAOF() with wrong format specifiers '%s'",
+            io->type->name, fmt);
+        io->error = 1;
+        errno = EINVAL;
+        return;
+    }
+
+    /* Bulk count. */
+    if (!io->error && rioWriteBulkCount(io->rio,'*',argc) == 0)
+        io->error = 1;
+
+    /* Arguments. Keep looping on write errors so every object reference
+     * taken above is released. */
+    for (j = 0; j < argc; j++) {
+        if (!io->error && rioWriteBulkObject(io->rio,argv[j]) == 0)
+            io->error = 1;
+        decrRefCount(argv[j]);
+    }
+    zfree(argv);
+    return;
+}
+
+/* --------------------------------------------------------------------------
+ * IO context handling
+ * -------------------------------------------------------------------------- */
+
+/* Return (lazily creating it) a module context suitable for callbacks
+ * that only receive a RedisModuleIO pointer. The context is cached
+ * inside the IO object and has no associated client. */
+RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) {
+    if (io->ctx) return io->ctx; /* Can't have more than one... */
+    RedisModuleCtx ctxtemplate = REDISMODULE_CTX_INIT;
+    io->ctx = zmalloc(sizeof(RedisModuleCtx));
+    *(io->ctx) = ctxtemplate;
+    io->ctx->module = io->type->module;
+    io->ctx->client = NULL; /* No client in an RDB/AOF serialization context. */
+    return io->ctx;
+}
+
+/* --------------------------------------------------------------------------
+ * Logging
+ * -------------------------------------------------------------------------- */
+
+/* This is the low level function implementing both:
+ *
+ * RM_Log()
+ * RM_LogIOError()
+ *
+ */
+/* Format "<module-name> message" into a fixed buffer and emit it at the
+ * requested log level (defaulting to verbose for unknown level names). */
+void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) {
+    char msg[LOG_MAX_LEN];
+    size_t name_len;
+    int level;
+
+    if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG;
+    else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE;
+    else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE;
+    else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING;
+    else level = LL_VERBOSE; /* Default. */
+
+    /* snprintf() returns the length the output WOULD have had: if the
+     * module name prefix alone does not fit, clamp name_len, otherwise
+     * 'sizeof(msg)-name_len' wraps around as size_t and 'msg+name_len'
+     * points past the end of the buffer. */
+    name_len = snprintf(msg, sizeof(msg),"<%s> ", module->name);
+    if (name_len >= sizeof(msg)) name_len = sizeof(msg)-1;
+    vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap);
+    serverLogRaw(level,msg);
+}
+
+/* Produces a log message to the standard Redis log, the format accepts
+ * printf-alike specifiers, while level is a string describing the log
+ * level to use when emitting the log, and must be one of the following:
+ *
+ * * "debug"
+ * * "verbose"
+ * * "notice"
+ * * "warning"
+ *
+ * If the specified log level is invalid, verbose is used by default.
+ * There is a fixed limit to the length of the log line this function is able
+ to emit, this limit is not specified but is guaranteed to be more than
+ * a few lines of text.
+ */
+void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
+ if (!ctx->module) return; /* Can only log if module is initialized */
+
+ va_list ap;
+ va_start(ap, fmt);
+ RM_LogRaw(ctx->module,levelstr,fmt,ap);
+ va_end(ap);
+}
+
+/* Log errors from RDB / AOF serialization callbacks.
+ *
+ * This function should be used when a callback is returning a critical
+ error to the caller since it cannot load or save the data for some
+ * critical reason. */
+void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ RM_LogRaw(io->type->module,levelstr,fmt,ap);
+ va_end(ap);
+}
+
+/* --------------------------------------------------------------------------
+ * Blocking clients from modules
+ * -------------------------------------------------------------------------- */
+
+/* Readable handler for the awake pipe. We do nothing here, the awake bytes
+ * will be actually read in a more appropriate place in the
+ * moduleHandleBlockedClients() function that is where clients are actually
+ * served. */
+void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
+ UNUSED(privdata);
+}
+
+/* This is called from blocked.c in order to unblock a client: may be called
+ * for multiple reasons while the client is in the middle of being blocked
+ * because the client is terminated, but is also called for cleanup when a
+ client is unblocked in a clean way after replying.
+ *
+ * What we do here is just to set the client to NULL in the redis module
+ * blocked client handle. This way if the client is terminated while there
+ * is a pending threaded operation involving the blocked client, we'll know
+ * that the client no longer exists and no reply callback should be called.
+ *
+ * The structure RedisModuleBlockedClient will be always deallocated when
+ * running the list of clients blocked by a module that need to be unblocked. */
+void unblockClientFromModule(client *c) {
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+ bc->client = NULL;
+ /* Reset the client for a new query since, for blocking commands implemented
+ into modules, we do not reset it immediately after the command returns (and
+ * the client blocks) in order to be still able to access the argument
+ * vector from callbacks. */
+ resetClient(c);
+}
+
+/* Block a client in the context of a blocking command, returning an handle
+ * which will be used, later, in order to unblock the client with a call to
+ * RedisModule_UnblockClient(). The arguments specify callback functions
+ * and a timeout after which the client is unblocked.
+ *
+ * The callbacks are called in the following contexts:
+ *
+ * reply_callback: called after a successful RedisModule_UnblockClient()
+ * call in order to reply to the client and unblock it.
+ *
+ timeout_callback: called when the timeout is reached in order to send an
+ * error to the client.
+ *
+ free_privdata: called in order to free the private data that is passed
+ * by RedisModule_UnblockClient() call.
+ */
+RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms) {
+ client *c = ctx->client;
+ int islua = c->flags & CLIENT_LUA;
+
+ c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient));
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+
+ /* We need to handle the invalid operation of calling modules blocking
+ * commands from Lua. We actually create an already aborted (client set to
+ * NULL) blocked client handle, and actually reply to Lua with an error. */
+ bc->client = islua ? NULL : c;
+ bc->module = ctx->module;
+ bc->reply_callback = reply_callback;
+ bc->timeout_callback = timeout_callback;
+ bc->free_privdata = free_privdata;
+ bc->privdata = NULL;
+ bc->reply_client = createClient(-1);
+ bc->reply_client->flags |= CLIENT_MODULE;
+ bc->dbid = c->db->id;
+ c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0;
+
+ if (islua) {
+ c->bpop.module_blocked_handle = NULL;
+ addReplyError(c,"Blocking module command called from Lua script");
+ } else {
+ blockClient(c,BLOCKED_MODULE);
+ }
+ return bc;
+}
+
+/* Unblock a client blocked by `RedisModule_BlockClient`. This will trigger
+ * the reply callbacks to be called in order to reply to the client.
+ * The 'privdata' argument will be accessible by the reply callback, so
+ * the caller of this function can pass any value that is needed in order to
+ * actually reply to the client.
+ *
+ * A common usage for 'privdata' is a thread that computes something that
+ needs to be passed to the client, including but not limited to some slow
+ * to compute reply or some reply obtained via networking.
+ *
+ * Note: this function can be called from threads spawned by the module. */
+int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) {
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ bc->privdata = privdata;
+ listAddNodeTail(moduleUnblockedClients,bc);
+ if (write(server.module_blocked_pipe[1],"A",1) != 1) {
+ /* Ignore the error, this is best-effort. */
+ }
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+ return REDISMODULE_OK;
+}
+
+/* Abort a blocked client blocking operation: the client will be unblocked
+ * without firing the reply callback. */
+int RM_AbortBlock(RedisModuleBlockedClient *bc) {
+ bc->reply_callback = NULL;
+ return RM_UnblockClient(bc,NULL);
+}
+
+/* This function will check the moduleUnblockedClients queue in order to
+ * call the reply callback and really unblock the client.
+ *
+ * Clients end into this list because of calls to RM_UnblockClient(),
+ * however it is possible that while the module was doing work for the
+ * blocked client, it was terminated by Redis (for timeout or other reasons).
+ * When this happens the RedisModuleBlockedClient structure in the queue
+ * will have the 'client' field set to NULL. */
+void moduleHandleBlockedClients(void) {
+ listNode *ln;
+ RedisModuleBlockedClient *bc;
+
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ /* Here we unblock all the pending clients blocked in modules operations
+ * so we can read every pending "awake byte" in the pipe. */
+ char buf[1];
+ while (read(server.module_blocked_pipe[0],buf,1) == 1);
+ while (listLength(moduleUnblockedClients)) {
+ ln = listFirst(moduleUnblockedClients);
+ bc = ln->value;
+ client *c = bc->client;
+ listDelNode(moduleUnblockedClients,ln);
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+
+ /* Release the lock during the loop, as long as we don't
+ * touch the shared list. */
+
+ /* Call the reply callback if the client is valid and we have
+ * any callback. */
+ if (c && bc->reply_callback) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY;
+ ctx.blocked_privdata = bc->privdata;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ bc->reply_callback(&ctx,(void**)c->argv,c->argc);
+ moduleHandlePropagationAfterCommandCallback(&ctx);
+ moduleFreeContext(&ctx);
+ }
+
+ /* Free privdata if any. */
+ if (bc->privdata && bc->free_privdata)
+ bc->free_privdata(bc->privdata);
+
+ /* It is possible that this blocked client object accumulated
+ * replies to send to the client in a thread safe context.
+ * We need to glue such replies to the client output buffer and
+ * free the temporary client we just used for the replies. */
+ if (c) {
+ if (bc->reply_client->bufpos)
+ addReplyString(c,bc->reply_client->buf,
+ bc->reply_client->bufpos);
+ if (listLength(bc->reply_client->reply))
+ listJoin(c->reply,bc->reply_client->reply);
+ c->reply_bytes += bc->reply_client->reply_bytes;
+ }
+ freeClient(bc->reply_client);
+
+ if (c != NULL) {
+ unblockClient(c);
+ /* Put the client in the list of clients that need to write
+ * if there are pending replies here. This is needed since
+ * during a non blocking command the client may receive output. */
+ if (clientHasPendingReplies(c) &&
+ !(c->flags & CLIENT_PENDING_WRITE))
+ {
+ c->flags |= CLIENT_PENDING_WRITE;
+ listAddNodeHead(server.clients_pending_write,c);
+ }
+ }
+
+ /* Free 'bc' only after unblocking the client, since it is
+ * referenced in the client blocking context, and must be valid
+ * when calling unblockClient(). */
+ zfree(bc);
+
+ /* Lock again before to iterate the loop. */
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ }
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+}
+
+/* Called when our client timed out. After this function unblockClient()
+ * is called, and it will invalidate the blocked client. So this function
+ * does not need to do any cleanup. Eventually the module will call the
+ * API to unblock the client and the memory will be released. */
+void moduleBlockedClientTimedOut(client *c) {
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.flags |= REDISMODULE_CTX_BLOCKED_TIMEOUT;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ bc->timeout_callback(&ctx,(void**)c->argv,c->argc);
+ moduleFreeContext(&ctx);
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client. */
+int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_BLOCKED_REPLY) != 0;
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client that timed out. */
+int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_BLOCKED_TIMEOUT) != 0;
+}
+
+/* Get the private data set by RedisModule_UnblockClient() */
+void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) {
+ return ctx->blocked_privdata;
+}
+
+/* --------------------------------------------------------------------------
+ * Thread Safe Contexts
+ * -------------------------------------------------------------------------- */
+
+/* Return a context which can be used inside threads to make Redis context
+ * calls with certain modules APIs. If 'bc' is not NULL then the module will
+ * be bound to a blocked client, and it will be possible to use the
+ * `RedisModule_Reply*` family of functions to accumulate a reply for when the
+ * client will be unblocked. Otherwise the thread safe context will be
+ detached from any specific client.
+ *
+ * To call non-reply APIs, the thread safe context must be prepared with:
+ *
+ * RedisModule_ThreadSafeCallStart(ctx);
+ * ... make your call here ...
+ * RedisModule_ThreadSafeCallStop(ctx);
+ *
+ * This is not needed when using `RedisModule_Reply*` functions, assuming
+ * that a blocked client was used when the context was created, otherwise
+ * no RedisModule_Reply* call should be made at all.
+ *
+ * TODO: thread safe contexts do not inherit the blocked client
+ * selected database. */
+RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) {
+ RedisModuleCtx *ctx = zmalloc(sizeof(*ctx));
+ RedisModuleCtx empty = REDISMODULE_CTX_INIT;
+ memcpy(ctx,&empty,sizeof(empty));
+ if (bc) {
+ ctx->blocked_client = bc;
+ ctx->module = bc->module;
+ }
+ ctx->flags |= REDISMODULE_CTX_THREAD_SAFE;
+ /* Even when the context is associated with a blocked client, we can't
+ * access it safely from another thread, so we create a fake client here
+ * in order to keep things like the currently selected database and similar
+ * things. */
+ ctx->client = createClient(-1);
+ if (bc) selectDb(ctx->client,bc->dbid);
+ return ctx;
+}
+
+/* Release a thread safe context. */
+void RM_FreeThreadSafeContext(RedisModuleCtx *ctx) {
+ moduleFreeContext(ctx);
+ zfree(ctx);
+}
+
+/* Acquire the server lock before executing a thread safe API call.
+ * This is not needed for `RedisModule_Reply*` calls when there is
+ * a blocked client connected to the thread safe context. */
+void RM_ThreadSafeContextLock(RedisModuleCtx *ctx) {
+ DICT_NOTUSED(ctx);
+ moduleAcquireGIL();
+}
+
+/* Release the server lock after a thread safe API call was executed. */
+void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) {
+ DICT_NOTUSED(ctx);
+ moduleReleaseGIL();
+}
+
+void moduleAcquireGIL(void) {
+ pthread_mutex_lock(&moduleGIL);
+}
+
+void moduleReleaseGIL(void) {
+ pthread_mutex_unlock(&moduleGIL);
+}
+
+/* --------------------------------------------------------------------------
+ * Modules API internals
+ * -------------------------------------------------------------------------- */
+
+/* server.moduleapi dictionary type. Only uses plain C strings since
+ * this gets queried from modules. */
+
+uint64_t dictCStringKeyHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
+}
+
+int dictCStringKeyCompare(void *privdata, const void *key1, const void *key2) {
+ DICT_NOTUSED(privdata);
+ return strcmp(key1,key2) == 0;
+}
+
+dictType moduleAPIDictType = {
+ dictCStringKeyHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictCStringKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+int moduleRegisterApi(const char *funcname, void *funcptr) {
+ return dictAdd(server.moduleapi, (char*)funcname, funcptr);
+}
+
+#define REGISTER_API(name) \
+ moduleRegisterApi("RedisModule_" #name, (void *)(unsigned long)RM_ ## name)
+
+/* Global initialization at Redis startup. */
+void moduleRegisterCoreAPI(void);
+
+void moduleInitModulesSystem(void) {
+ moduleUnblockedClients = listCreate();
+
+ server.loadmodule_queue = listCreate();
+ modules = dictCreate(&modulesDictType,NULL);
+ moduleRegisterCoreAPI();
+ if (pipe(server.module_blocked_pipe) == -1) {
+ serverLog(LL_WARNING,
+ "Can't create the pipe for module blocking commands: %s",
+ strerror(errno));
+ exit(1);
+ }
+ /* Make the pipe non blocking. This is just a best effort aware mechanism
+ and we do not want to block in either the read or the write half. */
+ anetNonBlock(NULL,server.module_blocked_pipe[0]);
+ anetNonBlock(NULL,server.module_blocked_pipe[1]);
+
+ /* Our thread-safe contexts GIL must start with already locked:
+ * it is just unlocked when it's safe. */
+ pthread_mutex_lock(&moduleGIL);
+}
+
+/* Load all the modules in the server.loadmodule_queue list, which is
+ * populated by `loadmodule` directives in the configuration file.
+ * We can't load modules directly when processing the configuration file
+ * because the server must be fully initialized before loading modules.
+ *
+ * The function aborts the server on errors, since to start with missing
+ modules is not considered sane: clients may rely on the existence of
+ * given commands, loading AOF also may need some modules to exist, and
+ * if this instance is a slave, it must understand commands from master. */
+void moduleLoadFromQueue(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.loadmodule_queue,&li);
+ while((ln = listNext(&li))) {
+ struct moduleLoadQueueEntry *loadmod = ln->value;
+ if (moduleLoad(loadmod->path,(void **)loadmod->argv,loadmod->argc)
+ == C_ERR)
+ {
+ serverLog(LL_WARNING,
+ "Can't load module from %s: server aborting",
+ loadmod->path);
+ exit(1);
+ }
+ }
+}
+
+void moduleFreeModuleStructure(struct RedisModule *module) {
+ listRelease(module->types);
+ sdsfree(module->name);
+ zfree(module);
+}
+
+/* Load a module and initialize it. On success C_OK is returned, otherwise
+ * C_ERR is returned. */
+int moduleLoad(const char *path, void **module_argv, int module_argc) {
+ int (*onload)(void *, void **, int);
+ void *handle;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ handle = dlopen(path,RTLD_NOW|RTLD_LOCAL);
+ if (handle == NULL) {
+ serverLog(LL_WARNING, "Module %s failed to load: %s", path, dlerror());
+ return C_ERR;
+ }
+ onload = (int (*)(void *, void **, int))(unsigned long) dlsym(handle,"RedisModule_OnLoad");
+ if (onload == NULL) {
+ serverLog(LL_WARNING,
+ "Module %s does not export RedisModule_OnLoad() "
+ "symbol. Module not loaded.",path);
+ return C_ERR;
+ }
+ if (onload((void*)&ctx,module_argv,module_argc) == REDISMODULE_ERR) {
+ if (ctx.module) moduleFreeModuleStructure(ctx.module);
+ dlclose(handle);
+ serverLog(LL_WARNING,
+ "Module %s initialization failed. Module not loaded",path);
+ return C_ERR;
+ }
+
+ /* Redis module loaded! Register it. */
+ dictAdd(modules,ctx.module->name,ctx.module);
+ ctx.module->handle = handle;
+ serverLog(LL_NOTICE,"Module '%s' loaded from %s",ctx.module->name,path);
+ moduleFreeContext(&ctx);
+ return C_OK;
+}
+
+/* Unload the module registered with the specified name. On success
+ * C_OK is returned, otherwise C_ERR is returned and errno is set
+ * to the following values depending on the type of error:
+ *
+ * ENOENT: No such module having the specified name.
+ * * EBUSY: The module exports a new data type and can only be reloaded. */
+int moduleUnload(sds name) {
+ struct RedisModule *module = dictFetchValue(modules,name);
+
+ if (module == NULL) {
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+
+ if (listLength(module->types)) {
+ errno = EBUSY;
+ return REDISMODULE_ERR;
+ }
+
+ /* Unregister all the commands registered by this module. */
+ dictIterator *di = dictGetSafeIterator(server.commands);
+ dictEntry *de;
+ while ((de = dictNext(di)) != NULL) {
+ struct redisCommand *cmd = dictGetVal(de);
+ if (cmd->proc == RedisModuleCommandDispatcher) {
+ RedisModuleCommandProxy *cp =
+ (void*)(unsigned long)cmd->getkeys_proc;
+ sds cmdname = cp->rediscmd->name;
+ if (cp->module == module) {
+ dictDelete(server.commands,cmdname);
+ dictDelete(server.orig_commands,cmdname);
+ sdsfree(cmdname);
+ zfree(cp->rediscmd);
+ zfree(cp);
+ }
+ }
+ }
+ dictReleaseIterator(di);
+
+ /* Unregister all the hooks. TODO: Yet no hooks support here. */
+
+ /* Unload the dynamic library. */
+ if (dlclose(module->handle) == -1) {
+ char *error = dlerror();
+ if (error == NULL) error = "Unknown error";
+ serverLog(LL_WARNING,"Error when trying to close the %s module: %s",
+ module->name, error);
+ }
+
+ /* Remove from list of modules. */
+ serverLog(LL_NOTICE,"Module %s unloaded",module->name);
+ dictDelete(modules,module->name);
+ module->name = NULL; /* The name was already freed by dictDelete(). */
+ moduleFreeModuleStructure(module);
+
+ return REDISMODULE_OK;
+}
+
+/* Redis MODULE command.
+ *
+ * MODULE LOAD <path> [args...] */
+void moduleCommand(client *c) {
+ char *subcmd = c->argv[1]->ptr;
+
+ if (!strcasecmp(subcmd,"load") && c->argc >= 3) {
+ robj **argv = NULL;
+ int argc = 0;
+
+ if (c->argc > 3) {
+ argc = c->argc - 3;
+ argv = &c->argv[3];
+ }
+
+ if (moduleLoad(c->argv[2]->ptr,(void **)argv,argc) == C_OK)
+ addReply(c,shared.ok);
+ else
+ addReplyError(c,
+ "Error loading the extension. Please check the server logs.");
+ } else if (!strcasecmp(subcmd,"unload") && c->argc == 3) {
+ if (moduleUnload(c->argv[2]->ptr) == C_OK)
+ addReply(c,shared.ok);
+ else {
+ char *errmsg;
+ switch(errno) {
+ case ENOENT:
+ errmsg = "no such module with that name";
+ break;
+ case EBUSY:
+ errmsg = "the module exports one or more module-side data types, can't unload";
+ break;
+ default:
+ errmsg = "operation not possible.";
+ break;
+ }
+ addReplyErrorFormat(c,"Error unloading module: %s",errmsg);
+ }
+ } else if (!strcasecmp(subcmd,"list") && c->argc == 2) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ addReplyMultiBulkLen(c,dictSize(modules));
+ while ((de = dictNext(di)) != NULL) {
+ sds name = dictGetKey(de);
+ struct RedisModule *module = dictGetVal(de);
+ addReplyMultiBulkLen(c,4);
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,name,sdslen(name));
+ addReplyBulkCString(c,"ver");
+ addReplyLongLong(c,module->ver);
+ }
+ dictReleaseIterator(di);
+ } else {
+ addReply(c,shared.syntaxerr);
+ }
+}
+
+/* Return the number of registered modules. */
+size_t moduleCount(void) {
+ return dictSize(modules);
+}
+
+/* Register all the APIs we export. Keep this function at the end of the
+ * file so that's easy to seek it to add new entries. */
+void moduleRegisterCoreAPI(void) {
+ server.moduleapi = dictCreate(&moduleAPIDictType,NULL);
+ REGISTER_API(Alloc);
+ REGISTER_API(Calloc);
+ REGISTER_API(Realloc);
+ REGISTER_API(Free);
+ REGISTER_API(Strdup);
+ REGISTER_API(CreateCommand);
+ REGISTER_API(SetModuleAttribs);
+ REGISTER_API(WrongArity);
+ REGISTER_API(ReplyWithLongLong);
+ REGISTER_API(ReplyWithError);
+ REGISTER_API(ReplyWithSimpleString);
+ REGISTER_API(ReplyWithArray);
+ REGISTER_API(ReplySetArrayLength);
+ REGISTER_API(ReplyWithString);
+ REGISTER_API(ReplyWithStringBuffer);
+ REGISTER_API(ReplyWithNull);
+ REGISTER_API(ReplyWithCallReply);
+ REGISTER_API(ReplyWithDouble);
+ REGISTER_API(GetSelectedDb);
+ REGISTER_API(SelectDb);
+ REGISTER_API(OpenKey);
+ REGISTER_API(CloseKey);
+ REGISTER_API(KeyType);
+ REGISTER_API(ValueLength);
+ REGISTER_API(ListPush);
+ REGISTER_API(ListPop);
+ REGISTER_API(StringToLongLong);
+ REGISTER_API(StringToDouble);
+ REGISTER_API(Call);
+ REGISTER_API(CallReplyProto);
+ REGISTER_API(FreeCallReply);
+ REGISTER_API(CallReplyInteger);
+ REGISTER_API(CallReplyType);
+ REGISTER_API(CallReplyLength);
+ REGISTER_API(CallReplyArrayElement);
+ REGISTER_API(CallReplyStringPtr);
+ REGISTER_API(CreateStringFromCallReply);
+ REGISTER_API(CreateString);
+ REGISTER_API(CreateStringFromLongLong);
+ REGISTER_API(CreateStringFromString);
+ REGISTER_API(CreateStringPrintf);
+ REGISTER_API(FreeString);
+ REGISTER_API(StringPtrLen);
+ REGISTER_API(AutoMemory);
+ REGISTER_API(Replicate);
+ REGISTER_API(ReplicateVerbatim);
+ REGISTER_API(DeleteKey);
+ REGISTER_API(StringSet);
+ REGISTER_API(StringDMA);
+ REGISTER_API(StringTruncate);
+ REGISTER_API(SetExpire);
+ REGISTER_API(GetExpire);
+ REGISTER_API(ZsetAdd);
+ REGISTER_API(ZsetIncrby);
+ REGISTER_API(ZsetScore);
+ REGISTER_API(ZsetRem);
+ REGISTER_API(ZsetRangeStop);
+ REGISTER_API(ZsetFirstInScoreRange);
+ REGISTER_API(ZsetLastInScoreRange);
+ REGISTER_API(ZsetFirstInLexRange);
+ REGISTER_API(ZsetLastInLexRange);
+ REGISTER_API(ZsetRangeCurrentElement);
+ REGISTER_API(ZsetRangeNext);
+ REGISTER_API(ZsetRangePrev);
+ REGISTER_API(ZsetRangeEndReached);
+ REGISTER_API(HashSet);
+ REGISTER_API(HashGet);
+ REGISTER_API(IsKeysPositionRequest);
+ REGISTER_API(KeyAtPos);
+ REGISTER_API(GetClientId);
+ REGISTER_API(PoolAlloc);
+ REGISTER_API(CreateDataType);
+ REGISTER_API(ModuleTypeSetValue);
+ REGISTER_API(ModuleTypeGetType);
+ REGISTER_API(ModuleTypeGetValue);
+ REGISTER_API(SaveUnsigned);
+ REGISTER_API(LoadUnsigned);
+ REGISTER_API(SaveSigned);
+ REGISTER_API(LoadSigned);
+ REGISTER_API(SaveString);
+ REGISTER_API(SaveStringBuffer);
+ REGISTER_API(LoadString);
+ REGISTER_API(LoadStringBuffer);
+ REGISTER_API(SaveDouble);
+ REGISTER_API(LoadDouble);
+ REGISTER_API(SaveFloat);
+ REGISTER_API(LoadFloat);
+ REGISTER_API(EmitAOF);
+ REGISTER_API(Log);
+ REGISTER_API(LogIOError);
+ REGISTER_API(StringAppendBuffer);
+ REGISTER_API(RetainString);
+ REGISTER_API(StringCompare);
+ REGISTER_API(GetContextFromIO);
+ REGISTER_API(BlockClient);
+ REGISTER_API(UnblockClient);
+ REGISTER_API(IsBlockedReplyRequest);
+ REGISTER_API(IsBlockedTimeoutRequest);
+ REGISTER_API(GetBlockedClientPrivateData);
+ REGISTER_API(AbortBlock);
+ REGISTER_API(Milliseconds);
+ REGISTER_API(GetThreadSafeContext);
+ REGISTER_API(FreeThreadSafeContext);
+ REGISTER_API(ThreadSafeContextLock);
+ REGISTER_API(ThreadSafeContextUnlock);
+ REGISTER_API(DigestAddStringBuffer);
+ REGISTER_API(DigestAddLongLong);
+ REGISTER_API(DigestEndSequence);
+}
diff --git a/src/modules/.gitignore b/src/modules/.gitignore
new file mode 100644
index 000000000..4de1735ec
--- /dev/null
+++ b/src/modules/.gitignore
@@ -0,0 +1,2 @@
+*.so
+*.xo
diff --git a/src/modules/Makefile b/src/modules/Makefile
new file mode 100644
index 000000000..066e65e9b
--- /dev/null
+++ b/src/modules/Makefile
@@ -0,0 +1,42 @@
+
+# find the OS
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+
+# Compile flags for linux / osx
+ifeq ($(uname_S),Linux)
+ SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2
+ SHOBJ_LDFLAGS ?= -shared
+else
+ SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2
+ SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup
+endif
+
+.SUFFIXES: .c .so .xo .o
+
+all: helloworld.so hellotype.so helloblock.so testmodule.so
+
+.c.xo:
+ $(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@
+
+helloworld.xo: ../redismodule.h
+
+helloworld.so: helloworld.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellotype.xo: ../redismodule.h
+
+hellotype.so: hellotype.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+helloblock.xo: ../redismodule.h
+
+helloblock.so: helloblock.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lpthread -lc
+
+testmodule.xo: ../redismodule.h
+
+testmodule.so: testmodule.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+clean:
+ rm -rf *.xo *.so
diff --git a/src/modules/gendoc.rb b/src/modules/gendoc.rb
new file mode 100644
index 000000000..516f5d795
--- /dev/null
+++ b/src/modules/gendoc.rb
@@ -0,0 +1,51 @@
+# gendoc.rb -- Converts the top-comments inside module.c to modules API
+# reference documentation in markdown format.
+
+# Convert the C comment to markdown
+def markdown(s)
+ s = s.gsub(/\*\/$/,"")
+ s = s.gsub(/^ \* {0,1}/,"")
+ s = s.gsub(/^\/\* /,"")
+ s.chop! while s[-1] == "\n" || s[-1] == " "
+ lines = s.split("\n")
+ newlines = []
+ lines.each{|l|
+ if l[0] != ' '
+ l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"}
+ l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"}
+ l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"}
+ end
+ newlines << l
+ }
+ return newlines.join("\n")
+end
+
+# Given the source code array and the index at which an exported symbol was
+# detected, extracts and outputs the documentation.
+def docufy(src,i)
+ m = /RM_[A-z0-9]+/.match(src[i])
+ name = m[0]
+ name = name.sub("RM_","RedisModule_")
+ proto = src[i].sub("{","").strip+";\n"
+ proto = proto.sub("RM_","RedisModule_")
+ puts "## `#{name}`\n\n"
+ puts " #{proto}\n"
+ comment = ""
+ while true
+ i = i-1
+ comment = src[i]+comment
+ break if src[i] =~ /\/\*/
+ end
+ comment = markdown(comment)
+ puts comment+"\n\n"
+end
+
+puts "# Modules API reference\n\n"
+src = File.open("../module.c").to_a
+src.each_with_index{|line,i|
+ if line =~ /RM_/ && line[0] != ' ' && line[0] != '#' && line[0] != '/'
+ if src[i-1] =~ /\*\//
+ docufy(src,i)
+ end
+ end
+}
diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c
new file mode 100644
index 000000000..c74fcd30f
--- /dev/null
+++ b/src/modules/helloblock.c
@@ -0,0 +1,196 @@
+/* Helloblock module -- An example of blocking command implementation
+ * with threads.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_EXPERIMENTAL_API
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+/* Reply callback for blocking command HELLO.BLOCK */
+int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ int *myint = RedisModule_GetBlockedClientPrivateData(ctx);
+ return RedisModule_ReplyWithLongLong(ctx,*myint);
+}
+
+/* Timeout callback for blocking command HELLO.BLOCK */
+int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithSimpleString(ctx,"Request timedout");
+}
+
+/* Private data freeing callback for HELLO.BLOCK command. */
+void HelloBlock_FreeData(void *privdata) {
+ RedisModule_Free(privdata);
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.BLOCK. */
+void *HelloBlock_ThreadMain(void *arg) {
+ void **targ = arg;
+ RedisModuleBlockedClient *bc = targ[0];
+ long long delay = (unsigned long)targ[1];
+ RedisModule_Free(targ);
+
+ sleep(delay);
+ int *r = RedisModule_Alloc(sizeof(int));
+ *r = rand();
+ RedisModule_UnblockClient(bc,r);
+ return NULL;
+}
+
+/* HELLO.BLOCK <delay> <timeout> -- Block for <delay> seconds, then reply with
+ * a random number. Timeout is the command timeout, so that you can test
+ * what happens when the delay is greater than the timeout. */
+int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout;
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*2);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+
+ if (pthread_create(&tid,NULL,HelloBlock_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.KEYS.
+ *
+ * Note: this implementation is very simple on purpose, so no duplicated
+ * keys (returned by SCAN) are filtered. However adding such a functionality
+ * would be trivial just using any data structure implementing a dictionary
+ * in order to filter the duplicated items. */
+void *HelloKeys_ThreadMain(void *arg) {
+ RedisModuleBlockedClient *bc = arg;
+ RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc);
+ long long cursor = 0;
+ size_t replylen = 0;
+
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ do {
+ RedisModule_ThreadSafeContextLock(ctx);
+ RedisModuleCallReply *reply = RedisModule_Call(ctx,
+ "SCAN","l",(long long)cursor);
+ RedisModule_ThreadSafeContextUnlock(ctx);
+
+ RedisModuleCallReply *cr_cursor =
+ RedisModule_CallReplyArrayElement(reply,0);
+ RedisModuleCallReply *cr_keys =
+ RedisModule_CallReplyArrayElement(reply,1);
+
+ RedisModuleString *s = RedisModule_CreateStringFromCallReply(cr_cursor);
+ RedisModule_StringToLongLong(s,&cursor);
+ RedisModule_FreeString(ctx,s);
+
+ size_t items = RedisModule_CallReplyLength(cr_keys);
+ for (size_t j = 0; j < items; j++) {
+ RedisModuleCallReply *ele =
+ RedisModule_CallReplyArrayElement(cr_keys,j);
+ RedisModule_ReplyWithCallReply(ctx,ele);
+ replylen++;
+ }
+ RedisModule_FreeCallReply(reply);
+ } while (cursor != 0);
+ RedisModule_ReplySetArrayLength(ctx,replylen);
+
+ RedisModule_FreeThreadSafeContext(ctx);
+ RedisModule_UnblockClient(bc,NULL);
+ return NULL;
+}
+
+/* HELLO.KEYS -- Return all the keys in the current database without blocking
+ * the server. The keys do not represent a point-in-time state so only the keys
+ * that were in the database from the start to the end are guaranteed to be
+ * there. */
+int HelloKeys_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ if (argc != 1) return RedisModule_WrongArity(ctx);
+
+ pthread_t tid;
+
+ /* Note that when blocking the client we do not set any callback: no
+ * timeout is possible since we passed '0', nor we need a reply callback
+ * because we'll use the thread safe context to accumulate a reply. */
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,NULL,NULL,NULL,0);
+
+ /* Now that we setup a blocking client, we need to pass the control
+ * to the thread. However we need to pass arguments to the thread:
+ * the reference to the blocked client handle. */
+ if (pthread_create(&tid,NULL,HelloKeys_ThreadMain,bc) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"helloblock",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.block",
+ HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx,"hello.keys",
+ HelloKeys_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c
new file mode 100644
index 000000000..ba634c4a1
--- /dev/null
+++ b/src/modules/hellotype.c
@@ -0,0 +1,286 @@
+/* This file implements a new module native data type called "HELLOTYPE".
+ * The data structure implemented is a very simple ordered linked list of
+ * 64 bit integers, in order to have something that is real world enough, but
+ * at the same time, extremely simple to understand, to show how the API
+ * works, how a new data type is created, and how to write basic methods
+ * for RDB loading, saving and AOF rewriting.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdint.h>
+
+static RedisModuleType *HelloType;
+
+/* ========================== Internal data structure =======================
+ * This is just a linked list of 64 bit integers where elements are inserted
+ * in-place, so it's ordered. There is no pop/push operation but just insert
+ * because it is enough to show the implementation of new data types without
+ * making things complex. */
+
+struct HelloTypeNode {
+ int64_t value;
+ struct HelloTypeNode *next;
+};
+
+struct HelloTypeObject {
+ struct HelloTypeNode *head;
+ size_t len; /* Number of elements added. */
+};
+
+struct HelloTypeObject *createHelloTypeObject(void) {
+ struct HelloTypeObject *o;
+ o = RedisModule_Alloc(sizeof(*o));
+ o->head = NULL;
+ o->len = 0;
+ return o;
+}
+
+void HelloTypeInsert(struct HelloTypeObject *o, int64_t ele) {
+ struct HelloTypeNode *next = o->head, *newnode, *prev = NULL;
+
+ while(next && next->value < ele) {
+ prev = next;
+ next = next->next;
+ }
+ newnode = RedisModule_Alloc(sizeof(*newnode));
+ newnode->value = ele;
+ newnode->next = next;
+ if (prev) {
+ prev->next = newnode;
+ } else {
+ o->head = newnode;
+ }
+ o->len++;
+}
+
+void HelloTypeReleaseObject(struct HelloTypeObject *o) {
+ struct HelloTypeNode *cur, *next;
+ cur = o->head;
+ while(cur) {
+ next = cur->next;
+ RedisModule_Free(cur);
+ cur = next;
+ }
+ RedisModule_Free(o);
+}
+
+/* ========================= "hellotype" type commands ======================= */
+
+/* HELLOTYPE.INSERT key value */
+int HelloTypeInsert_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long value;
+ if ((RedisModule_StringToLongLong(argv[2],&value) != REDISMODULE_OK)) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid value: must be a signed 64 bit integer");
+ }
+
+ /* Create an empty value object if the key is currently empty. */
+ struct HelloTypeObject *hto;
+ if (type == REDISMODULE_KEYTYPE_EMPTY) {
+ hto = createHelloTypeObject();
+ RedisModule_ModuleTypeSetValue(key,HelloType,hto);
+ } else {
+ hto = RedisModule_ModuleTypeGetValue(key);
+ }
+
+ /* Insert the new element. */
+ HelloTypeInsert(hto,value);
+
+ RedisModule_ReplyWithLongLong(ctx,hto->len);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
+
+/* HELLOTYPE.RANGE key first count */
+int HelloTypeRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long first, count;
+ if (RedisModule_StringToLongLong(argv[2],&first) != REDISMODULE_OK ||
+ RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK ||
+ first < 0 || count < 0)
+ {
+ return RedisModule_ReplyWithError(ctx,
+ "ERR invalid first or count parameters");
+ }
+
+ struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+ struct HelloTypeNode *node = hto ? hto->head : NULL;
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ long long arraylen = 0;
+ while(node && count--) {
+ RedisModule_ReplyWithLongLong(ctx,node->value);
+ arraylen++;
+ node = node->next;
+ }
+ RedisModule_ReplySetArrayLength(ctx,arraylen);
+ return REDISMODULE_OK;
+}
+
+/* HELLOTYPE.LEN key */
+int HelloTypeLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+ RedisModule_ReplyWithLongLong(ctx,hto ? hto->len : 0);
+ return REDISMODULE_OK;
+}
+
+
+/* ========================== "hellotype" type methods ======================= */
+
+void *HelloTypeRdbLoad(RedisModuleIO *rdb, int encver) {
+ if (encver != 0) {
+ /* RedisModule_Log("warning","Can't load data with version %d", encver);*/
+ return NULL;
+ }
+ uint64_t elements = RedisModule_LoadUnsigned(rdb);
+ struct HelloTypeObject *hto = createHelloTypeObject();
+ while(elements--) {
+ int64_t ele = RedisModule_LoadSigned(rdb);
+ HelloTypeInsert(hto,ele);
+ }
+ return hto;
+}
+
+void HelloTypeRdbSave(RedisModuleIO *rdb, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ RedisModule_SaveUnsigned(rdb,hto->len);
+ while(node) {
+ RedisModule_SaveSigned(rdb,node->value);
+ node = node->next;
+ }
+}
+
+void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_EmitAOF(aof,"HELLOTYPE.INSERT","sl",key,node->value);
+ node = node->next;
+ }
+}
+
+/* The goal of this function is to return the amount of memory used by
+ * the HelloType value. */
+size_t HelloTypeMemUsage(const void *value) {
+ const struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ return sizeof(*hto) + sizeof(*node)*hto->len;
+}
+
+void HelloTypeFree(void *value) {
+ HelloTypeReleaseObject(value);
+}
+
+void HelloTypeDigest(RedisModuleDigest *md, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_DigestAddLongLong(md,node->value);
+ node = node->next;
+ }
+ RedisModule_DigestEndSequence(md);
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellotype",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ RedisModuleTypeMethods tm = {
+ .version = REDISMODULE_TYPE_METHOD_VERSION,
+ .rdb_load = HelloTypeRdbLoad,
+ .rdb_save = HelloTypeRdbSave,
+ .aof_rewrite = HelloTypeAofRewrite,
+ .mem_usage = HelloTypeMemUsage,
+ .free = HelloTypeFree,
+ .digest = HelloTypeDigest
+ };
+
+ HelloType = RedisModule_CreateDataType(ctx,"hellotype",0,&tm);
+ if (HelloType == NULL) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.insert",
+ HelloTypeInsert_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.range",
+ HelloTypeRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.len",
+ HelloTypeLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c
new file mode 100644
index 000000000..3b00dea77
--- /dev/null
+++ b/src/modules/helloworld.c
@@ -0,0 +1,621 @@
+/* Helloworld module -- A few examples of the Redis Modules API in the form
+ * of commands showing how to accomplish common tasks.
+ *
+ * This module does not do anything useful, if not for a few commands. The
+ * examples are designed in order to show the API.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* HELLO.SIMPLE is among the simplest commands you can implement.
+ * It just returns the currently selected DB id, a functionality which is
+ * missing in Redis. The command uses two important API calls: one to
+ * fetch the currently selected DB, the other in order to send the client
+ * an integer reply as response. */
+int HelloSimple_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_ReplyWithLongLong(ctx,RedisModule_GetSelectedDb(ctx));
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.NATIVE re-implements RPUSH, and shows the low level modules API
+ * where you can "open" keys, make low level operations, create new keys by
+ * pushing elements into non-existing keys, and so forth.
+ *
+ * You'll find this command to be roughly as fast as the actual RPUSH
+ * command. */
+int HelloPushNative_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ RedisModule_ListPush(key,REDISMODULE_LIST_TAIL,argv[2]);
+ size_t newlen = RedisModule_ValueLength(key);
+ RedisModule_CloseKey(key);
+ RedisModule_ReplyWithLongLong(ctx,newlen);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.CALL implements RPUSH using a higher level approach, calling
+ * a Redis command instead of working with the key in a low level way. This
+ * approach is useful when you need to call Redis commands that are not
+ * available as low level APIs, or when you don't need the maximum speed
+ * possible but instead prefer implementation simplicity. */
+int HelloPushCall_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ long long len = RedisModule_CallReplyInteger(reply);
+ RedisModule_FreeCallReply(reply);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.CALL2
+ * This is exactly like HELLO.PUSH.CALL, but shows how we can reply to the
+ * client directly using a reply object that Call() returned. */
+int HelloPushCall2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ RedisModule_ReplyWithCallReply(ctx,reply);
+ RedisModule_FreeCallReply(reply);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LIST.SUM.LEN returns the total length of all the items inside
+ * a Redis list, by using the high level Call() API.
+ * This command is an example of the array reply access. */
+int HelloListSumLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"LRANGE","sll",argv[1],(long long)0,(long long)-1);
+ size_t strlen = 0;
+ size_t items = RedisModule_CallReplyLength(reply);
+ size_t j;
+ for (j = 0; j < items; j++) {
+ RedisModuleCallReply *ele = RedisModule_CallReplyArrayElement(reply,j);
+ strlen += RedisModule_CallReplyLength(ele);
+ }
+ RedisModule_FreeCallReply(reply);
+ RedisModule_ReplyWithLongLong(ctx,strlen);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LIST.SPLICE srclist dstlist count
+ * Moves 'count' elements from the tail of 'srclist' to the head of
+ * 'dstlist'. If fewer than count elements are available, it moves as many
+ * elements as possible. */
+int HelloListSplice_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0)) {
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ RedisModule_FreeString(ctx,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* Like the HELLO.LIST.SPLICE above, but uses automatic memory management
+ * in order to avoid freeing stuff. */
+int HelloListSpliceAuto_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0))
+ {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.RAND.ARRAY <count>
+ * Shows how to generate arrays as commands replies.
+ * It just outputs <count> random numbers. */
+int HelloRandArray_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ long long count;
+ if (RedisModule_StringToLongLong(argv[1],&count) != REDISMODULE_OK ||
+ count < 0)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+
+ /* To reply with an array, we call RedisModule_ReplyWithArray() followed
+ * by other "count" calls to other reply functions in order to generate
+ * the elements of the array. */
+ RedisModule_ReplyWithArray(ctx,count);
+ while(count--) RedisModule_ReplyWithLongLong(ctx,rand());
+ return REDISMODULE_OK;
+}
+
+/* This is a simple command to test replication. Because of the "!" modifier
+ * in the RedisModule_Call() calls, the two INCRs get replicated.
+ * Also note how the ECHO is replicated in an unexpected position (check
+ * the comments in the function implementation). */
+int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_AutoMemory(ctx);
+
+ /* This will be replicated *after* the two INCR statements, since
+ * the Call() replication has precedence, so the actual replication
+ * stream will be:
+ *
+ * MULTI
+ * INCR foo
+ * INCR bar
+ * ECHO c foo
+ * EXEC
+ */
+ RedisModule_Replicate(ctx,"ECHO","c","foo");
+
+ /* Using the "!" modifier we replicate the command if it
+ * modified the dataset in some way. */
+ RedisModule_Call(ctx,"INCR","c!","foo");
+ RedisModule_Call(ctx,"INCR","c!","bar");
+
+ RedisModule_ReplyWithLongLong(ctx,0);
+
+ return REDISMODULE_OK;
+}
+
+/* Another command to show replication. In this case, we call
+ * RedisModule_ReplicateVerbatim() to mean we want just the command to be
+ * propagated to slaves / AOF exactly as it was called by the user.
+ *
+ * This command also shows how to work with string objects.
+ * It takes a list, and increments all the elements (that must have
+ * a numerical value) by 1, returning the sum of all the elements
+ * as reply.
+ *
+ * Usage: HELLO.REPL2 <list-key> */
+int HelloRepl2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST)
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+
+ size_t listlen = RedisModule_ValueLength(key);
+ long long sum = 0;
+
+ /* Rotate and increment. */
+ while(listlen--) {
+ RedisModuleString *ele = RedisModule_ListPop(key,REDISMODULE_LIST_TAIL);
+ long long val;
+ if (RedisModule_StringToLongLong(ele,&val) != REDISMODULE_OK) val = 0;
+ val++;
+ sum += val;
+ RedisModuleString *newele = RedisModule_CreateStringFromLongLong(ctx,val);
+ RedisModule_ListPush(key,REDISMODULE_LIST_HEAD,newele);
+ }
+ RedisModule_ReplyWithLongLong(ctx,sum);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
+
+/* This is an example of strings DMA access. Given a key containing a string
+ * it toggles the case of each character from lower to upper case or the
+ * other way around.
+ *
+ * No automatic memory management is used in this example (for the sake
+ * of variety).
+ *
+ * HELLO.TOGGLE.CASE key */
+int HelloToggleCase_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ int keytype = RedisModule_KeyType(key);
+ if (keytype != REDISMODULE_KEYTYPE_STRING &&
+ keytype != REDISMODULE_KEYTYPE_EMPTY)
+ {
+ RedisModule_CloseKey(key);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ if (keytype == REDISMODULE_KEYTYPE_STRING) {
+ size_t len, j;
+ char *s = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE);
+ for (j = 0; j < len; j++) {
+ if (isupper(s[j])) {
+ s[j] = tolower(s[j]);
+ } else {
+ s[j] = toupper(s[j]);
+ }
+ }
+ }
+
+ RedisModule_CloseKey(key);
+ RedisModule_ReplyWithSimpleString(ctx,"OK");
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.MORE.EXPIRE key milliseconds.
+ *
+ * If the key already has an associated TTL, extends it by "milliseconds"
+ * milliseconds. Otherwise no operation is performed. */
+int HelloMoreExpire_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ mstime_t addms, expire;
+
+ if (RedisModule_StringToLongLong(argv[2],&addms) != REDISMODULE_OK)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid expire time");
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ expire = RedisModule_GetExpire(key);
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ expire += addms;
+ RedisModule_SetExpire(key,expire);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx,"OK");
+}
+
+/* HELLO.ZSUMRANGE key startscore endscore
+ * Return the sum of all the scores elements between startscore and endscore.
+ *
+ * The computation is performed twice, once from start to end and once
+ * backward. The two sums, returned as a two element array, should
+ * match. */
+int HelloZsumRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ double score_start, score_end;
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ if (RedisModule_StringToDouble(argv[2],&score_start) != REDISMODULE_OK ||
+ RedisModule_StringToDouble(argv[3],&score_end) != REDISMODULE_OK)
+ {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid range");
+ }
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ double scoresum_a = 0;
+ double scoresum_b = 0;
+
+ RedisModule_ZsetFirstInScoreRange(key,score_start,score_end,0,0);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_FreeString(ctx,ele);
+ scoresum_a += score;
+ RedisModule_ZsetRangeNext(key);
+ }
+ RedisModule_ZsetRangeStop(key);
+
+ RedisModule_ZsetLastInScoreRange(key,score_start,score_end,0,0);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_FreeString(ctx,ele);
+ scoresum_b += score;
+ RedisModule_ZsetRangePrev(key);
+ }
+
+ RedisModule_ZsetRangeStop(key);
+
+ RedisModule_CloseKey(key);
+
+ RedisModule_ReplyWithArray(ctx,2);
+ RedisModule_ReplyWithDouble(ctx,scoresum_a);
+ RedisModule_ReplyWithDouble(ctx,scoresum_b);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LEXRANGE key min_lex max_lex min_age max_age
+ * This command expects a sorted set stored at key in the following form:
+ * - All the elements have score 0.
+ * - Elements are pairs of "<name>:<age>", for example "Anna:52".
+ * The command will return all the sorted set items that are lexicographically
+ * between the specified range (using the same format as ZRANGEBYLEX)
+ * and having an age between min_age and max_age. */
+int HelloLexRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 6) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ if (RedisModule_ZsetFirstInLexRange(key,argv[2],argv[3]) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"invalid range");
+ }
+
+ int arraylen = 0;
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_ReplyWithString(ctx,ele);
+ RedisModule_FreeString(ctx,ele);
+ RedisModule_ZsetRangeNext(key);
+ arraylen++;
+ }
+ RedisModule_ZsetRangeStop(key);
+ RedisModule_ReplySetArrayLength(ctx,arraylen);
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.HCOPY key srcfield dstfield
+ * This is just an example command that sets the hash field dstfield to the
+ * same value of srcfield. If srcfield does not exist no operation is
+ * performed.
+ *
+ * The command returns 1 if the copy is performed (srcfield exists) otherwise
+ * 0 is returned. */
+int HelloHCopy_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_HASH &&
+ type != REDISMODULE_KEYTYPE_EMPTY)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ /* Get the old field value. */
+ RedisModuleString *oldval;
+ RedisModule_HashGet(key,REDISMODULE_HASH_NONE,argv[2],&oldval,NULL);
+ if (oldval) {
+ RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[3],oldval,NULL);
+ }
+ RedisModule_ReplyWithLongLong(ctx,oldval != NULL);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LEFTPAD str len ch
+ * This is an implementation of the infamous LEFTPAD function, that
+ * was at the center of an issue with the npm modules system in March 2016.
+ *
+ * LEFTPAD is a good example of using a Redis Modules API called
+ * "pool allocator", that was a famous way to allocate memory in yet another
+ * open source project, the Apache web server.
+ *
+ * The concept is very simple: there is memory that is useful to allocate
+ * only in the context of serving a request, and must be freed anyway when
+ * the callback implementing the command returns. So in that case the module
+ * does not need to retain a reference to these allocations, it is just
+ * required to free the memory before returning. When this is the case the
+ * module can call RedisModule_PoolAlloc() instead, that works like malloc()
+ * but will automatically free the memory when the module callback returns.
+ *
+ * Note that PoolAlloc() does not necessarily require AutoMemory to be
+ * active. */
+int HelloLeftPad_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ long long padlen;
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ if ((RedisModule_StringToLongLong(argv[2],&padlen) != REDISMODULE_OK) ||
+ (padlen< 0)) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid padding length");
+ }
+ size_t strlen, chlen;
+ const char *str = RedisModule_StringPtrLen(argv[1], &strlen);
+ const char *ch = RedisModule_StringPtrLen(argv[3], &chlen);
+
+ /* If the string is already larger than the target len, just return
+ * the string itself. */
+ if (strlen >= (size_t)padlen)
+ return RedisModule_ReplyWithString(ctx,argv[1]);
+
+ /* Padding must be a single character in this simple implementation. */
+ if (chlen != 1)
+ return RedisModule_ReplyWithError(ctx,
+ "ERR padding must be a single char");
+
+ /* Here we use our pool allocator, for our throw-away allocation. */
+ padlen -= strlen;
+ char *buf = RedisModule_PoolAlloc(ctx,padlen+strlen);
+ for (long long j = 0; j < padlen; j++) buf[j] = *ch;
+ memcpy(buf+padlen,str,strlen);
+
+ RedisModule_ReplyWithStringBuffer(ctx,buf,padlen+strlen);
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (RedisModule_Init(ctx,"helloworld",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ /* Log the list of parameters passing loading the module. */
+ for (int j = 0; j < argc; j++) {
+ const char *s = RedisModule_StringPtrLen(argv[j],NULL);
+ printf("Module loaded with ARGV[%d] = %s\n", j, s);
+ }
+
+ if (RedisModule_CreateCommand(ctx,"hello.simple",
+ HelloSimple_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.native",
+ HelloPushNative_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.call",
+ HelloPushCall_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.call2",
+ HelloPushCall2_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.sum.len",
+ HelloListSumLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.splice",
+ HelloListSplice_RedisCommand,"write deny-oom",1,2,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.splice.auto",
+ HelloListSpliceAuto_RedisCommand,
+ "write deny-oom",1,2,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.rand.array",
+ HelloRandArray_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.repl1",
+ HelloRepl1_RedisCommand,"write",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.repl2",
+ HelloRepl2_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.toggle.case",
+ HelloToggleCase_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.more.expire",
+ HelloMoreExpire_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.zsumrange",
+ HelloZsumRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.lexrange",
+ HelloLexRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.hcopy",
+ HelloHCopy_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.leftpad",
+ HelloLeftPad_RedisCommand,"",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c
new file mode 100644
index 000000000..8da45c0ea
--- /dev/null
+++ b/src/modules/testmodule.c
@@ -0,0 +1,237 @@
+/* Module designed to test the Redis modules subsystem.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <string.h>
+
+/* --------------------------------- Helpers -------------------------------- */
+
+/* Return 1 if the reply matches the given C null-terminated string, else 0. */
+int TestMatchReply(RedisModuleCallReply *reply, char *str) {
+ RedisModuleString *mystr;
+ mystr = RedisModule_CreateStringFromCallReply(reply);
+ if (!mystr) return 0;
+ const char *ptr = RedisModule_StringPtrLen(mystr,NULL);
+ return strcmp(ptr,str) == 0;
+}
+
+/* ------------------------------- Test units ------------------------------- */
+
+/* TEST.CALL -- Test Call() API. */
+int TestCall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleCallReply *reply;
+
+ RedisModule_Call(ctx,"DEL","c","mylist");
+ RedisModuleString *mystr = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_Call(ctx,"RPUSH","csl","mylist",mystr,(long long)1234);
+ reply = RedisModule_Call(ctx,"LRANGE","ccc","mylist","0","-1");
+ long long items = RedisModule_CallReplyLength(reply);
+ if (items != 2) goto fail;
+
+ RedisModuleCallReply *item0, *item1;
+
+ item0 = RedisModule_CallReplyArrayElement(reply,0);
+ item1 = RedisModule_CallReplyArrayElement(reply,1);
+ if (!TestMatchReply(item0,"foo")) goto fail;
+ if (!TestMatchReply(item1,"1234")) goto fail;
+
+ RedisModule_ReplyWithSimpleString(ctx,"OK");
+ return REDISMODULE_OK;
+
+fail:
+ RedisModule_ReplyWithSimpleString(ctx,"ERR");
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.APPEND -- Test appending to an existing string object. */
+int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_StringAppendBuffer(ctx,s,"bar",3);
+ RedisModule_ReplyWithString(ctx,s);
+ RedisModule_FreeString(ctx,s);
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.APPEND.AM -- Test append with retain when auto memory is on. */
+int TestStringAppendAM(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_RetainString(ctx,s);
+ RedisModule_StringAppendBuffer(ctx,s,"bar",3);
+ RedisModule_ReplyWithString(ctx,s);
+ RedisModule_FreeString(ctx,s);
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.PRINTF -- Test string formatting. */
+int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx);
+ if (argc < 3) {
+ return RedisModule_WrongArity(ctx);
+ }
+ RedisModuleString *s = RedisModule_CreateStringPrintf(ctx,
+ "Got %d args. argv[1]: %s, argv[2]: %s",
+ argc,
+ RedisModule_StringPtrLen(argv[1], NULL),
+ RedisModule_StringPtrLen(argv[2], NULL)
+ );
+
+ RedisModule_ReplyWithString(ctx,s);
+
+ return REDISMODULE_OK;
+}
+
+
+/* ----------------------------- Test framework ----------------------------- */
+
+/* Return 1 if the reply matches the specified string, otherwise log errors
+ * in the server log and return 0. */
+int TestAssertStringReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, char *str, size_t len) {
+ RedisModuleString *mystr, *expected;
+
+ if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_STRING) {
+ RedisModule_Log(ctx,"warning","Unexpected reply type %d",
+ RedisModule_CallReplyType(reply));
+ return 0;
+ }
+ mystr = RedisModule_CreateStringFromCallReply(reply);
+ expected = RedisModule_CreateString(ctx,str,len);
+ if (RedisModule_StringCompare(mystr,expected) != 0) {
+ const char *mystr_ptr = RedisModule_StringPtrLen(mystr,NULL);
+ const char *expected_ptr = RedisModule_StringPtrLen(expected,NULL);
+ RedisModule_Log(ctx,"warning",
+ "Unexpected string reply '%s' (instead of '%s')",
+ mystr_ptr, expected_ptr);
+ return 0;
+ }
+ return 1;
+}
+
+/* Return 1 if the reply matches the specified integer, otherwise log errors
+ * in the server log and return 0. */
+int TestAssertIntegerReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, long long expected) {
+ if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_INTEGER) {
+ RedisModule_Log(ctx,"warning","Unexpected reply type %d",
+ RedisModule_CallReplyType(reply));
+ return 0;
+ }
+ long long val = RedisModule_CallReplyInteger(reply);
+ if (val != expected) {
+ RedisModule_Log(ctx,"warning",
+ "Unexpected integer reply '%lld' (instead of '%lld')",
+ val, expected);
+ return 0;
+ }
+ return 1;
+}
+
+#define T(name,...) \
+ do { \
+ RedisModule_Log(ctx,"warning","Testing %s", name); \
+ reply = RedisModule_Call(ctx,name,__VA_ARGS__); \
+ } while (0);
+
+/* TEST.IT -- Run all the tests. */
+int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleCallReply *reply;
+
+ /* Make sure the DB is empty before proceeding. */
+ T("dbsize","");
+ if (!TestAssertIntegerReply(ctx,reply,0)) goto fail;
+
+ T("ping","");
+ if (!TestAssertStringReply(ctx,reply,"PONG",4)) goto fail;
+
+ T("test.call","");
+ if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail;
+
+ T("test.string.append","");
+ if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail;
+
+ T("test.string.append.am","");
+ if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail;
+
+ T("test.string.printf", "cc", "foo", "bar");
+ if (!TestAssertStringReply(ctx,reply,"Got 3 args. argv[1]: foo, argv[2]: bar",38)) goto fail;
+
+ RedisModule_ReplyWithSimpleString(ctx,"ALL TESTS PASSED");
+ return REDISMODULE_OK;
+
+fail:
+ RedisModule_ReplyWithSimpleString(ctx,
+ "SOME TEST NOT PASSED! Check server logs");
+ return REDISMODULE_OK;
+}
+
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"test",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.call",
+ TestCall,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.append",
+ TestStringAppend,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.append.am",
+ TestStringAppendAM,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.printf",
+ TestStringPrintf,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.it",
+ TestIt,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/multi.c b/src/multi.c
index 3d78e1488..112ce0605 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -27,18 +27,18 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* ================================ MULTI/EXEC ============================== */
/* Client state initialization for MULTI/EXEC */
-void initClientMultiState(redisClient *c) {
+void initClientMultiState(client *c) {
c->mstate.commands = NULL;
c->mstate.count = 0;
}
/* Release all the resources associated with MULTI/EXEC state */
-void freeClientMultiState(redisClient *c) {
+void freeClientMultiState(client *c) {
int j;
for (j = 0; j < c->mstate.count; j++) {
@@ -53,7 +53,7 @@ void freeClientMultiState(redisClient *c) {
}
/* Add a new command into the MULTI commands queue */
-void queueMultiCommand(redisClient *c) {
+void queueMultiCommand(client *c) {
multiCmd *mc;
int j;
@@ -69,31 +69,31 @@ void queueMultiCommand(redisClient *c) {
c->mstate.count++;
}
-void discardTransaction(redisClient *c) {
+void discardTransaction(client *c) {
freeClientMultiState(c);
initClientMultiState(c);
- c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS|REDIS_DIRTY_EXEC);;
+ c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
unwatchAllKeys(c);
}
/* Flag the transacation as DIRTY_EXEC so that EXEC will fail.
* Should be called every time there is an error while queueing a command. */
-void flagTransaction(redisClient *c) {
- if (c->flags & REDIS_MULTI)
- c->flags |= REDIS_DIRTY_EXEC;
+void flagTransaction(client *c) {
+ if (c->flags & CLIENT_MULTI)
+ c->flags |= CLIENT_DIRTY_EXEC;
}
-void multiCommand(redisClient *c) {
- if (c->flags & REDIS_MULTI) {
+void multiCommand(client *c) {
+ if (c->flags & CLIENT_MULTI) {
addReplyError(c,"MULTI calls can not be nested");
return;
}
- c->flags |= REDIS_MULTI;
+ c->flags |= CLIENT_MULTI;
addReply(c,shared.ok);
}
-void discardCommand(redisClient *c) {
- if (!(c->flags & REDIS_MULTI)) {
+void discardCommand(client *c) {
+ if (!(c->flags & CLIENT_MULTI)) {
addReplyError(c,"DISCARD without MULTI");
return;
}
@@ -103,22 +103,23 @@ void discardCommand(redisClient *c) {
/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
* implementation for more information. */
-void execCommandPropagateMulti(redisClient *c) {
+void execCommandPropagateMulti(client *c) {
robj *multistring = createStringObject("MULTI",5);
propagate(server.multiCommand,c->db->id,&multistring,1,
- REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|PROPAGATE_REPL);
decrRefCount(multistring);
}
-void execCommand(redisClient *c) {
+void execCommand(client *c) {
int j;
robj **orig_argv;
int orig_argc;
struct redisCommand *orig_cmd;
int must_propagate = 0; /* Need to propagate MULTI/EXEC to AOF / slaves? */
+ int was_master = server.masterhost == NULL;
- if (!(c->flags & REDIS_MULTI)) {
+ if (!(c->flags & CLIENT_MULTI)) {
addReplyError(c,"EXEC without MULTI");
return;
}
@@ -129,8 +130,8 @@ void execCommand(redisClient *c) {
* A failed EXEC in the first case returns a multi bulk nil object
* (technically it is not an error but a special behavior), while
* in the second an EXECABORT error is returned. */
- if (c->flags & (REDIS_DIRTY_CAS|REDIS_DIRTY_EXEC)) {
- addReply(c, c->flags & REDIS_DIRTY_EXEC ? shared.execaborterr :
+ if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC)) {
+ addReply(c, c->flags & CLIENT_DIRTY_EXEC ? shared.execaborterr :
shared.nullmultibulk);
discardTransaction(c);
goto handle_monitor;
@@ -147,16 +148,17 @@ void execCommand(redisClient *c) {
c->argv = c->mstate.commands[j].argv;
c->cmd = c->mstate.commands[j].cmd;
- /* Propagate a MULTI request once we encounter the first write op.
+ /* Propagate a MULTI request once we encounter the first command which
+ * is not readonly nor an administrative one.
* This way we'll deliver the MULTI/..../EXEC block as a whole and
* both the AOF and the replication link will have the same consistency
* and atomicity guarantees. */
- if (!must_propagate && !(c->cmd->flags & REDIS_CMD_READONLY)) {
+ if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
- call(c,REDIS_CALL_FULL);
+ call(c,CMD_CALL_FULL);
/* Commands may alter argc/argv, restore mstate. */
c->mstate.commands[j].argc = c->argc;
@@ -167,15 +169,28 @@ void execCommand(redisClient *c) {
c->argc = orig_argc;
c->cmd = orig_cmd;
discardTransaction(c);
+
/* Make sure the EXEC command will be propagated as well if MULTI
* was already propagated. */
- if (must_propagate) server.dirty++;
+ if (must_propagate) {
+ int is_master = server.masterhost == NULL;
+ server.dirty++;
+ /* If inside the MULTI/EXEC block this instance was suddenly
+ * switched from master to slave (using the SLAVEOF command), the
+ * initial MULTI was propagated into the replication backlog, but the
+ * rest was not. We need to make sure to at least terminate the
+ * backlog with the final EXEC. */
+ if (server.repl_backlog && was_master && !is_master) {
+ char *execcmd = "*1\r\n$4\r\nEXEC\r\n";
+ feedReplicationBacklog(execcmd,strlen(execcmd));
+ }
+ }
handle_monitor:
/* Send EXEC to clients waiting data from MONITOR. We do it here
* since the natural order of commands execution is actually:
* MUTLI, EXEC, ... commands inside transaction ...
- * Instead EXEC is flagged as REDIS_CMD_SKIP_MONITOR in the command
+ * Instead EXEC is flagged as CMD_SKIP_MONITOR in the command
* table, and we do it here with correct ordering. */
if (listLength(server.monitors) && !server.loading)
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
@@ -199,7 +214,7 @@ typedef struct watchedKey {
} watchedKey;
/* Watch for the specified key */
-void watchForKey(redisClient *c, robj *key) {
+void watchForKey(client *c, robj *key) {
list *clients = NULL;
listIter li;
listNode *ln;
@@ -230,7 +245,7 @@ void watchForKey(redisClient *c, robj *key) {
/* Unwatch all the keys watched by this client. To clean the EXEC dirty
* flag is up to the caller. */
-void unwatchAllKeys(redisClient *c) {
+void unwatchAllKeys(client *c) {
listIter li;
listNode *ln;
@@ -244,7 +259,7 @@ void unwatchAllKeys(redisClient *c) {
* from the list */
wk = listNodeValue(ln);
clients = dictFetchValue(wk->db->watched_keys, wk->key);
- redisAssertWithInfo(c,NULL,clients != NULL);
+ serverAssertWithInfo(c,NULL,clients != NULL);
listDelNode(clients,listSearchKey(clients,c));
/* Kill the entry at all if this was the only client */
if (listLength(clients) == 0)
@@ -267,13 +282,13 @@ void touchWatchedKey(redisDb *db, robj *key) {
clients = dictFetchValue(db->watched_keys, key);
if (!clients) return;
- /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
+ /* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
/* Check if we are already watching for this key */
listRewind(clients,&li);
while((ln = listNext(&li))) {
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
- c->flags |= REDIS_DIRTY_CAS;
+ c->flags |= CLIENT_DIRTY_CAS;
}
}
@@ -288,7 +303,7 @@ void touchWatchedKeysOnFlush(int dbid) {
/* For every client, check all the waited keys */
listRewind(server.clients,&li1);
while((ln = listNext(&li1))) {
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
listRewind(c->watched_keys,&li2);
while((ln = listNext(&li2))) {
watchedKey *wk = listNodeValue(ln);
@@ -298,16 +313,16 @@ void touchWatchedKeysOnFlush(int dbid) {
* removed. */
if (dbid == -1 || wk->db->id == dbid) {
if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
- c->flags |= REDIS_DIRTY_CAS;
+ c->flags |= CLIENT_DIRTY_CAS;
}
}
}
}
-void watchCommand(redisClient *c) {
+void watchCommand(client *c) {
int j;
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
addReplyError(c,"WATCH inside MULTI is not allowed");
return;
}
@@ -316,8 +331,8 @@ void watchCommand(redisClient *c) {
addReply(c,shared.ok);
}
-void unwatchCommand(redisClient *c) {
+void unwatchCommand(client *c) {
unwatchAllKeys(c);
- c->flags &= (~REDIS_DIRTY_CAS);
+ c->flags &= (~CLIENT_DIRTY_CAS);
addReply(c,shared.ok);
}
diff --git a/src/networking.c b/src/networking.c
index 9c100db19..aeaeca967 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -27,45 +27,51 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
+#include "atomicvar.h"
#include <sys/uio.h>
#include <math.h>
+#include <ctype.h>
-static void setProtocolError(redisClient *c, int pos);
+static void setProtocolError(const char *errstr, client *c, int pos);
-/* To evaluate the output buffer size of a client we need to get size of
- * allocated objects, however we can't used zmalloc_size() directly on sds
- * strings because of the trick they use to work (the header is before the
- * returned pointer), so we use this helper function. */
-size_t zmalloc_size_sds(sds s) {
- return zmalloc_size(s-sizeof(struct sdshdr));
+/* Return the size consumed from the allocator, for the specified SDS string,
+ * including internal fragmentation. This function is used in order to compute
+ * the client output buffer size. */
+size_t sdsZmallocSize(sds s) {
+ void *sh = sdsAllocPtr(s);
+ return zmalloc_size(sh);
}
/* Return the amount of memory used by the sds string at object->ptr
* for a string object. */
size_t getStringObjectSdsUsedMemory(robj *o) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
switch(o->encoding) {
- case REDIS_ENCODING_RAW: return zmalloc_size_sds(o->ptr);
- case REDIS_ENCODING_EMBSTR: return sdslen(o->ptr);
+ case OBJ_ENCODING_RAW: return sdsZmallocSize(o->ptr);
+ case OBJ_ENCODING_EMBSTR: return zmalloc_size(o)-sizeof(robj);
default: return 0; /* Just integer encoding for now. */
}
}
+/* Client.reply list dup and free methods. */
void *dupClientReplyValue(void *o) {
- incrRefCount((robj*)o);
- return o;
+ return sdsdup(o);
+}
+
+void freeClientReplyValue(void *o) {
+ sdsfree(o);
}
int listMatchObjects(void *a, void *b) {
return equalStringObjects(a,b);
}
-redisClient *createClient(int fd) {
- redisClient *c = zmalloc(sizeof(redisClient));
+client *createClient(int fd) {
+ client *c = zmalloc(sizeof(client));
/* passing -1 as fd it is possible to create a non connected client.
- * This is useful since all the Redis commands needs to be executed
+ * This is useful since all the commands need to be executed
* in the context of a client. When commands are executed in other
* contexts (for instance a Lua script) we need a non connected client. */
if (fd != -1) {
@@ -83,11 +89,14 @@ redisClient *createClient(int fd) {
}
selectDb(c,0);
- c->id = server.next_client_id++;
+ uint64_t client_id;
+ atomicGetIncr(server.next_client_id,client_id,1);
+ c->id = client_id;
c->fd = fd;
c->name = NULL;
c->bufpos = 0;
c->querybuf = sdsempty();
+ c->pending_querybuf = sdsempty();
c->querybuf_peak = 0;
c->reqtype = 0;
c->argc = 0;
@@ -99,25 +108,29 @@ redisClient *createClient(int fd) {
c->flags = 0;
c->ctime = c->lastinteraction = server.unixtime;
c->authenticated = 0;
- c->replstate = REDIS_REPL_NONE;
+ c->replstate = REPL_STATE_NONE;
+ c->repl_put_online_on_ack = 0;
c->reploff = 0;
+ c->read_reploff = 0;
c->repl_ack_off = 0;
c->repl_ack_time = 0;
c->slave_listening_port = 0;
+ c->slave_ip[0] = '\0';
+ c->slave_capa = SLAVE_CAPA_NONE;
c->reply = listCreate();
c->reply_bytes = 0;
c->obuf_soft_limit_reached_time = 0;
- listSetFreeMethod(c->reply,decrRefCountVoid);
+ listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
- c->btype = REDIS_BLOCKED_NONE;
+ c->btype = BLOCKED_NONE;
c->bpop.timeout = 0;
- c->bpop.keys = dictCreate(&setDictType,NULL);
+ c->bpop.keys = dictCreate(&objectKeyPointerValueDictType,NULL);
c->bpop.target = NULL;
c->bpop.numreplicas = 0;
c->bpop.reploffset = 0;
c->woff = 0;
c->watched_keys = listCreate();
- c->pubsub_channels = dictCreate(&setDictType,NULL);
+ c->pubsub_channels = dictCreate(&objectKeyPointerValueDictType,NULL);
c->pubsub_patterns = listCreate();
c->peerid = NULL;
listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid);
@@ -131,91 +144,104 @@ redisClient *createClient(int fd) {
* to the client. The behavior is the following:
*
* If the client should receive new data (normal clients will) the function
- * returns REDIS_OK, and make sure to install the write handler in our event
+ * returns C_OK, and make sure to install the write handler in our event
* loop so that when the socket is writable new data gets written.
*
- * If the client should not receive new data, because it is a fake client,
- * a master, a slave not yet online, or because the setup of the write handler
- * failed, the function returns REDIS_ERR.
+ * If the client should not receive new data, because it is a fake client
+ * (used to load AOF in memory), a master or because the setup of the write
+ * handler failed, the function returns C_ERR.
+ *
+ * The function may return C_OK without actually installing the write
+ * event handler in the following cases:
+ *
+ * 1) The event handler should already be installed since the output buffer
+ * already contains something.
+ * 2) The client is a slave but not yet online, so we want to just accumulate
+ * writes in the buffer but not actually sending them yet.
*
* Typically gets called every time a reply is built, before adding more
- * data to the clients output buffers. If the function returns REDIS_ERR no
+ * data to the clients output buffers. If the function returns C_ERR no
* data should be appended to the output buffers. */
-int prepareClientToWrite(redisClient *c) {
- if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK;
- if ((c->flags & REDIS_MASTER) &&
- !(c->flags & REDIS_MASTER_FORCE_REPLY)) return REDIS_ERR;
- if (c->fd <= 0) return REDIS_ERR; /* Fake client */
- if (c->bufpos == 0 && listLength(c->reply) == 0 &&
- (c->replstate == REDIS_REPL_NONE ||
- c->replstate == REDIS_REPL_ONLINE) &&
- aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
- return REDIS_OK;
-}
-
-/* Create a duplicate of the last object in the reply list when
- * it is not exclusively owned by the reply list. */
-robj *dupLastObjectIfNeeded(list *reply) {
- robj *new, *cur;
- listNode *ln;
- redisAssert(listLength(reply) > 0);
- ln = listLast(reply);
- cur = listNodeValue(ln);
- if (cur->refcount > 1) {
- new = dupStringObject(cur);
- decrRefCount(cur);
- listNodeValue(ln) = new;
+int prepareClientToWrite(client *c) {
+ /* If it's the Lua client we always return ok without installing any
+ * handler since there is no socket at all. */
+ if (c->flags & (CLIENT_LUA|CLIENT_MODULE)) return C_OK;
+
+ /* CLIENT REPLY OFF / SKIP handling: don't send replies. */
+ if (c->flags & (CLIENT_REPLY_OFF|CLIENT_REPLY_SKIP)) return C_ERR;
+
+ /* Masters don't receive replies, unless CLIENT_MASTER_FORCE_REPLY flag
+ * is set. */
+ if ((c->flags & CLIENT_MASTER) &&
+ !(c->flags & CLIENT_MASTER_FORCE_REPLY)) return C_ERR;
+
+ if (c->fd <= 0) return C_ERR; /* Fake client for AOF loading. */
+
+ /* Schedule the client to write the output buffers to the socket only
+ * if not already done (there were no pending writes already and the client
+ * was yet not flagged), and, for slaves, if the slave can actually
+ * receive writes at this stage. */
+ if (!clientHasPendingReplies(c) &&
+ !(c->flags & CLIENT_PENDING_WRITE) &&
+ (c->replstate == REPL_STATE_NONE ||
+ (c->replstate == SLAVE_STATE_ONLINE && !c->repl_put_online_on_ack)))
+ {
+ /* Here instead of installing the write handler, we just flag the
+ * client and put it into a list of clients that have something
+ * to write to the socket. This way before re-entering the event
+ * loop, we can try to directly write to the client sockets avoiding
+ * a system call. We'll only really install the write handler if
+ * we'll not be able to write the whole reply at once. */
+ c->flags |= CLIENT_PENDING_WRITE;
+ listAddNodeHead(server.clients_pending_write,c);
}
- return listNodeValue(ln);
+
+ /* Authorize the caller to queue in the output buffer of this client. */
+ return C_OK;
}
/* -----------------------------------------------------------------------------
* Low level functions to add more data to output buffers.
* -------------------------------------------------------------------------- */
-int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
+int _addReplyToBuffer(client *c, const char *s, size_t len) {
size_t available = sizeof(c->buf)-c->bufpos;
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK;
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return C_OK;
/* If there already are entries in the reply list, we cannot
* add anything more to the static buffer. */
- if (listLength(c->reply) > 0) return REDIS_ERR;
+ if (listLength(c->reply) > 0) return C_ERR;
/* Check that the buffer has enough space available for this string. */
- if (len > available) return REDIS_ERR;
+ if (len > available) return C_ERR;
memcpy(c->buf+c->bufpos,s,len);
c->bufpos+=len;
- return REDIS_OK;
+ return C_OK;
}
-void _addReplyObjectToList(redisClient *c, robj *o) {
- robj *tail;
-
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
+void _addReplyObjectToList(client *c, robj *o) {
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
- incrRefCount(o);
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
+ sds s = sdsdup(o->ptr);
+ listAddNodeTail(c->reply,s);
+ c->reply_bytes += sdslen(s);
} else {
- tail = listNodeValue(listLast(c->reply));
-
- /* Append to this object when possible. */
- if (tail->ptr != NULL &&
- tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
+ listNode *ln = listLast(c->reply);
+ sds tail = listNodeValue(ln);
+
+ /* Append to this object when possible. If tail == NULL it was
+ * set via addDeferredMultiBulkLength(). */
+ if (tail && sdslen(tail)+sdslen(o->ptr) <= PROTO_REPLY_CHUNK_BYTES) {
+ tail = sdscatsds(tail,o->ptr);
+ listNodeValue(ln) = tail;
+ c->reply_bytes += sdslen(o->ptr);
} else {
- incrRefCount(o);
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
+ sds s = sdsdup(o->ptr);
+ listAddNodeTail(c->reply,s);
+ c->reply_bytes += sdslen(s);
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
@@ -223,63 +249,55 @@ void _addReplyObjectToList(redisClient *c, robj *o) {
/* This method takes responsibility over the sds. When it is no longer
* needed it will be free'd, otherwise it ends up in a robj. */
-void _addReplySdsToList(redisClient *c, sds s) {
- robj *tail;
-
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) {
+void _addReplySdsToList(client *c, sds s) {
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
sdsfree(s);
return;
}
if (listLength(c->reply) == 0) {
- listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
- c->reply_bytes += zmalloc_size_sds(s);
+ listAddNodeTail(c->reply,s);
+ c->reply_bytes += sdslen(s);
} else {
- tail = listNodeValue(listLast(c->reply));
-
- /* Append to this object when possible. */
- if (tail->ptr != NULL && tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
+ listNode *ln = listLast(c->reply);
+ sds tail = listNodeValue(ln);
+
+ /* Append to this object when possible. If tail == NULL it was
+ * set via addDeferredMultiBulkLength(). */
+ if (tail && sdslen(tail)+sdslen(s) <= PROTO_REPLY_CHUNK_BYTES) {
+ tail = sdscatsds(tail,s);
+ listNodeValue(ln) = tail;
+ c->reply_bytes += sdslen(s);
sdsfree(s);
} else {
- listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
- c->reply_bytes += zmalloc_size_sds(s);
+ listAddNodeTail(c->reply,s);
+ c->reply_bytes += sdslen(s);
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
-void _addReplyStringToList(redisClient *c, char *s, size_t len) {
- robj *tail;
-
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
+void _addReplyStringToList(client *c, const char *s, size_t len) {
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
if (listLength(c->reply) == 0) {
- robj *o = createStringObject(s,len);
-
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
+ sds node = sdsnewlen(s,len);
+ listAddNodeTail(c->reply,node);
+ c->reply_bytes += len;
} else {
- tail = listNodeValue(listLast(c->reply));
-
- /* Append to this object when possible. */
- if (tail->ptr != NULL && tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,s,len);
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
+ listNode *ln = listLast(c->reply);
+ sds tail = listNodeValue(ln);
+
+ /* Append to this object when possible. If tail == NULL it was
+ * set via addDeferredMultiBulkLength(). */
+ if (tail && sdslen(tail)+len <= PROTO_REPLY_CHUNK_BYTES) {
+ tail = sdscatlen(tail,s,len);
+ listNodeValue(ln) = tail;
+ c->reply_bytes += len;
} else {
- robj *o = createStringObject(s,len);
-
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
+ sds node = sdsnewlen(s,len);
+ listAddNodeTail(c->reply,node);
+ c->reply_bytes += len;
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
@@ -290,8 +308,8 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) {
* The following functions are the ones that commands implementations will call.
* -------------------------------------------------------------------------- */
-void addReply(redisClient *c, robj *obj) {
- if (prepareClientToWrite(c) != REDIS_OK) return;
+void addReply(client *c, robj *obj) {
+ if (prepareClientToWrite(c) != C_OK) return;
/* This is an important place where we can avoid copy-on-write
* when there is a saving child running, avoiding touching the
@@ -301,9 +319,9 @@ void addReply(redisClient *c, robj *obj) {
* we'll be able to send the object to the client without
* messing with its page. */
if (sdsEncodedObject(obj)) {
- if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
+ if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != C_OK)
_addReplyObjectToList(c,obj);
- } else if (obj->encoding == REDIS_ENCODING_INT) {
+ } else if (obj->encoding == OBJ_ENCODING_INT) {
/* Optimization: if there is room in the static buffer for 32 bytes
* (more than the max chars a 64 bit integer can take as string) we
* avoid decoding the object and go for the lower level approach. */
@@ -312,27 +330,27 @@ void addReply(redisClient *c, robj *obj) {
int len;
len = ll2string(buf,sizeof(buf),(long)obj->ptr);
- if (_addReplyToBuffer(c,buf,len) == REDIS_OK)
+ if (_addReplyToBuffer(c,buf,len) == C_OK)
return;
/* else... continue with the normal code path, but should never
* happen actually since we verified there is room. */
}
obj = getDecodedObject(obj);
- if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
+ if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != C_OK)
_addReplyObjectToList(c,obj);
decrRefCount(obj);
} else {
- redisPanic("Wrong obj->encoding in addReply()");
+ serverPanic("Wrong obj->encoding in addReply()");
}
}
-void addReplySds(redisClient *c, sds s) {
- if (prepareClientToWrite(c) != REDIS_OK) {
+void addReplySds(client *c, sds s) {
+ if (prepareClientToWrite(c) != C_OK) {
/* The caller expects the sds to be free'd. */
sdsfree(s);
return;
}
- if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) {
+ if (_addReplyToBuffer(c,s,sdslen(s)) == C_OK) {
sdsfree(s);
} else {
/* This method free's the sds when it is no longer needed. */
@@ -340,23 +358,31 @@ void addReplySds(redisClient *c, sds s) {
}
}
-void addReplyString(redisClient *c, char *s, size_t len) {
- if (prepareClientToWrite(c) != REDIS_OK) return;
- if (_addReplyToBuffer(c,s,len) != REDIS_OK)
+/* This low level function just adds whatever protocol you send it to the
+ * client buffer, trying the static buffer initially, and using the string
+ * of objects if not possible.
+ *
+ * It is efficient because it does not create an SDS object nor a Redis object
+ * if not needed. The object will only be created by calling
+ * _addReplyStringToList() if we fail to extend the existing tail object
+ * in the list of objects. */
+void addReplyString(client *c, const char *s, size_t len) {
+ if (prepareClientToWrite(c) != C_OK) return;
+ if (_addReplyToBuffer(c,s,len) != C_OK)
_addReplyStringToList(c,s,len);
}
-void addReplyErrorLength(redisClient *c, char *s, size_t len) {
+void addReplyErrorLength(client *c, const char *s, size_t len) {
addReplyString(c,"-ERR ",5);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
}
-void addReplyError(redisClient *c, char *err) {
+void addReplyError(client *c, const char *err) {
addReplyErrorLength(c,err,strlen(err));
}
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
+void addReplyErrorFormat(client *c, const char *fmt, ...) {
size_t l, j;
va_list ap;
va_start(ap,fmt);
@@ -372,17 +398,17 @@ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
sdsfree(s);
}
-void addReplyStatusLength(redisClient *c, char *s, size_t len) {
+void addReplyStatusLength(client *c, const char *s, size_t len) {
addReplyString(c,"+",1);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
}
-void addReplyStatus(redisClient *c, char *status) {
+void addReplyStatus(client *c, const char *status) {
addReplyStatusLength(c,status,strlen(status));
}
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
+void addReplyStatusFormat(client *c, const char *fmt, ...) {
va_list ap;
va_start(ap,fmt);
sds s = sdscatvprintf(sdsempty(),fmt,ap);
@@ -393,44 +419,44 @@ void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
/* Adds an empty object to the reply list that will contain the multi bulk
* length, which is not known when this function is called. */
-void *addDeferredMultiBulkLength(redisClient *c) {
+void *addDeferredMultiBulkLength(client *c) {
/* Note that we install the write event here even if the object is not
* ready to be sent, since we are sure that before returning to the
* event loop setDeferredMultiBulkLength() will be called. */
- if (prepareClientToWrite(c) != REDIS_OK) return NULL;
- listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL));
+ if (prepareClientToWrite(c) != C_OK) return NULL;
+ listAddNodeTail(c->reply,NULL); /* NULL is our placeholder. */
return listLast(c->reply);
}
/* Populate the length object and try gluing it to the next chunk. */
-void setDeferredMultiBulkLength(redisClient *c, void *node, long length) {
+void setDeferredMultiBulkLength(client *c, void *node, long length) {
listNode *ln = (listNode*)node;
- robj *len, *next;
+ sds len, next;
- /* Abort when *node is NULL (see addDeferredMultiBulkLength). */
+ /* Abort when *node is NULL: when the client should not accept writes
+ * we return NULL in addDeferredMultiBulkLength() */
if (node == NULL) return;
- len = listNodeValue(ln);
- len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
- len->encoding = REDIS_ENCODING_RAW; /* in case it was an EMBSTR. */
- c->reply_bytes += zmalloc_size_sds(len->ptr);
+ len = sdscatprintf(sdsnewlen("*",1),"%ld\r\n",length);
+ listNodeValue(ln) = len;
+ c->reply_bytes += sdslen(len);
if (ln->next != NULL) {
next = listNodeValue(ln->next);
/* Only glue when the next node is non-NULL (an sds in this case) */
- if (next->ptr != NULL) {
- c->reply_bytes -= zmalloc_size_sds(len->ptr);
- c->reply_bytes -= getStringObjectSdsUsedMemory(next);
- len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr));
- c->reply_bytes += zmalloc_size_sds(len->ptr);
+ if (next != NULL) {
+ len = sdscatsds(len,next);
listDelNode(c->reply,ln->next);
+ listNodeValue(ln) = len;
+ /* No need to update c->reply_bytes: we are just moving the same
+ * amount of bytes from one node to another. */
}
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
/* Add a double as a bulk reply */
-void addReplyDouble(redisClient *c, double d) {
+void addReplyDouble(client *c, double d) {
char dbuf[128], sbuf[128];
int dlen, slen;
if (isinf(d)) {
@@ -444,19 +470,28 @@ void addReplyDouble(redisClient *c, double d) {
}
}
+/* Add a long double as a bulk reply, but uses a human readable formatting
+ * of the double instead of exposing the crude behavior of doubles to the
+ * dear user. */
+void addReplyHumanLongDouble(client *c, long double d) {
+ robj *o = createStringObjectFromLongDouble(d,1);
+ addReplyBulk(c,o);
+ decrRefCount(o);
+}
+
/* Add a long long as integer reply or bulk len / multi bulk count.
* Basically this is used to output <prefix><long long><crlf>. */
-void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
+void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
char buf[128];
int len;
/* Things like $3\r\n or *2\r\n are emitted very often by the protocol
* so we have a few shared objects to use if the integer is small
* like it is most of the times. */
- if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ if (prefix == '*' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.mbulkhdr[ll]);
return;
- } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ } else if (prefix == '$' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.bulkhdr[ll]);
return;
}
@@ -468,7 +503,7 @@ void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
addReplyString(c,buf,len+3);
}
-void addReplyLongLong(redisClient *c, long long ll) {
+void addReplyLongLong(client *c, long long ll) {
if (ll == 0)
addReply(c,shared.czero);
else if (ll == 1)
@@ -477,15 +512,15 @@ void addReplyLongLong(redisClient *c, long long ll) {
addReplyLongLongWithPrefix(c,ll,':');
}
-void addReplyMultiBulkLen(redisClient *c, long length) {
- if (length < REDIS_SHARED_BULKHDR_LEN)
+void addReplyMultiBulkLen(client *c, long length) {
+ if (length < OBJ_SHARED_BULKHDR_LEN)
addReply(c,shared.mbulkhdr[length]);
else
addReplyLongLongWithPrefix(c,length,'*');
}
/* Create the length prefix of a bulk reply, example: $2234 */
-void addReplyBulkLen(redisClient *c, robj *obj) {
+void addReplyBulkLen(client *c, robj *obj) {
size_t len;
if (sdsEncodedObject(obj)) {
@@ -504,28 +539,35 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
}
}
- if (len < REDIS_SHARED_BULKHDR_LEN)
+ if (len < OBJ_SHARED_BULKHDR_LEN)
addReply(c,shared.bulkhdr[len]);
else
addReplyLongLongWithPrefix(c,len,'$');
}
/* Add a Redis Object as a bulk reply */
-void addReplyBulk(redisClient *c, robj *obj) {
+void addReplyBulk(client *c, robj *obj) {
addReplyBulkLen(c,obj);
addReply(c,obj);
addReply(c,shared.crlf);
}
/* Add a C buffer as bulk reply */
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
+void addReplyBulkCBuffer(client *c, const void *p, size_t len) {
addReplyLongLongWithPrefix(c,len,'$');
addReplyString(c,p,len);
addReply(c,shared.crlf);
}
+/* Add sds to reply (takes ownership of sds and frees it) */
+void addReplyBulkSds(client *c, sds s) {
+ addReplyLongLongWithPrefix(c,sdslen(s),'$');
+ addReplySds(c,s);
+ addReply(c,shared.crlf);
+}
+
/* Add a C nul term string as bulk reply */
-void addReplyBulkCString(redisClient *c, char *s) {
+void addReplyBulkCString(client *c, const char *s) {
if (s == NULL) {
addReply(c,shared.nullbulk);
} else {
@@ -534,7 +576,7 @@ void addReplyBulkCString(redisClient *c, char *s) {
}
/* Add a long long as a bulk reply */
-void addReplyBulkLongLong(redisClient *c, long long ll) {
+void addReplyBulkLongLong(client *c, long long ll) {
char buf[64];
int len;
@@ -545,7 +587,7 @@ void addReplyBulkLongLong(redisClient *c, long long ll) {
/* Copy 'src' client output buffers into 'dst' client output buffers.
* The function takes care of freeing the old output buffers of the
* destination client. */
-void copyClientOutputBuffer(redisClient *dst, redisClient *src) {
+void copyClientOutputBuffer(client *dst, client *src) {
listRelease(dst->reply);
dst->reply = listDup(src->reply);
memcpy(dst->buf,src->buf,src->bufpos);
@@ -553,11 +595,17 @@ void copyClientOutputBuffer(redisClient *dst, redisClient *src) {
dst->reply_bytes = src->reply_bytes;
}
+/* Return true if the specified client has pending reply buffers to write to
+ * the socket. */
+int clientHasPendingReplies(client *c) {
+ return c->bufpos || listLength(c->reply);
+}
+
#define MAX_ACCEPTS_PER_CALL 1000
-static void acceptCommonHandler(int fd, int flags) {
- redisClient *c;
+static void acceptCommonHandler(int fd, int flags, char *ip) {
+ client *c;
if ((c = createClient(fd)) == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Error registering fd event for the new client: %s (fd=%d)",
strerror(errno),fd);
close(fd); /* May be already closed, just ignore errors */
@@ -578,50 +626,92 @@ static void acceptCommonHandler(int fd, int flags) {
freeClient(c);
return;
}
+
+ /* If the server is running in protected mode (the default) and there
+ * is no password set, nor a specific interface is bound, we don't accept
+ * requests from non loopback interfaces. Instead we try to explain to
+ * the user what to do to fix it if needed. */
+ if (server.protected_mode &&
+ server.bindaddr_count == 0 &&
+ server.requirepass == NULL &&
+ !(flags & CLIENT_UNIX_SOCKET) &&
+ ip != NULL)
+ {
+ if (strcmp(ip,"127.0.0.1") && strcmp(ip,"::1")) {
+ char *err =
+ "-DENIED Redis is running in protected mode because protected "
+ "mode is enabled, no bind address was specified, no "
+ "authentication password is requested to clients. In this mode "
+ "connections are only accepted from the loopback interface. "
+ "If you want to connect from external computers to Redis you "
+ "may adopt one of the following solutions: "
+ "1) Just disable protected mode sending the command "
+ "'CONFIG SET protected-mode no' from the loopback interface "
+ "by connecting to Redis from the same host the server is "
+ "running, however MAKE SURE Redis is not publicly accessible "
+ "from internet if you do so. Use CONFIG REWRITE to make this "
+ "change permanent. "
+ "2) Alternatively you can just disable the protected mode by "
+ "editing the Redis configuration file, and setting the protected "
+ "mode option to 'no', and then restarting the server. "
+ "3) If you started the server manually just for testing, restart "
+ "it with the '--protected-mode no' option. "
+ "4) Setup a bind address or an authentication password. "
+ "NOTE: You only need to do one of the above things in order for "
+ "the server to start accepting connections from the outside.\r\n";
+ if (write(c->fd,err,strlen(err)) == -1) {
+ /* Nothing to do, Just to avoid the warning... */
+ }
+ server.stat_rejected_conn++;
+ freeClient(c);
+ return;
+ }
+ }
+
server.stat_numconnections++;
c->flags |= flags;
}
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd, max = MAX_ACCEPTS_PER_CALL;
- char cip[REDIS_IP_STR_LEN];
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ char cip[NET_IP_STR_LEN];
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
while(max--) {
cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Accepting client connection: %s", server.neterr);
return;
}
- redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
- acceptCommonHandler(cfd,0);
+ serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
+ acceptCommonHandler(cfd,0,cip);
}
}
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cfd, max = MAX_ACCEPTS_PER_CALL;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
while(max--) {
cfd = anetUnixAccept(server.neterr, fd);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Accepting client connection: %s", server.neterr);
return;
}
- redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket);
- acceptCommonHandler(cfd,REDIS_UNIX_SOCKET);
+ serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
+ acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL);
}
}
-static void freeClientArgv(redisClient *c) {
+static void freeClientArgv(client *c) {
int j;
for (j = 0; j < c->argc; j++)
decrRefCount(c->argv[j]);
@@ -635,41 +725,67 @@ static void freeClientArgv(redisClient *c) {
void disconnectSlaves(void) {
while (listLength(server.slaves)) {
listNode *ln = listFirst(server.slaves);
- freeClient((redisClient*)ln->value);
+ freeClient((client*)ln->value);
}
}
-/* This function is called when the slave lose the connection with the
- * master into an unexpected way. */
-void replicationHandleMasterDisconnection(void) {
- server.master = NULL;
- server.repl_state = REDIS_REPL_CONNECT;
- server.repl_down_since = server.unixtime;
- /* We lost connection with our master, force our slaves to resync
- * with us as well to load the new data set.
- *
- * If server.masterhost is NULL the user called SLAVEOF NO ONE so
- * slave resync is not needed. */
- if (server.masterhost != NULL) disconnectSlaves();
-}
-
-void freeClient(redisClient *c) {
+/* Remove the specified client from global lists where the client could
+ * be referenced, not including the Pub/Sub channels.
+ * This is used by freeClient() and replicationCacheMaster(). */
+void unlinkClient(client *c) {
listNode *ln;
- /* If this is marked as current client unset it */
+ /* If this is marked as current client unset it. */
if (server.current_client == c) server.current_client = NULL;
+ /* Certain operations must be done only if the client has an active socket.
+ * If the client was already unlinked or if it's a "fake client" the
+ * fd is already set to -1. */
+ if (c->fd != -1) {
+ /* Remove from the list of active clients. */
+ ln = listSearchKey(server.clients,c);
+ serverAssert(ln != NULL);
+ listDelNode(server.clients,ln);
+
+ /* Unregister async I/O handlers and close the socket. */
+ aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ close(c->fd);
+ c->fd = -1;
+ }
+
+ /* Remove from the list of pending writes if needed. */
+ if (c->flags & CLIENT_PENDING_WRITE) {
+ ln = listSearchKey(server.clients_pending_write,c);
+ serverAssert(ln != NULL);
+ listDelNode(server.clients_pending_write,ln);
+ c->flags &= ~CLIENT_PENDING_WRITE;
+ }
+
+ /* When client was just unblocked because of a blocking operation,
+ * remove it from the list of unblocked clients. */
+ if (c->flags & CLIENT_UNBLOCKED) {
+ ln = listSearchKey(server.unblocked_clients,c);
+ serverAssert(ln != NULL);
+ listDelNode(server.unblocked_clients,ln);
+ c->flags &= ~CLIENT_UNBLOCKED;
+ }
+}
+
+void freeClient(client *c) {
+ listNode *ln;
+
/* If it is our master that's beging disconnected we should make sure
* to cache the state to try a partial resynchronization later.
*
* Note that before doing this we make sure that the client is not in
* some unexpected state, by checking its flags. */
- if (server.master && c->flags & REDIS_MASTER) {
- redisLog(REDIS_WARNING,"Connection with master lost.");
- if (!(c->flags & (REDIS_CLOSE_AFTER_REPLY|
- REDIS_CLOSE_ASAP|
- REDIS_BLOCKED|
- REDIS_UNBLOCKED)))
+ if (server.master && c->flags & CLIENT_MASTER) {
+ serverLog(LL_WARNING,"Connection with master lost.");
+ if (!(c->flags & (CLIENT_CLOSE_AFTER_REPLY|
+ CLIENT_CLOSE_ASAP|
+ CLIENT_BLOCKED|
+ CLIENT_UNBLOCKED)))
{
replicationCacheMaster(c);
return;
@@ -677,21 +793,18 @@ void freeClient(redisClient *c) {
}
/* Log link disconnection with slave */
- if ((c->flags & REDIS_SLAVE) && !(c->flags & REDIS_MONITOR)) {
- char ip[REDIS_IP_STR_LEN];
-
- if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) {
- redisLog(REDIS_WARNING,"Connection with slave %s:%d lost.",
- ip, c->slave_listening_port);
- }
+ if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR)) {
+ serverLog(LL_WARNING,"Connection with slave %s lost.",
+ replicationGetSlaveName(c));
}
/* Free the query buffer */
sdsfree(c->querybuf);
+ sdsfree(c->pending_querybuf);
c->querybuf = NULL;
/* Deallocate structures used to block on blocking ops. */
- if (c->flags & REDIS_BLOCKED) unblockClient(c);
+ if (c->flags & CLIENT_BLOCKED) unblockClient(c);
dictRelease(c->bpop.keys);
/* UNWATCH all the keys */
@@ -704,59 +817,43 @@ void freeClient(redisClient *c) {
dictRelease(c->pubsub_channels);
listRelease(c->pubsub_patterns);
- /* Close socket, unregister events, and remove list of replies and
- * accumulated arguments. */
- if (c->fd != -1) {
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- close(c->fd);
- }
+ /* Free data structures. */
listRelease(c->reply);
freeClientArgv(c);
- /* Remove from the list of clients */
- if (c->fd != -1) {
- ln = listSearchKey(server.clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.clients,ln);
- }
-
- /* When client was just unblocked because of a blocking operation,
- * remove it from the list of unblocked clients. */
- if (c->flags & REDIS_UNBLOCKED) {
- ln = listSearchKey(server.unblocked_clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.unblocked_clients,ln);
- }
+ /* Unlink the client: this will close the socket, remove the I/O
+ * handlers, and remove references of the client from different
+ * places where active clients may be referenced. */
+ unlinkClient(c);
/* Master/slave cleanup Case 1:
* we lost the connection with a slave. */
- if (c->flags & REDIS_SLAVE) {
- if (c->replstate == REDIS_REPL_SEND_BULK) {
+ if (c->flags & CLIENT_SLAVE) {
+ if (c->replstate == SLAVE_STATE_SEND_BULK) {
if (c->repldbfd != -1) close(c->repldbfd);
if (c->replpreamble) sdsfree(c->replpreamble);
}
- list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
+ list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.slaves;
ln = listSearchKey(l,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(l,ln);
/* We need to remember the time when we started to have zero
* attached slaves, as after some time we'll free the replication
* backlog. */
- if (c->flags & REDIS_SLAVE && listLength(server.slaves) == 0)
+ if (c->flags & CLIENT_SLAVE && listLength(server.slaves) == 0)
server.repl_no_slaves_since = server.unixtime;
refreshGoodSlavesCount();
}
/* Master/slave cleanup Case 2:
* we lost the connection with the master. */
- if (c->flags & REDIS_MASTER) replicationHandleMasterDisconnection();
+ if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();
/* If this client was scheduled for async freeing we need to remove it
* from the queue. */
- if (c->flags & REDIS_CLOSE_ASAP) {
+ if (c->flags & CLIENT_CLOSE_ASAP) {
ln = listSearchKey(server.clients_to_close,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(server.clients_to_close,ln);
}
@@ -773,32 +870,31 @@ void freeClient(redisClient *c) {
* This function is useful when we need to terminate a client but we are in
* a context where calling freeClient() is not possible, because the client
* should be valid for the continuation of the flow of the program. */
-void freeClientAsync(redisClient *c) {
- if (c->flags & REDIS_CLOSE_ASAP) return;
- c->flags |= REDIS_CLOSE_ASAP;
+void freeClientAsync(client *c) {
+ if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return;
+ c->flags |= CLIENT_CLOSE_ASAP;
listAddNodeTail(server.clients_to_close,c);
}
void freeClientsInAsyncFreeQueue(void) {
while (listLength(server.clients_to_close)) {
listNode *ln = listFirst(server.clients_to_close);
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
- c->flags &= ~REDIS_CLOSE_ASAP;
+ c->flags &= ~CLIENT_CLOSE_ASAP;
freeClient(c);
listDelNode(server.clients_to_close,ln);
}
}
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen;
- size_t objmem;
- robj *o;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- while(c->bufpos > 0 || listLength(c->reply)) {
+/* Write data in output buffers to client. Return C_OK if the client
+ * is still valid after the call, C_ERR if it was freed. */
+int writeToClient(int fd, client *c, int handler_installed) {
+ ssize_t nwritten = 0, totwritten = 0;
+ size_t objlen;
+ sds o;
+
+ while(clientHasPendingReplies(c)) {
if (c->bufpos > 0) {
nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
if (nwritten <= 0) break;
@@ -807,22 +903,20 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
/* If the buffer was sent, set bufpos to zero to continue with
* the remainder of the reply. */
- if (c->sentlen == c->bufpos) {
+ if ((int)c->sentlen == c->bufpos) {
c->bufpos = 0;
c->sentlen = 0;
}
} else {
o = listNodeValue(listFirst(c->reply));
- objlen = sdslen(o->ptr);
- objmem = getStringObjectSdsUsedMemory(o);
+ objlen = sdslen(o);
if (objlen == 0) {
listDelNode(c->reply,listFirst(c->reply));
- c->reply_bytes -= objmem;
continue;
}
- nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
+ nwritten = write(fd, o + c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
@@ -831,10 +925,14 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
if (c->sentlen == objlen) {
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
- c->reply_bytes -= objmem;
+ c->reply_bytes -= objlen;
+ /* If there are no more objects in the list, we expect
+ * the count of reply bytes to be exactly zero. */
+ if (listLength(c->reply) == 0)
+ serverAssert(c->reply_bytes == 0);
}
}
- /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
+ /* Note that we avoid to send more than NET_MAX_WRITES_PER_EVENT
* bytes, in a single threaded server it's a good idea to serve
* other clients as well, even if a very large request comes from
* super fast link that is always able to accept data (in real world
@@ -842,18 +940,19 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
*
* However if we are over the maxmemory limit we ignore that and
* just deliver as much data as it is possible to deliver. */
- if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
+ if (totwritten > NET_MAX_WRITES_PER_EVENT &&
(server.maxmemory == 0 ||
zmalloc_used_memory() < server.maxmemory)) break;
}
+ server.stat_net_output_bytes += totwritten;
if (nwritten == -1) {
if (errno == EAGAIN) {
nwritten = 0;
} else {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Error writing to client: %s", strerror(errno));
freeClient(c);
- return;
+ return C_ERR;
}
}
if (totwritten > 0) {
@@ -861,32 +960,90 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
* as an interaction, since we always send REPLCONF ACK commands
* that take some time to just fill the socket output buffer.
* We just rely on data / pings received for timeout detection. */
- if (!(c->flags & REDIS_MASTER)) c->lastinteraction = server.unixtime;
+ if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = server.unixtime;
}
- if (c->bufpos == 0 && listLength(c->reply) == 0) {
+ if (!clientHasPendingReplies(c)) {
c->sentlen = 0;
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
/* Close connection after entire reply has been sent. */
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c);
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
+ freeClient(c);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
+/* Write event handler. Just send data to the client. */
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
+ UNUSED(el);
+ UNUSED(mask);
+ writeToClient(fd,privdata,1);
+}
+
+/* This function is called just before entering the event loop, in the hope
+ * we can just write the replies to the client output buffer without any
+ * need to use a syscall in order to install the writable event handler,
+ * get it called, and so forth. */
+int handleClientsWithPendingWrites(void) {
+ listIter li;
+ listNode *ln;
+ int processed = listLength(server.clients_pending_write);
+
+ listRewind(server.clients_pending_write,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ c->flags &= ~CLIENT_PENDING_WRITE;
+ listDelNode(server.clients_pending_write,ln);
+
+ /* Try to write buffers to the client socket. */
+ if (writeToClient(c->fd,c,0) == C_ERR) continue;
+
+ /* If there is nothing left, do nothing. Otherwise install
+ * the write handler. */
+ if (clientHasPendingReplies(c) &&
+ aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
+ sendReplyToClient, c) == AE_ERR)
+ {
+ freeClientAsync(c);
+ }
}
+ return processed;
}
/* resetClient prepare the client to process the next command */
-void resetClient(redisClient *c) {
+void resetClient(client *c) {
redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL;
freeClientArgv(c);
c->reqtype = 0;
c->multibulklen = 0;
c->bulklen = -1;
+
/* We clear the ASKING flag as well if we are not inside a MULTI, and
* if what we just executed is not the ASKING command itself. */
- if (!(c->flags & REDIS_MULTI) && prevcmd != askingCommand)
- c->flags &= (~REDIS_ASKING);
+ if (!(c->flags & CLIENT_MULTI) && prevcmd != askingCommand)
+ c->flags &= ~CLIENT_ASKING;
+
+ /* Remove the CLIENT_REPLY_SKIP flag if any so that the reply
+ * to the next command will be sent, but set the flag if the command
+ * we just processed was "CLIENT REPLY SKIP". */
+ c->flags &= ~CLIENT_REPLY_SKIP;
+ if (c->flags & CLIENT_REPLY_SKIP_NEXT) {
+ c->flags |= CLIENT_REPLY_SKIP;
+ c->flags &= ~CLIENT_REPLY_SKIP_NEXT;
+ }
}
-int processInlineBuffer(redisClient *c) {
+/* Like processMultibulkBuffer(), but for the inline protocol instead of RESP,
+ * this function consumes the client query buffer and creates a command ready
+ * to be executed inside the client structure. Returns C_OK if the command
+ * is ready to be executed, or C_ERR if there is still protocol to read to
+ * have a well formed command. The function also returns C_ERR when there is
+ * a protocol error: in such a case the client structure is setup to reply
+ * with the error and close the connection. */
+int processInlineBuffer(client *c) {
char *newline;
int argc, j;
sds *argv, aux;
@@ -897,11 +1054,11 @@ int processInlineBuffer(redisClient *c) {
/* Nothing to do without a \r\n */
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,"Protocol error: too big inline request");
- setProtocolError(c,0);
+ setProtocolError("too big inline request",c,0);
}
- return REDIS_ERR;
+ return C_ERR;
}
/* Handle the \r\n case. */
@@ -915,86 +1072,117 @@ int processInlineBuffer(redisClient *c) {
sdsfree(aux);
if (argv == NULL) {
addReplyError(c,"Protocol error: unbalanced quotes in request");
- setProtocolError(c,0);
- return REDIS_ERR;
+ setProtocolError("unbalanced quotes in inline request",c,0);
+ return C_ERR;
}
/* Newline from slaves can be used to refresh the last ACK time.
* This is useful for a slave to ping back while loading a big
* RDB file. */
- if (querylen == 0 && c->flags & REDIS_SLAVE)
+ if (querylen == 0 && c->flags & CLIENT_SLAVE)
c->repl_ack_time = server.unixtime;
/* Leave data after the first line of the query in the buffer */
sdsrange(c->querybuf,querylen+2,-1);
/* Setup argv array on client structure */
- if (c->argv) zfree(c->argv);
- c->argv = zmalloc(sizeof(robj*)*argc);
+ if (argc) {
+ if (c->argv) zfree(c->argv);
+ c->argv = zmalloc(sizeof(robj*)*argc);
+ }
/* Create redis objects for all arguments. */
for (c->argc = 0, j = 0; j < argc; j++) {
if (sdslen(argv[j])) {
- c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
+ c->argv[c->argc] = createObject(OBJ_STRING,argv[j]);
c->argc++;
} else {
sdsfree(argv[j]);
}
}
zfree(argv);
- return REDIS_OK;
+ return C_OK;
}
/* Helper function. Trims query buffer to make the function that processes
* multi bulk requests idempotent. */
-static void setProtocolError(redisClient *c, int pos) {
- if (server.verbosity >= REDIS_VERBOSE) {
+#define PROTO_DUMP_LEN 128
+static void setProtocolError(const char *errstr, client *c, int pos) {
+ if (server.verbosity <= LL_VERBOSE) {
sds client = catClientInfoString(sdsempty(),c);
- redisLog(REDIS_VERBOSE,
- "Protocol error from client: %s", client);
+
+ /* Sample some protocol to give an idea about what was inside. */
+ char buf[256];
+ if (sdslen(c->querybuf) < PROTO_DUMP_LEN) {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%s'", c->querybuf);
+ } else {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%.*s' (... more %zu bytes ...) '%.*s'", PROTO_DUMP_LEN/2, c->querybuf, sdslen(c->querybuf)-PROTO_DUMP_LEN, PROTO_DUMP_LEN/2, c->querybuf+sdslen(c->querybuf)-PROTO_DUMP_LEN/2);
+ }
+
+ /* Remove non printable chars. */
+ char *p = buf;
+ while (*p != '\0') {
+ if (!isprint(*p)) *p = '.';
+ p++;
+ }
+
+ /* Log all the client and protocol info. */
+ serverLog(LL_VERBOSE,
+ "Protocol error (%s) from client: %s. %s", errstr, client, buf);
sdsfree(client);
}
- c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
sdsrange(c->querybuf,pos,-1);
}
-int processMultibulkBuffer(redisClient *c) {
+/* Process the query buffer for client 'c', setting up the client argument
+ * vector for command execution. Returns C_OK if after running the function
+ * the client has a well-formed ready to be processed command, otherwise
+ * C_ERR if there is still to read more buffer to get the full command.
+ * The function also returns C_ERR when there is a protocol error: in such a
+ * case the client structure is setup to reply with the error and close
+ * the connection.
+ *
+ * This function is called if processInputBuffer() detects that the next
+ * command is in RESP format, so the first byte in the command is found
+ * to be '*'. Otherwise for inline commands processInlineBuffer() is called. */
+int processMultibulkBuffer(client *c) {
char *newline = NULL;
int pos = 0, ok;
long long ll;
if (c->multibulklen == 0) {
/* The client should have been reset */
- redisAssertWithInfo(c,NULL,c->argc == 0);
+ serverAssertWithInfo(c,NULL,c->argc == 0);
/* Multi bulk length cannot be read without a \r\n */
newline = strchr(c->querybuf,'\r');
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,"Protocol error: too big mbulk count string");
- setProtocolError(c,0);
+ setProtocolError("too big mbulk count string",c,0);
}
- return REDIS_ERR;
+ return C_ERR;
}
/* Buffer should also contain \n */
if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2))
- return REDIS_ERR;
+ return C_ERR;
/* We know for sure there is a whole line since newline != NULL,
* so go ahead and find out the multi bulk length. */
- redisAssertWithInfo(c,NULL,c->querybuf[0] == '*');
+ serverAssertWithInfo(c,NULL,c->querybuf[0] == '*');
ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll);
if (!ok || ll > 1024*1024) {
addReplyError(c,"Protocol error: invalid multibulk length");
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("invalid mbulk count",c,pos);
+ return C_ERR;
}
pos = (newline-c->querybuf)+2;
if (ll <= 0) {
sdsrange(c->querybuf,pos,-1);
- return REDIS_OK;
+ return C_OK;
}
c->multibulklen = ll;
@@ -1004,17 +1192,17 @@ int processMultibulkBuffer(redisClient *c) {
c->argv = zmalloc(sizeof(robj*)*c->multibulklen);
}
- redisAssertWithInfo(c,NULL,c->multibulklen > 0);
+ serverAssertWithInfo(c,NULL,c->multibulklen > 0);
while(c->multibulklen) {
/* Read bulk length if unknown */
if (c->bulklen == -1) {
newline = strchr(c->querybuf+pos,'\r');
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,
"Protocol error: too big bulk count string");
- setProtocolError(c,0);
- return REDIS_ERR;
+ setProtocolError("too big bulk count string",c,0);
+ return C_ERR;
}
break;
}
@@ -1027,19 +1215,19 @@ int processMultibulkBuffer(redisClient *c) {
addReplyErrorFormat(c,
"Protocol error: expected '$', got '%c'",
c->querybuf[pos]);
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("expected $ but got something else",c,pos);
+ return C_ERR;
}
ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll);
if (!ok || ll < 0 || ll > 512*1024*1024) {
addReplyError(c,"Protocol error: invalid bulk length");
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("invalid bulk length",c,pos);
+ return C_ERR;
}
pos += newline-(c->querybuf+pos)+2;
- if (ll >= REDIS_MBULK_BIG_ARG) {
+ if (ll >= PROTO_MBULK_BIG_ARG) {
size_t qblen;
/* If we are going to read a large object from network
@@ -1051,7 +1239,7 @@ int processMultibulkBuffer(redisClient *c) {
qblen = sdslen(c->querybuf);
/* Hint the sds library about the amount of bytes this string is
* going to contain. */
- if (qblen < ll+2)
+ if (qblen < (size_t)ll+2)
c->querybuf = sdsMakeRoomFor(c->querybuf,ll+2-qblen);
}
c->bulklen = ll;
@@ -1066,15 +1254,15 @@ int processMultibulkBuffer(redisClient *c) {
* instead of creating a new object by *copying* the sds we
* just use the current sds string. */
if (pos == 0 &&
- c->bulklen >= REDIS_MBULK_BIG_ARG &&
+ c->bulklen >= PROTO_MBULK_BIG_ARG &&
(signed) sdslen(c->querybuf) == c->bulklen+2)
{
- c->argv[c->argc++] = createObject(REDIS_STRING,c->querybuf);
+ c->argv[c->argc++] = createObject(OBJ_STRING,c->querybuf);
sdsIncrLen(c->querybuf,-2); /* remove CRLF */
- c->querybuf = sdsempty();
/* Assume that if we saw a fat argument we'll see another one
* likely... */
- c->querybuf = sdsMakeRoomFor(c->querybuf,c->bulklen+2);
+ c->querybuf = sdsnewlen(NULL,c->bulklen+2);
+ sdsclear(c->querybuf);
pos = 0;
} else {
c->argv[c->argc++] =
@@ -1090,41 +1278,48 @@ int processMultibulkBuffer(redisClient *c) {
if (pos) sdsrange(c->querybuf,pos,-1);
/* We're done when c->multibulk == 0 */
- if (c->multibulklen == 0) return REDIS_OK;
+ if (c->multibulklen == 0) return C_OK;
- /* Still not read to process the command */
- return REDIS_ERR;
+ /* Still not ready to process the command */
+ return C_ERR;
}
-void processInputBuffer(redisClient *c) {
+/* This function is called every time, in the client structure 'c', there is
+ * more query buffer to process, because we read more data from the socket
+ * or because a client was blocked and later reactivated, so there could be
+ * pending query buffer, already representing a full command, to process. */
+void processInputBuffer(client *c) {
+ server.current_client = c;
/* Keep processing while there is something in the input buffer */
while(sdslen(c->querybuf)) {
/* Return if clients are paused. */
- if (!(c->flags & REDIS_SLAVE) && clientsArePaused()) return;
+ if (!(c->flags & CLIENT_SLAVE) && clientsArePaused()) break;
/* Immediately abort if the client is in the middle of something. */
- if (c->flags & REDIS_BLOCKED) return;
+ if (c->flags & CLIENT_BLOCKED) break;
- /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is
+ /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
* written to the client. Make sure to not let the reply grow after
- * this flag has been set (i.e. don't process more commands). */
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
+ * this flag has been set (i.e. don't process more commands).
+ *
+ * The same applies for clients we want to terminate ASAP. */
+ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break;
/* Determine request type when unknown. */
if (!c->reqtype) {
if (c->querybuf[0] == '*') {
- c->reqtype = REDIS_REQ_MULTIBULK;
+ c->reqtype = PROTO_REQ_MULTIBULK;
} else {
- c->reqtype = REDIS_REQ_INLINE;
+ c->reqtype = PROTO_REQ_INLINE;
}
}
- if (c->reqtype == REDIS_REQ_INLINE) {
- if (processInlineBuffer(c) != REDIS_OK) break;
- } else if (c->reqtype == REDIS_REQ_MULTIBULK) {
- if (processMultibulkBuffer(c) != REDIS_OK) break;
+ if (c->reqtype == PROTO_REQ_INLINE) {
+ if (processInlineBuffer(c) != C_OK) break;
+ } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
+ if (processMultibulkBuffer(c) != C_OK) break;
} else {
- redisPanic("Unknown request type");
+ serverPanic("Unknown request type");
}
/* Multibulk processing could see a <= 0 length. */
@@ -1132,29 +1327,44 @@ void processInputBuffer(redisClient *c) {
resetClient(c);
} else {
/* Only reset the client when the command was executed. */
- if (processCommand(c) == REDIS_OK)
- resetClient(c);
+ if (processCommand(c) == C_OK) {
+ if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
+ /* Update the applied replication offset of our master. */
+ c->reploff = c->read_reploff - sdslen(c->querybuf);
+ }
+
+ /* Don't reset the client structure for clients blocked in a
+ * module blocking command, so that the reply callback will
+ * still be able to access the client argv and argc field.
+ * The client will be reset in unblockClientFromModule(). */
+ if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE)
+ resetClient(c);
+ }
+ /* freeMemoryIfNeeded may flush slave output buffers. This may
+ * result into a slave, that may be the active client, to be
+ * freed. */
+ if (server.current_client == NULL) break;
}
}
+ server.current_client = NULL;
}
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = (redisClient*) privdata;
+ client *c = (client*) privdata;
int nread, readlen;
size_t qblen;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(mask);
- server.current_client = c;
- readlen = REDIS_IOBUF_LEN;
+ readlen = PROTO_IOBUF_LEN;
/* If this is a multi bulk request, and we are processing a bulk reply
* that is large enough, try to maximize the probability that the query
* buffer contains exactly the SDS string representing the object, even
* at the risk of requiring more read(2) calls. This way the function
* processMultiBulkBuffer() can avoid copying buffers to create the
* Redis Object representing the argument. */
- if (c->reqtype == REDIS_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
- && c->bulklen >= REDIS_MBULK_BIG_ARG)
+ if (c->reqtype == PROTO_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
+ && c->bulklen >= PROTO_MBULK_BIG_ARG)
{
int remaining = (unsigned)(c->bulklen+2)-sdslen(c->querybuf);
@@ -1167,42 +1377,62 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
nread = read(fd, c->querybuf+qblen, readlen);
if (nread == -1) {
if (errno == EAGAIN) {
- nread = 0;
+ return;
} else {
- redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
+ serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno));
freeClient(c);
return;
}
} else if (nread == 0) {
- redisLog(REDIS_VERBOSE, "Client closed connection");
+ serverLog(LL_VERBOSE, "Client closed connection");
freeClient(c);
return;
+ } else if (c->flags & CLIENT_MASTER) {
+ /* Append the query buffer to the pending (not applied) buffer
+ * of the master. We'll use this buffer later in order to have a
+ * copy of the string applied by the last command executed. */
+ c->pending_querybuf = sdscatlen(c->pending_querybuf,
+ c->querybuf+qblen,nread);
}
- if (nread) {
- sdsIncrLen(c->querybuf,nread);
- c->lastinteraction = server.unixtime;
- if (c->flags & REDIS_MASTER) c->reploff += nread;
- } else {
- server.current_client = NULL;
- return;
- }
+
+ sdsIncrLen(c->querybuf,nread);
+ c->lastinteraction = server.unixtime;
+ if (c->flags & CLIENT_MASTER) c->read_reploff += nread;
+ server.stat_net_input_bytes += nread;
if (sdslen(c->querybuf) > server.client_max_querybuf_len) {
sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty();
bytes = sdscatrepr(bytes,c->querybuf,64);
- redisLog(REDIS_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
+ serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
sdsfree(ci);
sdsfree(bytes);
freeClient(c);
return;
}
- processInputBuffer(c);
- server.current_client = NULL;
+
+ /* Time to process the buffer. If the client is a master we need to
+ * compute the difference between the applied offset before and after
+ * processing the buffer, to understand how much of the replication stream
+ * was actually applied to the master state: this quantity, and its
+ * corresponding part of the replication stream, will be propagated to
+ * the sub-slaves and to the replication backlog. */
+ if (!(c->flags & CLIENT_MASTER)) {
+ processInputBuffer(c);
+ } else {
+ size_t prev_offset = c->reploff;
+ processInputBuffer(c);
+ size_t applied = c->reploff - prev_offset;
+ if (applied) {
+ replicationFeedSlavesFromMasterStream(server.slaves,
+ c->pending_querybuf, applied);
+ sdsrange(c->pending_querybuf,applied,-1);
+ }
+ }
}
void getClientsMaxBuffers(unsigned long *longest_output_list,
unsigned long *biggest_input_buffer) {
- redisClient *c;
+ client *c;
listNode *ln;
listIter li;
unsigned long lol = 0, bib = 0;
@@ -1218,52 +1448,34 @@ void getClientsMaxBuffers(unsigned long *longest_output_list,
*biggest_input_buffer = bib;
}
-/* This is a helper function for genClientPeerId().
- * It writes the specified ip/port to "peerid" as a null termiated string
- * in the form ip:port if ip does not contain ":" itself, otherwise
- * [ip]:port format is used (for IPv6 addresses basically). */
-void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port) {
- if (strchr(ip,':'))
- snprintf(peerid,peerid_len,"[%s]:%d",ip,port);
- else
- snprintf(peerid,peerid_len,"%s:%d",ip,port);
-}
-
/* A Redis "Peer ID" is a colon separated ip:port pair.
- * For IPv4 it's in the form x.y.z.k:pork, example: "127.0.0.1:1234".
+ * For IPv4 it's in the form x.y.z.k:port, example: "127.0.0.1:1234".
* For IPv6 addresses we use [] around the IP part, like in "[::1]:1234".
- * For Unix socekts we use path:0, like in "/tmp/redis:0".
+ * For Unix sockets we use path:0, like in "/tmp/redis:0".
*
- * A Peer ID always fits inside a buffer of REDIS_PEER_ID_LEN bytes, including
+ * A Peer ID always fits inside a buffer of NET_PEER_ID_LEN bytes, including
* the null term.
*
- * The function returns REDIS_OK on succcess, and REDIS_ERR on failure.
- *
* On failure the function still populates 'peerid' with the "?:0" string
* in case you want to relax error checking or need to display something
* anyway (see anetPeerToString implementation for more info). */
-int genClientPeerId(redisClient *client, char *peerid, size_t peerid_len) {
- char ip[REDIS_IP_STR_LEN];
- int port;
-
- if (client->flags & REDIS_UNIX_SOCKET) {
+void genClientPeerId(client *client, char *peerid,
+ size_t peerid_len) {
+ if (client->flags & CLIENT_UNIX_SOCKET) {
/* Unix socket client. */
snprintf(peerid,peerid_len,"%s:0",server.unixsocket);
- return REDIS_OK;
} else {
/* TCP client. */
- int retval = anetPeerToString(client->fd,ip,sizeof(ip),&port);
- formatPeerId(peerid,peerid_len,ip,port);
- return (retval == -1) ? REDIS_ERR : REDIS_OK;
+ anetFormatPeer(client->fd,peerid,peerid_len);
}
}
/* This function returns the client peer id, by creating and caching it
- * if client->perrid is NULL, otherwise returning the cached value.
+ * if client->peerid is NULL, otherwise returning the cached value.
* The Peer ID never changes during the life of the client, however it
* is expensive to compute. */
-char *getClientPeerId(redisClient *c) {
- char peerid[REDIS_PEER_ID_LEN];
+char *getClientPeerId(client *c) {
+ char peerid[NET_PEER_ID_LEN];
if (c->peerid == NULL) {
genClientPeerId(c,peerid,sizeof(peerid));
@@ -1274,26 +1486,26 @@ char *getClientPeerId(redisClient *c) {
/* Concatenate a string representing the state of a client in an human
* readable format, into the sds string 's'. */
-sds catClientInfoString(sds s, redisClient *client) {
+sds catClientInfoString(sds s, client *client) {
char flags[16], events[3], *p;
int emask;
p = flags;
- if (client->flags & REDIS_SLAVE) {
- if (client->flags & REDIS_MONITOR)
+ if (client->flags & CLIENT_SLAVE) {
+ if (client->flags & CLIENT_MONITOR)
*p++ = 'O';
else
*p++ = 'S';
}
- if (client->flags & REDIS_MASTER) *p++ = 'M';
- if (client->flags & REDIS_MULTI) *p++ = 'x';
- if (client->flags & REDIS_BLOCKED) *p++ = 'b';
- if (client->flags & REDIS_DIRTY_CAS) *p++ = 'd';
- if (client->flags & REDIS_CLOSE_AFTER_REPLY) *p++ = 'c';
- if (client->flags & REDIS_UNBLOCKED) *p++ = 'u';
- if (client->flags & REDIS_CLOSE_ASAP) *p++ = 'A';
- if (client->flags & REDIS_UNIX_SOCKET) *p++ = 'U';
- if (client->flags & REDIS_READONLY) *p++ = 'r';
+ if (client->flags & CLIENT_MASTER) *p++ = 'M';
+ if (client->flags & CLIENT_MULTI) *p++ = 'x';
+ if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
+ if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
+ if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c';
+ if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u';
+ if (client->flags & CLIENT_CLOSE_ASAP) *p++ = 'A';
+ if (client->flags & CLIENT_UNIX_SOCKET) *p++ = 'U';
+ if (client->flags & CLIENT_READONLY) *p++ = 'r';
if (p == flags) *p++ = 'N';
*p++ = '\0';
@@ -1314,7 +1526,7 @@ sds catClientInfoString(sds s, redisClient *client) {
client->db->id,
(int) dictSize(client->pubsub_channels),
(int) listLength(client->pubsub_patterns),
- (client->flags & REDIS_MULTI) ? client->mstate.count : -1,
+ (client->flags & CLIENT_MULTI) ? client->mstate.count : -1,
(unsigned long long) sdslen(client->querybuf),
(unsigned long long) sdsavail(client->querybuf),
(unsigned long long) client->bufpos,
@@ -1327,10 +1539,9 @@ sds catClientInfoString(sds s, redisClient *client) {
sds getAllClientsInfoString(void) {
listNode *ln;
listIter li;
- redisClient *client;
- sds o = sdsempty();
-
- o = sdsMakeRoomFor(o,200*listLength(server.clients));
+ client *client;
+ sds o = sdsnewlen(NULL,200*listLength(server.clients));
+ sdsclear(o);
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
@@ -1340,16 +1551,30 @@ sds getAllClientsInfoString(void) {
return o;
}
-void clientCommand(redisClient *c) {
+void clientCommand(client *c) {
listNode *ln;
listIter li;
- redisClient *client;
+ client *client;
if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) {
/* CLIENT LIST */
sds o = getAllClientsInfoString();
addReplyBulkCBuffer(c,o,sdslen(o));
sdsfree(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"reply") && c->argc == 3) {
+ /* CLIENT REPLY ON|OFF|SKIP */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ c->flags &= ~(CLIENT_REPLY_SKIP|CLIENT_REPLY_OFF);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ c->flags |= CLIENT_REPLY_OFF;
+ } else if (!strcasecmp(c->argv[2]->ptr,"skip")) {
+ if (!(c->flags & CLIENT_REPLY_OFF))
+ c->flags |= CLIENT_REPLY_SKIP_NEXT;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"kill")) {
/* CLIENT KILL <ip:port>
* CLIENT KILL <option> [value] ... <option> [value] */
@@ -1374,7 +1599,7 @@ void clientCommand(redisClient *c) {
long long tmp;
if (getLongLongFromObjectOrReply(c,c->argv[i+1],&tmp,NULL)
- != REDIS_OK) return;
+ != C_OK) return;
id = tmp;
} else if (!strcasecmp(c->argv[i]->ptr,"type") && moreargs) {
type = getClientTypeByName(c->argv[i+1]->ptr);
@@ -1410,9 +1635,7 @@ void clientCommand(redisClient *c) {
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
if (addr && strcmp(getClientPeerId(client),addr) != 0) continue;
- if (type != -1 &&
- (client->flags & REDIS_MASTER ||
- getClientType(client) != type)) continue;
+ if (type != -1 && getClientType(client) != type) continue;
if (id != 0 && client->id != id) continue;
if (c == client && skipme) continue;
@@ -1437,7 +1660,7 @@ void clientCommand(redisClient *c) {
/* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
* only after we queued the reply to its output buffers. */
- if (close_this_client) c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
} else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) {
int j, len = sdslen(c->argv[2]->ptr);
char *p = c->argv[2]->ptr;
@@ -1475,18 +1698,38 @@ void clientCommand(redisClient *c) {
long long duration;
if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,UNIT_MILLISECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
pauseClients(duration);
addReply(c,shared.ok);
} else {
- addReplyError(c, "Syntax error, try CLIENT (LIST | KILL ip:port | GETNAME | SETNAME connection-name)");
+ addReplyError(c, "Syntax error, try CLIENT (LIST | KILL | GETNAME | SETNAME | PAUSE | REPLY)");
}
}
+/* This callback is bound to POST and "Host:" command names. Those are not
+ * really commands, but are used in security attacks in order to talk to
+ * Redis instances via HTTP, with a technique called "cross protocol scripting"
+ * which exploits the fact that services like Redis will discard invalid
+ * HTTP headers and will process what follows.
+ *
+ * As a protection against this attack, Redis will terminate the connection
+ * when a POST or "Host:" header is seen, and will log the event from
+ * time to time (to avoid creating a DOS as a result of too many logs). */
+void securityWarningCommand(client *c) {
+ static time_t logged_time;
+ time_t now = time(NULL);
+
+ if (labs(now-logged_time) > 60) {
+ serverLog(LL_WARNING,"Possible SECURITY ATTACK detected. It looks like somebody is sending POST or Host: commands to Redis. This is likely due to an attacker attempting to use Cross Protocol Scripting to compromise your Redis instance. Connection aborted.");
+ logged_time = now;
+ }
+ freeClientAsync(c);
+}
+
/* Rewrite the command vector of the client. All the new objects ref count
* is incremented. The old command vector is freed, and the old objects
* ref count is decremented. */
-void rewriteClientCommandVector(redisClient *c, int argc, ...) {
+void rewriteClientCommandVector(client *c, int argc, ...) {
va_list ap;
int j;
robj **argv; /* The new argument vector */
@@ -1509,25 +1752,48 @@ void rewriteClientCommandVector(redisClient *c, int argc, ...) {
c->argv = argv;
c->argc = argc;
c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
- redisAssertWithInfo(c,NULL,c->cmd != NULL);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
va_end(ap);
}
+/* Completely replace the client command vector with the provided one. */
+void replaceClientCommandVector(client *c, int argc, robj **argv) {
+ freeClientArgv(c);
+ zfree(c->argv);
+ c->argv = argv;
+ c->argc = argc;
+ c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
+}
+
/* Rewrite a single item in the command vector.
- * The new val ref count is incremented, and the old decremented. */
-void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
+ * The new val ref count is incremented, and the old decremented.
+ *
+ * It is possible to specify an argument over the current size of the
+ * argument vector: in this case the array of objects gets reallocated
+ * and c->argc set to the max value. However it's up to the caller to
+ *
+ * 1. Make sure there are no "holes" and all the arguments are set.
+ * 2. If the original argument vector was longer than the one we
+ * want to end with, it's up to the caller to set c->argc and
+ * free the no longer used objects on c->argv. */
+void rewriteClientCommandArgument(client *c, int i, robj *newval) {
robj *oldval;
- redisAssertWithInfo(c,NULL,i < c->argc);
+ if (i >= c->argc) {
+ c->argv = zrealloc(c->argv,sizeof(robj*)*(i+1));
+ c->argc = i+1;
+ c->argv[i] = NULL;
+ }
oldval = c->argv[i];
c->argv[i] = newval;
incrRefCount(newval);
- decrRefCount(oldval);
+ if (oldval) decrRefCount(oldval);
/* If this is the command name make sure to fix c->cmd. */
if (i == 0) {
c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
- redisAssertWithInfo(c,NULL,c->cmd != NULL);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
}
}
@@ -1544,8 +1810,10 @@ void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
* Note: this function is very fast so can be called as many time as
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
-unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
- unsigned long list_item_size = sizeof(listNode)+sizeof(robj);
+unsigned long getClientOutputBufferMemoryUsage(client *c) {
+ unsigned long list_item_size = sizeof(listNode)+5;
+ /* The +5 above means we assume an sds16 hdr, may not be true
+ * but is not going to be a problem. */
return c->reply_bytes + (list_item_size*listLength(c->reply));
}
@@ -1554,30 +1822,33 @@ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
* classes of clients.
*
* The function will return one of the following:
- * REDIS_CLIENT_TYPE_NORMAL -> Normal client
- * REDIS_CLIENT_TYPE_SLAVE -> Slave or client executing MONITOR command
- * REDIS_CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
+ * CLIENT_TYPE_NORMAL -> Normal client
+ * CLIENT_TYPE_SLAVE -> Slave or client executing MONITOR command
+ * CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
+ * CLIENT_TYPE_MASTER -> The client representing our replication master.
*/
-int getClientType(redisClient *c) {
- if ((c->flags & REDIS_SLAVE) && !(c->flags & REDIS_MONITOR))
- return REDIS_CLIENT_TYPE_SLAVE;
- if (c->flags & REDIS_PUBSUB)
- return REDIS_CLIENT_TYPE_PUBSUB;
- return REDIS_CLIENT_TYPE_NORMAL;
+int getClientType(client *c) {
+ if (c->flags & CLIENT_MASTER) return CLIENT_TYPE_MASTER;
+ if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR))
+ return CLIENT_TYPE_SLAVE;
+ if (c->flags & CLIENT_PUBSUB) return CLIENT_TYPE_PUBSUB;
+ return CLIENT_TYPE_NORMAL;
}
int getClientTypeByName(char *name) {
- if (!strcasecmp(name,"normal")) return REDIS_CLIENT_TYPE_NORMAL;
- else if (!strcasecmp(name,"slave")) return REDIS_CLIENT_TYPE_SLAVE;
- else if (!strcasecmp(name,"pubsub")) return REDIS_CLIENT_TYPE_PUBSUB;
+ if (!strcasecmp(name,"normal")) return CLIENT_TYPE_NORMAL;
+ else if (!strcasecmp(name,"slave")) return CLIENT_TYPE_SLAVE;
+ else if (!strcasecmp(name,"pubsub")) return CLIENT_TYPE_PUBSUB;
+ else if (!strcasecmp(name,"master")) return CLIENT_TYPE_MASTER;
else return -1;
}
char *getClientTypeName(int class) {
switch(class) {
- case REDIS_CLIENT_TYPE_NORMAL: return "normal";
- case REDIS_CLIENT_TYPE_SLAVE: return "slave";
- case REDIS_CLIENT_TYPE_PUBSUB: return "pubsub";
+ case CLIENT_TYPE_NORMAL: return "normal";
+ case CLIENT_TYPE_SLAVE: return "slave";
+ case CLIENT_TYPE_PUBSUB: return "pubsub";
+ case CLIENT_TYPE_MASTER: return "master";
default: return NULL;
}
}
@@ -1588,11 +1859,15 @@ char *getClientTypeName(int class) {
*
* Return value: non-zero if the client reached the soft or the hard limit.
* Otherwise zero is returned. */
-int checkClientOutputBufferLimits(redisClient *c) {
+int checkClientOutputBufferLimits(client *c) {
int soft = 0, hard = 0, class;
unsigned long used_mem = getClientOutputBufferMemoryUsage(c);
class = getClientType(c);
+ /* For the purpose of output buffer limiting, masters are handled
+ * like normal clients. */
+ if (class == CLIENT_TYPE_MASTER) class = CLIENT_TYPE_NORMAL;
+
if (server.client_obuf_limits[class].hard_limit_bytes &&
used_mem >= server.client_obuf_limits[class].hard_limit_bytes)
hard = 1;
@@ -1624,40 +1899,48 @@ int checkClientOutputBufferLimits(redisClient *c) {
/* Asynchronously close a client if soft or hard limit is reached on the
* output buffer size. The caller can check if the client will be closed
- * checking if the client REDIS_CLOSE_ASAP flag is set.
+ * checking if the client CLIENT_CLOSE_ASAP flag is set.
*
* Note: we need to close the client asynchronously because this function is
* called from contexts where the client can't be freed safely, i.e. from the
* lower level functions pushing data inside the client output buffers. */
-void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) {
- redisAssert(c->reply_bytes < ULONG_MAX-(1024*64));
- if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return;
+void asyncCloseClientOnOutputBufferLimitReached(client *c) {
+ serverAssert(c->reply_bytes < SIZE_MAX-(1024*64));
+ if (c->reply_bytes == 0 || c->flags & CLIENT_CLOSE_ASAP) return;
if (checkClientOutputBufferLimits(c)) {
sds client = catClientInfoString(sdsempty(),c);
freeClientAsync(c);
- redisLog(REDIS_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
+ serverLog(LL_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
sdsfree(client);
}
}
/* Helper function used by freeMemoryIfNeeded() in order to flush slaves
- * output buffers without returning control to the event loop. */
+ * output buffers without returning control to the event loop.
+ * This is also called by SHUTDOWN for a best-effort attempt to send
+ * slaves the latest writes. */
void flushSlavesOutputBuffers(void) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
+ client *slave = listNodeValue(ln);
int events;
+ /* Note that the following will not flush output buffers of slaves
+ * in STATE_ONLINE but having put_online_on_ack set to true: in this
+ * case the writable event is never installed, since the purpose
+ * of put_online_on_ack is to postpone the moment it is installed.
+ * This is what we want since slaves in this state should not receive
+ * writes before the first ACK. */
events = aeGetFileEvents(server.el,slave->fd);
if (events & AE_WRITABLE &&
- slave->replstate == REDIS_REPL_ONLINE &&
- listLength(slave->reply))
+ slave->replstate == SLAVE_STATE_ONLINE &&
+ clientHasPendingReplies(slave))
{
- sendReplyToClient(server.el,slave->fd,slave,0);
+ writeToClient(slave->fd,slave,0);
}
}
}
@@ -1688,10 +1971,12 @@ void pauseClients(mstime_t end) {
/* Return non-zero if clients are currently paused. As a side effect the
* function checks if the pause time was reached and clear it. */
int clientsArePaused(void) {
- if (server.clients_paused && server.clients_pause_end_time < server.mstime) {
+ if (server.clients_paused &&
+ server.clients_pause_end_time < server.mstime)
+ {
listNode *ln;
listIter li;
- redisClient *c;
+ client *c;
server.clients_paused = 0;
@@ -1701,7 +1986,10 @@ int clientsArePaused(void) {
while ((ln = listNext(&li)) != NULL) {
c = listNodeValue(ln);
- if (c->flags & REDIS_SLAVE) continue;
+ /* Don't touch slaves and blocked clients. The latter pending
+ * requests be processed when unblocked. */
+ if (c->flags & (CLIENT_SLAVE|CLIENT_BLOCKED)) continue;
+ c->flags |= CLIENT_UNBLOCKED;
listAddNodeTail(server.unblocked_clients,c);
}
}
@@ -1715,7 +2003,7 @@ int clientsArePaused(void) {
* and so forth.
*
* It calls the event loop in order to process a few events. Specifically we
- * try to call the event loop for times as long as we receive acknowledge that
+ * try to call the event loop 4 times as long as we receive acknowledge that
* some event was processed, in order to go forward with the accept, read,
* write, close sequence needed to serve a client.
*
@@ -1724,7 +2012,9 @@ int processEventsWhileBlocked(void) {
int iterations = 4; /* See the function top-comment. */
int count = 0;
while (iterations--) {
- int events = aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
+ int events = 0;
+ events += aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
+ events += handleClientsWithPendingWrites();
if (!events) break;
count += events;
}
diff --git a/src/notify.c b/src/notify.c
index f77239ecf..94a1f2e79 100644
--- a/src/notify.c
+++ b/src/notify.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* This file implements keyspace events notification via Pub/Sub ad
* described at http://redis.io/topics/keyspace-events. */
@@ -43,17 +43,17 @@ int keyspaceEventsStringToFlags(char *classes) {
while((c = *p++) != '\0') {
switch(c) {
- case 'A': flags |= REDIS_NOTIFY_ALL; break;
- case 'g': flags |= REDIS_NOTIFY_GENERIC; break;
- case '$': flags |= REDIS_NOTIFY_STRING; break;
- case 'l': flags |= REDIS_NOTIFY_LIST; break;
- case 's': flags |= REDIS_NOTIFY_SET; break;
- case 'h': flags |= REDIS_NOTIFY_HASH; break;
- case 'z': flags |= REDIS_NOTIFY_ZSET; break;
- case 'x': flags |= REDIS_NOTIFY_EXPIRED; break;
- case 'e': flags |= REDIS_NOTIFY_EVICTED; break;
- case 'K': flags |= REDIS_NOTIFY_KEYSPACE; break;
- case 'E': flags |= REDIS_NOTIFY_KEYEVENT; break;
+ case 'A': flags |= NOTIFY_ALL; break;
+ case 'g': flags |= NOTIFY_GENERIC; break;
+ case '$': flags |= NOTIFY_STRING; break;
+ case 'l': flags |= NOTIFY_LIST; break;
+ case 's': flags |= NOTIFY_SET; break;
+ case 'h': flags |= NOTIFY_HASH; break;
+ case 'z': flags |= NOTIFY_ZSET; break;
+ case 'x': flags |= NOTIFY_EXPIRED; break;
+ case 'e': flags |= NOTIFY_EVICTED; break;
+ case 'K': flags |= NOTIFY_KEYSPACE; break;
+ case 'E': flags |= NOTIFY_KEYEVENT; break;
default: return -1;
}
}
@@ -68,20 +68,20 @@ sds keyspaceEventsFlagsToString(int flags) {
sds res;
res = sdsempty();
- if ((flags & REDIS_NOTIFY_ALL) == REDIS_NOTIFY_ALL) {
+ if ((flags & NOTIFY_ALL) == NOTIFY_ALL) {
res = sdscatlen(res,"A",1);
} else {
- if (flags & REDIS_NOTIFY_GENERIC) res = sdscatlen(res,"g",1);
- if (flags & REDIS_NOTIFY_STRING) res = sdscatlen(res,"$",1);
- if (flags & REDIS_NOTIFY_LIST) res = sdscatlen(res,"l",1);
- if (flags & REDIS_NOTIFY_SET) res = sdscatlen(res,"s",1);
- if (flags & REDIS_NOTIFY_HASH) res = sdscatlen(res,"h",1);
- if (flags & REDIS_NOTIFY_ZSET) res = sdscatlen(res,"z",1);
- if (flags & REDIS_NOTIFY_EXPIRED) res = sdscatlen(res,"x",1);
- if (flags & REDIS_NOTIFY_EVICTED) res = sdscatlen(res,"e",1);
+ if (flags & NOTIFY_GENERIC) res = sdscatlen(res,"g",1);
+ if (flags & NOTIFY_STRING) res = sdscatlen(res,"$",1);
+ if (flags & NOTIFY_LIST) res = sdscatlen(res,"l",1);
+ if (flags & NOTIFY_SET) res = sdscatlen(res,"s",1);
+ if (flags & NOTIFY_HASH) res = sdscatlen(res,"h",1);
+ if (flags & NOTIFY_ZSET) res = sdscatlen(res,"z",1);
+ if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1);
+ if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1);
}
- if (flags & REDIS_NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1);
- if (flags & REDIS_NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1);
+ if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1);
+ if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1);
return res;
}
@@ -104,25 +104,25 @@ void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid) {
eventobj = createStringObject(event,strlen(event));
/* __keyspace@<db>__:<key> <event> notifications. */
- if (server.notify_keyspace_events & REDIS_NOTIFY_KEYSPACE) {
+ if (server.notify_keyspace_events & NOTIFY_KEYSPACE) {
chan = sdsnewlen("__keyspace@",11);
len = ll2string(buf,sizeof(buf),dbid);
chan = sdscatlen(chan, buf, len);
chan = sdscatlen(chan, "__:", 3);
chan = sdscatsds(chan, key->ptr);
- chanobj = createObject(REDIS_STRING, chan);
+ chanobj = createObject(OBJ_STRING, chan);
pubsubPublishMessage(chanobj, eventobj);
decrRefCount(chanobj);
}
/* __keyevente@<db>__:<event> <key> notifications. */
- if (server.notify_keyspace_events & REDIS_NOTIFY_KEYEVENT) {
+ if (server.notify_keyspace_events & NOTIFY_KEYEVENT) {
chan = sdsnewlen("__keyevent@",11);
if (len == -1) len = ll2string(buf,sizeof(buf),dbid);
chan = sdscatlen(chan, buf, len);
chan = sdscatlen(chan, "__:", 3);
chan = sdscatsds(chan, eventobj->ptr);
- chanobj = createObject(REDIS_STRING, chan);
+ chanobj = createObject(OBJ_STRING, chan);
pubsubPublishMessage(chanobj, key);
decrRefCount(chanobj);
}
diff --git a/src/object.c b/src/object.c
index 6b8e42477..08c9ad956 100644
--- a/src/object.c
+++ b/src/object.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
#include <ctype.h>
@@ -36,39 +36,68 @@
#define strtold(a,b) ((long double)strtod((a),(b)))
#endif
+/* ===================== Creation and parsing of objects ==================== */
+
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
- o->encoding = REDIS_ENCODING_RAW;
+ o->encoding = OBJ_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
- /* Set the LRU to the current lruclock (minutes resolution). */
- o->lru = LRU_CLOCK();
+ /* Set the LRU to the current lruclock (minutes resolution), or
+ * alternatively the LFU counter. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
+ } else {
+ o->lru = LRU_CLOCK();
+ }
return o;
}
-/* Create a string object with encoding REDIS_ENCODING_RAW, that is a plain
+/* Set a special refcount in the object to make it "shared":
+ * incrRefCount and decrRefCount() will test for this special refcount
+ * and will not touch the object. This way it is free to access shared
+ * objects such as small integers from different threads without any
+ * mutex.
+ *
+ * A common patter to create shared objects:
+ *
+ * robj *myobject = makeObjectShared(createObject(...));
+ *
+ */
+robj *makeObjectShared(robj *o) {
+ serverAssert(o->refcount == 1);
+ o->refcount = OBJ_SHARED_REFCOUNT;
+ return o;
+}
+
+/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
-robj *createRawStringObject(char *ptr, size_t len) {
- return createObject(REDIS_STRING,sdsnewlen(ptr,len));
+robj *createRawStringObject(const char *ptr, size_t len) {
+ return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
-/* Create a string object with encoding REDIS_ENCODING_EMBSTR, that is
+/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
-robj *createEmbeddedStringObject(char *ptr, size_t len) {
- robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr)+len+1);
- struct sdshdr *sh = (void*)(o+1);
+robj *createEmbeddedStringObject(const char *ptr, size_t len) {
+ robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
+ struct sdshdr8 *sh = (void*)(o+1);
- o->type = REDIS_STRING;
- o->encoding = REDIS_ENCODING_EMBSTR;
+ o->type = OBJ_STRING;
+ o->encoding = OBJ_ENCODING_EMBSTR;
o->ptr = sh+1;
o->refcount = 1;
- o->lru = LRU_CLOCK();
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
+ } else {
+ o->lru = LRU_CLOCK();
+ }
sh->len = len;
- sh->free = 0;
+ sh->alloc = len;
+ sh->flags = SDS_TYPE_8;
if (ptr) {
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
@@ -79,14 +108,14 @@ robj *createEmbeddedStringObject(char *ptr, size_t len) {
}
/* Create a string object with EMBSTR encoding if it is smaller than
- * REIDS_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
+ * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
* The current limit of 39 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
-#define REDIS_ENCODING_EMBSTR_SIZE_LIMIT 39
-robj *createStringObject(char *ptr, size_t len) {
- if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT)
+#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
+robj *createStringObject(const char *ptr, size_t len) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
@@ -94,42 +123,30 @@ robj *createStringObject(char *ptr, size_t len) {
robj *createStringObjectFromLongLong(long long value) {
robj *o;
- if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
+ if (value >= 0 && value < OBJ_SHARED_INTEGERS) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
if (value >= LONG_MIN && value <= LONG_MAX) {
- o = createObject(REDIS_STRING, NULL);
- o->encoding = REDIS_ENCODING_INT;
+ o = createObject(OBJ_STRING, NULL);
+ o->encoding = OBJ_ENCODING_INT;
o->ptr = (void*)((long)value);
} else {
- o = createObject(REDIS_STRING,sdsfromlonglong(value));
+ o = createObject(OBJ_STRING,sdsfromlonglong(value));
}
}
return o;
}
-/* Note: this function is defined into object.c since here it is where it
- * belongs but it is actually designed to be used just for INCRBYFLOAT */
-robj *createStringObjectFromLongDouble(long double value) {
+/* Create a string object from a long double. If humanfriendly is non-zero
+ * it does not use exponential format and trims trailing zeroes at the end,
+ * however this results in loss of precision. Otherwise exp format is used
+ * and the output of snprintf() is not modified.
+ *
+ * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
char buf[256];
- int len;
-
- /* We use 17 digits precision since with 128 bit floats that precision
- * after rounding is able to represent most small decimal numbers in a way
- * that is "non surprising" for the user (that is, most small decimal
- * numbers will be represented in a way that when converted back into
- * a string are exactly the same as what the user typed.) */
- len = snprintf(buf,sizeof(buf),"%.17Lf", value);
- /* Now remove trailing zeroes after the '.' */
- if (strchr(buf,'.') != NULL) {
- char *p = buf+len-1;
- while(*p == '0') {
- p--;
- len--;
- }
- if (*p == '.') len--;
- }
+ int len = ld2string(buf,sizeof(buf),value,humanfriendly);
return createStringObject(buf,len);
}
@@ -141,60 +158,59 @@ robj *createStringObjectFromLongDouble(long double value) {
* will always result in a fresh object that is unshared (refcount == 1).
*
* The resulting object always has refcount set to 1. */
-robj *dupStringObject(robj *o) {
+robj *dupStringObject(const robj *o) {
robj *d;
- redisAssert(o->type == REDIS_STRING);
+ serverAssert(o->type == OBJ_STRING);
switch(o->encoding) {
- case REDIS_ENCODING_RAW:
+ case OBJ_ENCODING_RAW:
return createRawStringObject(o->ptr,sdslen(o->ptr));
- case REDIS_ENCODING_EMBSTR:
+ case OBJ_ENCODING_EMBSTR:
return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
- case REDIS_ENCODING_INT:
- d = createObject(REDIS_STRING, NULL);
- d->encoding = REDIS_ENCODING_INT;
+ case OBJ_ENCODING_INT:
+ d = createObject(OBJ_STRING, NULL);
+ d->encoding = OBJ_ENCODING_INT;
d->ptr = o->ptr;
return d;
default:
- redisPanic("Wrong encoding.");
+ serverPanic("Wrong encoding.");
break;
}
}
-robj *createListObject(void) {
- list *l = listCreate();
- robj *o = createObject(REDIS_LIST,l);
- listSetFreeMethod(l,decrRefCountVoid);
- o->encoding = REDIS_ENCODING_LINKEDLIST;
+robj *createQuicklistObject(void) {
+ quicklist *l = quicklistCreate();
+ robj *o = createObject(OBJ_LIST,l);
+ o->encoding = OBJ_ENCODING_QUICKLIST;
return o;
}
robj *createZiplistObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_LIST,zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_LIST,zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
robj *createSetObject(void) {
dict *d = dictCreate(&setDictType,NULL);
- robj *o = createObject(REDIS_SET,d);
- o->encoding = REDIS_ENCODING_HT;
+ robj *o = createObject(OBJ_SET,d);
+ o->encoding = OBJ_ENCODING_HT;
return o;
}
robj *createIntsetObject(void) {
intset *is = intsetNew();
- robj *o = createObject(REDIS_SET,is);
- o->encoding = REDIS_ENCODING_INTSET;
+ robj *o = createObject(OBJ_SET,is);
+ o->encoding = OBJ_ENCODING_INTSET;
return o;
}
robj *createHashObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_HASH, zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_HASH, zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
@@ -204,99 +220,108 @@ robj *createZsetObject(void) {
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
- o = createObject(REDIS_ZSET,zs);
- o->encoding = REDIS_ENCODING_SKIPLIST;
+ o = createObject(OBJ_ZSET,zs);
+ o->encoding = OBJ_ENCODING_SKIPLIST;
return o;
}
robj *createZsetZiplistObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_ZSET,zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_ZSET,zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
+robj *createModuleObject(moduleType *mt, void *value) {
+ moduleValue *mv = zmalloc(sizeof(*mv));
+ mv->type = mt;
+ mv->value = value;
+ return createObject(OBJ_MODULE,mv);
+}
+
void freeStringObject(robj *o) {
- if (o->encoding == REDIS_ENCODING_RAW) {
+ if (o->encoding == OBJ_ENCODING_RAW) {
sdsfree(o->ptr);
}
}
void freeListObject(robj *o) {
- switch (o->encoding) {
- case REDIS_ENCODING_LINKEDLIST:
- listRelease((list*) o->ptr);
- break;
- case REDIS_ENCODING_ZIPLIST:
- zfree(o->ptr);
- break;
- default:
- redisPanic("Unknown list encoding type");
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklistRelease(o->ptr);
+ } else {
+ serverPanic("Unknown list encoding type");
}
}
void freeSetObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_HT:
+ case OBJ_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
- case REDIS_ENCODING_INTSET:
+ case OBJ_ENCODING_INTSET:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown set encoding type");
+ serverPanic("Unknown set encoding type");
}
}
void freeZsetObject(robj *o) {
zset *zs;
switch (o->encoding) {
- case REDIS_ENCODING_SKIPLIST:
+ case OBJ_ENCODING_SKIPLIST:
zs = o->ptr;
dictRelease(zs->dict);
zslFree(zs->zsl);
zfree(zs);
break;
- case REDIS_ENCODING_ZIPLIST:
+ case OBJ_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
}
void freeHashObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_HT:
+ case OBJ_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
- case REDIS_ENCODING_ZIPLIST:
+ case OBJ_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown hash encoding type");
+ serverPanic("Unknown hash encoding type");
break;
}
}
+void freeModuleObject(robj *o) {
+ moduleValue *mv = o->ptr;
+ mv->type->free(mv->value);
+ zfree(mv);
+}
+
void incrRefCount(robj *o) {
- o->refcount++;
+ if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount++;
}
void decrRefCount(robj *o) {
- if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
if (o->refcount == 1) {
switch(o->type) {
- case REDIS_STRING: freeStringObject(o); break;
- case REDIS_LIST: freeListObject(o); break;
- case REDIS_SET: freeSetObject(o); break;
- case REDIS_ZSET: freeZsetObject(o); break;
- case REDIS_HASH: freeHashObject(o); break;
- default: redisPanic("Unknown object type"); break;
+ case OBJ_STRING: freeStringObject(o); break;
+ case OBJ_LIST: freeListObject(o); break;
+ case OBJ_SET: freeSetObject(o); break;
+ case OBJ_ZSET: freeZsetObject(o); break;
+ case OBJ_HASH: freeHashObject(o); break;
+ case OBJ_MODULE: freeModuleObject(o); break;
+ default: serverPanic("Unknown object type"); break;
}
zfree(o);
} else {
- o->refcount--;
+ if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
+ if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
}
}
@@ -324,7 +349,7 @@ robj *resetRefCount(robj *obj) {
return obj;
}
-int checkType(redisClient *c, robj *o, int type) {
+int checkType(client *c, robj *o, int type) {
if (o->type != type) {
addReply(c,shared.wrongtypeerr);
return 1;
@@ -332,13 +357,17 @@ int checkType(redisClient *c, robj *o, int type) {
return 0;
}
+int isSdsRepresentableAsLongLong(sds s, long long *llval) {
+ return string2ll(s,sdslen(s),llval) ? C_OK : C_ERR;
+}
+
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
- if (o->encoding == REDIS_ENCODING_INT) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (o->encoding == OBJ_ENCODING_INT) {
if (llval) *llval = (long) o->ptr;
- return REDIS_OK;
+ return C_OK;
} else {
- return string2ll(o->ptr,sdslen(o->ptr),llval) ? REDIS_OK : REDIS_ERR;
+ return isSdsRepresentableAsLongLong(o->ptr,llval);
}
}
@@ -352,7 +381,7 @@ robj *tryObjectEncoding(robj *o) {
* in this function. Other types use encoded memory efficient
* representations but are handled by the commands implementing
* the type. */
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
/* We try some specialized encoding only for objects that are
* RAW or EMBSTR encoded, in other words objects that are still
@@ -365,26 +394,25 @@ robj *tryObjectEncoding(robj *o) {
if (o->refcount > 1) return o;
/* Check if we can represent this string as a long integer.
- * Note that we are sure that a string larger than 21 chars is not
+ * Note that we are sure that a string larger than 20 chars is not
* representable as a 32 nor 64 bit integer. */
len = sdslen(s);
- if (len <= 21 && string2l(s,len,&value)) {
+ if (len <= 20 && string2l(s,len,&value)) {
/* This object is encodable as a long. Try to use a shared object.
* Note that we avoid using shared integers when maxmemory is used
* because every object needs to have a private LRU field for the LRU
* algorithm to work well. */
if ((server.maxmemory == 0 ||
- (server.maxmemory_policy != REDIS_MAXMEMORY_VOLATILE_LRU &&
- server.maxmemory_policy != REDIS_MAXMEMORY_ALLKEYS_LRU)) &&
+ !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
value >= 0 &&
- value < REDIS_SHARED_INTEGERS)
+ value < OBJ_SHARED_INTEGERS)
{
decrRefCount(o);
incrRefCount(shared.integers[value]);
return shared.integers[value];
} else {
- if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
- o->encoding = REDIS_ENCODING_INT;
+ if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
+ o->encoding = OBJ_ENCODING_INT;
o->ptr = (void*) value;
return o;
}
@@ -394,10 +422,10 @@ robj *tryObjectEncoding(robj *o) {
* try the EMBSTR encoding which is more efficient.
* In this representation the object and the SDS string are allocated
* in the same chunk of memory to save space and cache misses. */
- if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
robj *emb;
- if (o->encoding == REDIS_ENCODING_EMBSTR) return o;
+ if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
emb = createEmbeddedStringObject(s,sdslen(s));
decrRefCount(o);
return emb;
@@ -411,8 +439,8 @@ robj *tryObjectEncoding(robj *o) {
*
* We do that only for relatively large strings as this branch
* is only entered if the length of the string is greater than
- * REDIS_ENCODING_EMBSTR_SIZE_LIMIT. */
- if (o->encoding == REDIS_ENCODING_RAW &&
+ * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
+ if (o->encoding == OBJ_ENCODING_RAW &&
sdsavail(s) > len/10)
{
o->ptr = sdsRemoveFreeSpace(o->ptr);
@@ -431,14 +459,14 @@ robj *getDecodedObject(robj *o) {
incrRefCount(o);
return o;
}
- if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
+ if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
char buf[32];
ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else {
- redisPanic("Unknown encoding type");
+ serverPanic("Unknown encoding type");
}
}
@@ -454,7 +482,7 @@ robj *getDecodedObject(robj *o) {
#define REDIS_COMPARE_COLL (1<<1)
int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
- redisAssertWithInfo(NULL,a,a->type == REDIS_STRING && b->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
char bufa[128], bufb[128], *astr, *bstr;
size_t alen, blen, minlen;
@@ -500,8 +528,8 @@ int collateStringObjects(robj *a, robj *b) {
* this function is faster then checking for (compareStringObject(a,b) == 0)
* because it can perform some more optimization. */
int equalStringObjects(robj *a, robj *b) {
- if (a->encoding == REDIS_ENCODING_INT &&
- b->encoding == REDIS_ENCODING_INT){
+ if (a->encoding == OBJ_ENCODING_INT &&
+ b->encoding == OBJ_ENCODING_INT){
/* If both strings are integer encoded just check if the stored
* long is the same. */
return a->ptr == b->ptr;
@@ -511,56 +539,54 @@ int equalStringObjects(robj *a, robj *b) {
}
size_t stringObjectLen(robj *o) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
return sdslen(o->ptr);
} else {
- char buf[32];
-
- return ll2string(buf,32,(long)o->ptr);
+ return sdigits10((long)o->ptr);
}
}
-int getDoubleFromObject(robj *o, double *target) {
+int getDoubleFromObject(const robj *o, double *target) {
double value;
char *eptr;
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
errno = 0;
value = strtod(o->ptr, &eptr);
- if (isspace(((char*)o->ptr)[0]) ||
+ if (isspace(((const char*)o->ptr)[0]) ||
eptr[0] != '\0' ||
(errno == ERANGE &&
(value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
errno == EINVAL ||
isnan(value))
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
double value;
- if (getDoubleFromObject(o, &value) != REDIS_OK) {
+ if (getDoubleFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a valid float");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
int getLongDoubleFromObject(robj *o, long double *target) {
@@ -570,127 +596,411 @@ int getLongDoubleFromObject(robj *o, long double *target) {
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
errno = 0;
value = strtold(o->ptr, &eptr);
if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' ||
errno == ERANGE || isnan(value))
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg) {
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
long double value;
- if (getLongDoubleFromObject(o, &value) != REDIS_OK) {
+ if (getLongDoubleFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a valid float");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
int getLongLongFromObject(robj *o, long long *target) {
long long value;
- char *eptr;
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
- errno = 0;
- value = strtoll(o->ptr, &eptr, 10);
- if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' ||
- errno == ERANGE)
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ if (string2ll(o->ptr,sdslen(o->ptr),&value) == 0) return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
if (target) *target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
long long value;
- if (getLongLongFromObject(o, &value) != REDIS_OK) {
+ if (getLongLongFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not an integer or out of range");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
+int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
long long value;
- if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
+ if (getLongLongFromObjectOrReply(c, o, &value, msg) != C_OK) return C_ERR;
if (value < LONG_MIN || value > LONG_MAX) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is out of range");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
char *strEncoding(int encoding) {
switch(encoding) {
- case REDIS_ENCODING_RAW: return "raw";
- case REDIS_ENCODING_INT: return "int";
- case REDIS_ENCODING_HT: return "hashtable";
- case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
- case REDIS_ENCODING_ZIPLIST: return "ziplist";
- case REDIS_ENCODING_INTSET: return "intset";
- case REDIS_ENCODING_SKIPLIST: return "skiplist";
- case REDIS_ENCODING_EMBSTR: return "embstr";
+ case OBJ_ENCODING_RAW: return "raw";
+ case OBJ_ENCODING_INT: return "int";
+ case OBJ_ENCODING_HT: return "hashtable";
+ case OBJ_ENCODING_QUICKLIST: return "quicklist";
+ case OBJ_ENCODING_ZIPLIST: return "ziplist";
+ case OBJ_ENCODING_INTSET: return "intset";
+ case OBJ_ENCODING_SKIPLIST: return "skiplist";
+ case OBJ_ENCODING_EMBSTR: return "embstr";
default: return "unknown";
}
}
-/* Given an object returns the min number of milliseconds the object was never
- * requested, using an approximated LRU algorithm. */
-unsigned long long estimateObjectIdleTime(robj *o) {
- unsigned long long lruclock = LRU_CLOCK();
- if (lruclock >= o->lru) {
- return (lruclock - o->lru) * REDIS_LRU_CLOCK_RESOLUTION;
+/* =========================== Memory introspection ========================== */
+
+/* Returns the size in bytes consumed by the key's value in RAM.
+ * Note that the returned value is just an approximation, especially in the
+ * case of aggregated data types where only "sample_size" elements
+ * are checked and averaged to estimate the total size. */
+#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
+size_t objectComputeSize(robj *o, size_t sample_size) {
+ sds ele, ele2;
+ dict *d;
+ dictIterator *di;
+ struct dictEntry *de;
+ size_t asize = 0, elesize = 0, samples = 0;
+
+ if (o->type == OBJ_STRING) {
+ if(o->encoding == OBJ_ENCODING_INT) {
+ asize = sizeof(*o);
+ } else if(o->encoding == OBJ_ENCODING_RAW) {
+ asize = sdsAllocSize(o->ptr)+sizeof(*o);
+ } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
+ asize = sdslen(o->ptr)+2+sizeof(*o);
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+ } else if (o->type == OBJ_LIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
+ asize = sizeof(*o)+sizeof(quicklist);
+ do {
+ elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
+ samples++;
+ } while ((node = node->next) && samples < sample_size);
+ asize += (double)elesize/samples*listTypeLength(o);
+ } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+ziplistBlobLen(o->ptr);
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (o->type == OBJ_SET) {
+ if (o->encoding == OBJ_ENCODING_HT) {
+ d = o->ptr;
+ di = dictGetIterator(d);
+ asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while((de = dictNext(di)) != NULL && samples < sample_size) {
+ ele = dictGetKey(de);
+ elesize += sizeof(struct dictEntry) + sdsAllocSize(ele);
+ samples++;
+ }
+ dictReleaseIterator(di);
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
+ intset *is = o->ptr;
+ asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (o->type == OBJ_ZSET) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+ d = ((zset*)o->ptr)->dict;
+ zskiplist *zsl = ((zset*)o->ptr)->zsl;
+ zskiplistNode *znode = zsl->header->level[0].forward;
+ asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while(znode != NULL && samples < sample_size) {
+ elesize += sdsAllocSize(znode->ele);
+ elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
+ samples++;
+ znode = znode->level[0].forward;
+ }
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (o->type == OBJ_HASH) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ d = o->ptr;
+ di = dictGetIterator(d);
+ asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while((de = dictNext(di)) != NULL && samples < sample_size) {
+ ele = dictGetKey(de);
+ ele2 = dictGetVal(de);
+ elesize += sdsAllocSize(ele) + sdsAllocSize(ele2);
+ elesize += sizeof(struct dictEntry);
+ samples++;
+ }
+ dictReleaseIterator(di);
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (o->type == OBJ_MODULE) {
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ if (mt->mem_usage != NULL) {
+ asize = mt->mem_usage(mv->value);
+ } else {
+ asize = 0;
+ }
} else {
- return (lruclock + (REDIS_LRU_CLOCK_MAX - o->lru)) *
- REDIS_LRU_CLOCK_RESOLUTION;
+ serverPanic("Unknown object type");
}
+ return asize;
+}
+
+/* Release data obtained with getMemoryOverheadData(). */
+void freeMemoryOverheadData(struct redisMemOverhead *mh) {
+ zfree(mh->db);
+ zfree(mh);
+}
+
+/* Return a struct redisMemOverhead filled with memory overhead
+ * information used for the MEMORY OVERHEAD and INFO command. The returned
+ * structure pointer should be freed calling freeMemoryOverheadData(). */
+struct redisMemOverhead *getMemoryOverheadData(void) {
+ int j;
+ size_t mem_total = 0;
+ size_t mem = 0;
+ size_t zmalloc_used = zmalloc_used_memory();
+ struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
+
+ mh->total_allocated = zmalloc_used;
+ mh->startup_allocated = server.initial_memory_usage;
+ mh->peak_allocated = server.stat_peak_memory;
+ mh->fragmentation =
+ zmalloc_get_fragmentation_ratio(server.resident_set_size);
+ mem_total += server.initial_memory_usage;
+
+ mem = 0;
+ if (server.repl_backlog)
+ mem += zmalloc_size(server.repl_backlog);
+ mh->repl_backlog = mem;
+ mem_total += mem;
+
+ mem = 0;
+ if (listLength(server.slaves)) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ mem += getClientOutputBufferMemoryUsage(c);
+ mem += sdsAllocSize(c->querybuf);
+ mem += sizeof(client);
+ }
+ }
+ mh->clients_slaves = mem;
+ mem_total+=mem;
+
+ mem = 0;
+ if (listLength(server.clients)) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ if (c->flags & CLIENT_SLAVE)
+ continue;
+ mem += getClientOutputBufferMemoryUsage(c);
+ mem += sdsAllocSize(c->querybuf);
+ mem += sizeof(client);
+ }
+ }
+ mh->clients_normal = mem;
+ mem_total+=mem;
+
+ mem = 0;
+ if (server.aof_state != AOF_OFF) {
+ mem += sdslen(server.aof_buf);
+ mem += aofRewriteBufferSize();
+ }
+ mh->aof_buffer = mem;
+ mem_total+=mem;
+
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+ long long keyscount = dictSize(db->dict);
+ if (keyscount==0) continue;
+
+ mh->total_keys += keyscount;
+ mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
+ mh->db[mh->num_dbs].dbid = j;
+
+ mem = dictSize(db->dict) * sizeof(dictEntry) +
+ dictSlots(db->dict) * sizeof(dictEntry*) +
+ dictSize(db->dict) * sizeof(robj);
+ mh->db[mh->num_dbs].overhead_ht_main = mem;
+ mem_total+=mem;
+
+ mem = dictSize(db->expires) * sizeof(dictEntry) +
+ dictSlots(db->expires) * sizeof(dictEntry*);
+ mh->db[mh->num_dbs].overhead_ht_expires = mem;
+ mem_total+=mem;
+
+ mh->num_dbs++;
+ }
+
+ mh->overhead_total = mem_total;
+ mh->dataset = zmalloc_used - mem_total;
+ mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
+
+ /* Metrics computed after subtracting the startup memory from
+ * the total memory. */
+ size_t net_usage = 1;
+ if (zmalloc_used > mh->startup_allocated)
+ net_usage = zmalloc_used - mh->startup_allocated;
+ mh->dataset_perc = (float)mh->dataset*100/net_usage;
+ mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
+
+ return mh;
}
+/* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
+ * stats output. */
+void inputCatSds(void *result, const char *str) {
+ /* result is actually a (sds *), so re-cast it here */
+ sds *info = (sds *)result;
+ *info = sdscat(*info, str);
+}
+
+/* This implements MEMORY DOCTOR. An human readable analysis of the Redis
+ * memory condition. */
+sds getMemoryDoctorReport(void) {
+ int empty = 0; /* Instance is empty or almost empty. */
+ int big_peak = 0; /* Memory peak is much larger than used mem. */
+ int high_frag = 0; /* High fragmentation. */
+ int big_slave_buf = 0; /* Slave buffers are too big. */
+ int big_client_buf = 0; /* Client buffers are too big. */
+ int num_reports = 0;
+ struct redisMemOverhead *mh = getMemoryOverheadData();
+
+ if (mh->total_allocated < (1024*1024*5)) {
+ empty = 1;
+ num_reports++;
+ } else {
+ /* Peak is > 150% of current used memory? */
+ if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
+ big_peak = 1;
+ num_reports++;
+ }
+
+ /* Fragmentation is higher than 1.4? */
+ if (mh->fragmentation > 1.4) {
+ high_frag = 1;
+ num_reports++;
+ }
+
+ /* Clients using more than 200k each average? */
+ long numslaves = listLength(server.slaves);
+ long numclients = listLength(server.clients)-numslaves;
+ if (mh->clients_normal / numclients > (1024*200)) {
+ big_client_buf = 1;
+ num_reports++;
+ }
+
+ /* Slaves using more than 10 MB each? */
+ if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
+ big_slave_buf = 1;
+ num_reports++;
+ }
+ }
+
+ sds s;
+ if (num_reports == 0) {
+ s = sdsnew(
+ "Hi Sam, I can't find any memory issue in your instance. "
+ "I can only account for what occurs on this base.\n");
+ } else if (empty == 1) {
+ s = sdsnew(
+ "Hi Sam, this instance is empty or is using very little memory, "
+ "my issues detector can't be used in these conditions. "
+ "Please, leave for your mission on Earth and fill it with some data. "
+ "The new Sam and I will be back to our programming as soon as I "
+ "finished rebooting.\n");
+ } else {
+ s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
+ if (big_peak) {
+ s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
+ }
+ if (high_frag) {
+ s = sdscatprintf(s," * High fragmentation: This instance has a memory fragmentation greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
+ }
+ if (big_slave_buf) {
+ s = sdscat(s," * Big slave buffers: The slave output buffers in this instance are greater than 10MB for each slave (on average). This likely means that there is some slave instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what slave is not receiving data correctly and why. You can use the INFO output in order to check the slaves delays and the CLIENT LIST command to check the output buffers of each slave.\n\n");
+ }
+ if (big_client_buf) {
+ s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
+ }
+ s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
+ }
+ freeMemoryOverheadData(mh);
+ return s;
+}
+
+/* ======================= The OBJECT and MEMORY commands =================== */
+
/* This is a helper function for the OBJECT command. We need to lookup keys
* without any modification of LRU or other parameters. */
-robj *objectCommandLookup(redisClient *c, robj *key) {
+robj *objectCommandLookup(client *c, robj *key) {
dictEntry *de;
if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL;
return (robj*) dictGetVal(de);
}
-robj *objectCommandLookupOrReply(redisClient *c, robj *key, robj *reply) {
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
robj *o = objectCommandLookup(c,key);
if (!o) addReply(c, reply);
@@ -699,7 +1009,7 @@ robj *objectCommandLookupOrReply(redisClient *c, robj *key, robj *reply) {
/* Object command allows to inspect the internals of an Redis Object.
* Usage: OBJECT <refcount|encoding|idletime> <key> */
-void objectCommand(redisClient *c) {
+void objectCommand(client *c) {
robj *o;
if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
@@ -713,9 +1023,156 @@ void objectCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
+ return;
+ }
addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
+ } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
+ == NULL) return;
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
+ addReplyError(c,"An LRU maxmemory policy is selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
+ return;
+ }
+ addReplyLongLong(c,o->lru&255);
} else {
- addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime)");
+ addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime|freq)");
}
}
+/* The memory command will eventually be a complete interface for the
+ * memory introspection capabilities of Redis.
+ *
+ * Usage: MEMORY usage <key> */
+void memoryCommand(client *c) {
+ robj *o;
+
+ if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
+ long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
+ for (int j = 3; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"samples") &&
+ j+1 < c->argc)
+ {
+ if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
+ == C_ERR) return;
+ if (samples < 0) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ if (samples == 0) samples = LLONG_MAX;;
+ j++; /* skip option argument. */
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
+ == NULL) return;
+ size_t usage = objectComputeSize(o,samples);
+ usage += sdsAllocSize(c->argv[1]->ptr);
+ usage += sizeof(dictEntry);
+ addReplyLongLong(c,usage);
+ } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
+ struct redisMemOverhead *mh = getMemoryOverheadData();
+
+ addReplyMultiBulkLen(c,(14+mh->num_dbs)*2);
+
+ addReplyBulkCString(c,"peak.allocated");
+ addReplyLongLong(c,mh->peak_allocated);
+
+ addReplyBulkCString(c,"total.allocated");
+ addReplyLongLong(c,mh->total_allocated);
+
+ addReplyBulkCString(c,"startup.allocated");
+ addReplyLongLong(c,mh->startup_allocated);
+
+ addReplyBulkCString(c,"replication.backlog");
+ addReplyLongLong(c,mh->repl_backlog);
+
+ addReplyBulkCString(c,"clients.slaves");
+ addReplyLongLong(c,mh->clients_slaves);
+
+ addReplyBulkCString(c,"clients.normal");
+ addReplyLongLong(c,mh->clients_normal);
+
+ addReplyBulkCString(c,"aof.buffer");
+ addReplyLongLong(c,mh->aof_buffer);
+
+ for (size_t j = 0; j < mh->num_dbs; j++) {
+ char dbname[32];
+ snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
+ addReplyBulkCString(c,dbname);
+ addReplyMultiBulkLen(c,4);
+
+ addReplyBulkCString(c,"overhead.hashtable.main");
+ addReplyLongLong(c,mh->db[j].overhead_ht_main);
+
+ addReplyBulkCString(c,"overhead.hashtable.expires");
+ addReplyLongLong(c,mh->db[j].overhead_ht_expires);
+ }
+
+ addReplyBulkCString(c,"overhead.total");
+ addReplyLongLong(c,mh->overhead_total);
+
+ addReplyBulkCString(c,"keys.count");
+ addReplyLongLong(c,mh->total_keys);
+
+ addReplyBulkCString(c,"keys.bytes-per-key");
+ addReplyLongLong(c,mh->bytes_per_key);
+
+ addReplyBulkCString(c,"dataset.bytes");
+ addReplyLongLong(c,mh->dataset);
+
+ addReplyBulkCString(c,"dataset.percentage");
+ addReplyDouble(c,mh->dataset_perc);
+
+ addReplyBulkCString(c,"peak.percentage");
+ addReplyDouble(c,mh->peak_perc);
+
+ addReplyBulkCString(c,"fragmentation");
+ addReplyDouble(c,mh->fragmentation);
+
+ freeMemoryOverheadData(mh);
+ } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
+#if defined(USE_JEMALLOC)
+ sds info = sdsempty();
+ je_malloc_stats_print(inputCatSds, &info, NULL);
+ addReplyBulkSds(c, info);
+#else
+ addReplyBulkCString(c,"Stats not supported for the current allocator");
+#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
+ sds report = getMemoryDoctorReport();
+ addReplyBulkSds(c,report);
+ } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
+#if defined(USE_JEMALLOC)
+ char tmp[32];
+ unsigned narenas = 0;
+ size_t sz = sizeof(unsigned);
+ if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
+ sprintf(tmp, "arena.%d.purge", narenas);
+ if (!je_mallctl(tmp, NULL, 0, NULL, 0)) {
+ addReply(c, shared.ok);
+ return;
+ }
+ }
+ addReplyError(c, "Error purging dirty pages");
+#else
+ addReply(c, shared.ok);
+ /* Nothing to do for other allocators. */
+#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
+ addReplyMultiBulkLen(c,4);
+ addReplyBulkCString(c,
+"MEMORY USAGE <key> [SAMPLES <count>] - Estimate memory usage of key");
+ addReplyBulkCString(c,
+"MEMORY STATS - Show memory usage details");
+ addReplyBulkCString(c,
+"MEMORY PURGE - Ask the allocator to release memory");
+ addReplyBulkCString(c,
+"MEMORY MALLOC-STATS - Show allocator internal stats");
+ } else {
+ addReplyError(c,"Syntax error. Try MEMORY HELP");
+ }
+}
diff --git a/src/pubsub.c b/src/pubsub.c
index 720cd5185..b6d1167db 100644
--- a/src/pubsub.c
+++ b/src/pubsub.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* Pubsub low level API
@@ -48,14 +48,14 @@ int listMatchPubsubPattern(void *a, void *b) {
}
/* Return the number of channels + patterns a client is subscribed to. */
-int clientSubscriptionsCount(redisClient *c) {
+int clientSubscriptionsCount(client *c) {
return dictSize(c->pubsub_channels)+
listLength(c->pubsub_patterns);
}
/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
* 0 if the client was already subscribed to that channel. */
-int pubsubSubscribeChannel(redisClient *c, robj *channel) {
+int pubsubSubscribeChannel(client *c, robj *channel) {
dictEntry *de;
list *clients = NULL;
int retval = 0;
@@ -85,7 +85,7 @@ int pubsubSubscribeChannel(redisClient *c, robj *channel) {
/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
* 0 if the client was not subscribed to the specified channel. */
-int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
+int pubsubUnsubscribeChannel(client *c, robj *channel, int notify) {
dictEntry *de;
list *clients;
listNode *ln;
@@ -98,10 +98,10 @@ int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
retval = 1;
/* Remove the client from the channel -> clients list hash table */
de = dictFind(server.pubsub_channels,channel);
- redisAssertWithInfo(c,NULL,de != NULL);
+ serverAssertWithInfo(c,NULL,de != NULL);
clients = dictGetVal(de);
ln = listSearchKey(clients,c);
- redisAssertWithInfo(c,NULL,ln != NULL);
+ serverAssertWithInfo(c,NULL,ln != NULL);
listDelNode(clients,ln);
if (listLength(clients) == 0) {
/* Free the list and associated hash entry at all if this was
@@ -124,7 +124,7 @@ int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
}
/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
-int pubsubSubscribePattern(redisClient *c, robj *pattern) {
+int pubsubSubscribePattern(client *c, robj *pattern) {
int retval = 0;
if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
@@ -147,7 +147,7 @@ int pubsubSubscribePattern(redisClient *c, robj *pattern) {
/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
* 0 if the client was not subscribed to the specified channel. */
-int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
+int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) {
listNode *ln;
pubsubPattern pat;
int retval = 0;
@@ -175,7 +175,7 @@ int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
/* Unsubscribe from all the channels. Return the number of channels the
* client was subscribed to. */
-int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
+int pubsubUnsubscribeAllChannels(client *c, int notify) {
dictIterator *di = dictGetSafeIterator(c->pubsub_channels);
dictEntry *de;
int count = 0;
@@ -199,7 +199,7 @@ int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
/* Unsubscribe from all the patterns. Return the number of patterns the
* client was subscribed from. */
-int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
+int pubsubUnsubscribeAllPatterns(client *c, int notify) {
listNode *ln;
listIter li;
int count = 0;
@@ -237,7 +237,7 @@ int pubsubPublishMessage(robj *channel, robj *message) {
listRewind(list,&li);
while ((ln = listNext(&li)) != NULL) {
- redisClient *c = ln->value;
+ client *c = ln->value;
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.messagebulk);
@@ -274,15 +274,15 @@ int pubsubPublishMessage(robj *channel, robj *message) {
* Pubsub commands implementation
*----------------------------------------------------------------------------*/
-void subscribeCommand(redisClient *c) {
+void subscribeCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribeChannel(c,c->argv[j]);
- c->flags |= REDIS_PUBSUB;
+ c->flags |= CLIENT_PUBSUB;
}
-void unsubscribeCommand(redisClient *c) {
+void unsubscribeCommand(client *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllChannels(c,1);
} else {
@@ -291,18 +291,18 @@ void unsubscribeCommand(redisClient *c) {
for (j = 1; j < c->argc; j++)
pubsubUnsubscribeChannel(c,c->argv[j],1);
}
- if (clientSubscriptionsCount(c) == 0) c->flags &= ~REDIS_PUBSUB;
+ if (clientSubscriptionsCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
}
-void psubscribeCommand(redisClient *c) {
+void psubscribeCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribePattern(c,c->argv[j]);
- c->flags |= REDIS_PUBSUB;
+ c->flags |= CLIENT_PUBSUB;
}
-void punsubscribeCommand(redisClient *c) {
+void punsubscribeCommand(client *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllPatterns(c,1);
} else {
@@ -311,20 +311,20 @@ void punsubscribeCommand(redisClient *c) {
for (j = 1; j < c->argc; j++)
pubsubUnsubscribePattern(c,c->argv[j],1);
}
- if (clientSubscriptionsCount(c) == 0) c->flags &= ~REDIS_PUBSUB;
+ if (clientSubscriptionsCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
}
-void publishCommand(redisClient *c) {
+void publishCommand(client *c) {
int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
if (server.cluster_enabled)
clusterPropagatePublish(c->argv[1],c->argv[2]);
else
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL);
+ forceCommandPropagation(c,PROPAGATE_REPL);
addReplyLongLong(c,receivers);
}
/* PUBSUB command for Pub/Sub introspection. */
-void pubsubCommand(redisClient *c) {
+void pubsubCommand(client *c) {
if (!strcasecmp(c->argv[1]->ptr,"channels") &&
(c->argc == 2 || c->argc ==3))
{
@@ -358,7 +358,7 @@ void pubsubCommand(redisClient *c) {
list *l = dictFetchValue(server.pubsub_channels,c->argv[j]);
addReplyBulk(c,c->argv[j]);
- addReplyBulkLongLong(c,l ? listLength(l) : 0);
+ addReplyLongLong(c,l ? listLength(l) : 0);
}
} else if (!strcasecmp(c->argv[1]->ptr,"numpat") && c->argc == 2) {
/* PUBSUB NUMPAT */
diff --git a/src/quicklist.c b/src/quicklist.c
new file mode 100644
index 000000000..c8b72743c
--- /dev/null
+++ b/src/quicklist.c
@@ -0,0 +1,2651 @@
+/* quicklist.c - A doubly linked list of ziplists
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h> /* for memcpy */
+#include "quicklist.h"
+#include "zmalloc.h"
+#include "ziplist.h"
+#include "util.h" /* for ll2string */
+#include "lzf.h"
+
+#if defined(REDIS_TEST) || defined(REDIS_TEST_VERBOSE)
+#include <stdio.h> /* for printf (debug printing), snprintf (genstr) */
+#endif
+
+#ifndef REDIS_STATIC
+#define REDIS_STATIC static
+#endif
+
+/* Optimization levels for size-based filling */
+static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};
+
+/* Maximum size in bytes of any multi-element ziplist.
+ * Larger values will live in their own isolated ziplists. */
+#define SIZE_SAFETY_LIMIT 8192
+
+/* Minimum ziplist size in bytes for attempting compression. */
+#define MIN_COMPRESS_BYTES 48
+
+/* Minimum size reduction in bytes to store compressed quicklistNode data.
+ * This also prevents us from storing compression if the compression
+ * resulted in a larger size than the original data. */
+#define MIN_COMPRESS_IMPROVE 8
+
+/* If not verbose testing, remove all debug printing. */
+#ifndef REDIS_TEST_VERBOSE
+#define D(...)
+#else
+#define D(...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+ printf("\n"); \
+ } while (0);
+#endif
+
+/* Simple way to give quicklistEntry structs default values with one call. */
+#define initEntry(e) \
+ do { \
+ (e)->zi = (e)->value = NULL; \
+ (e)->longval = -123456789; \
+ (e)->quicklist = NULL; \
+ (e)->node = NULL; \
+ (e)->offset = 123456789; \
+ (e)->sz = 0; \
+ } while (0)
+
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+/* Create a new quicklist.
+ * Free with quicklistRelease(). */
+quicklist *quicklistCreate(void) {
+ struct quicklist *quicklist;
+
+ quicklist = zmalloc(sizeof(*quicklist));
+ quicklist->head = quicklist->tail = NULL;
+ quicklist->len = 0;
+ quicklist->count = 0;
+ quicklist->compress = 0;
+ quicklist->fill = -2;
+ return quicklist;
+}
+
+#define COMPRESS_MAX (1 << 16)
+void quicklistSetCompressDepth(quicklist *quicklist, int compress) {
+ if (compress > COMPRESS_MAX) {
+ compress = COMPRESS_MAX;
+ } else if (compress < 0) {
+ compress = 0;
+ }
+ quicklist->compress = compress;
+}
+
+#define FILL_MAX (1 << 15)
+void quicklistSetFill(quicklist *quicklist, int fill) {
+ if (fill > FILL_MAX) {
+ fill = FILL_MAX;
+ } else if (fill < -5) {
+ fill = -5;
+ }
+ quicklist->fill = fill;
+}
+
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth) {
+ quicklistSetFill(quicklist, fill);
+ quicklistSetCompressDepth(quicklist, depth);
+}
+
+/* Create a new quicklist with some default parameters. */
+quicklist *quicklistNew(int fill, int compress) {
+ quicklist *quicklist = quicklistCreate();
+ quicklistSetOptions(quicklist, fill, compress);
+ return quicklist;
+}
+
+REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
+ quicklistNode *node;
+ node = zmalloc(sizeof(*node));
+ node->zl = NULL;
+ node->count = 0;
+ node->sz = 0;
+ node->next = node->prev = NULL;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ node->container = QUICKLIST_NODE_CONTAINER_ZIPLIST;
+ node->recompress = 0;
+ return node;
+}
+
+/* Return cached quicklist count */
+unsigned int quicklistCount(const quicklist *ql) { return ql->count; }
+
+/* Free entire quicklist. */
+void quicklistRelease(quicklist *quicklist) {
+ unsigned long len;
+ quicklistNode *current, *next;
+
+ current = quicklist->head;
+ len = quicklist->len;
+ while (len--) {
+ next = current->next;
+
+ zfree(current->zl);
+ quicklist->count -= current->count;
+
+ zfree(current);
+
+ quicklist->len--;
+ current = next;
+ }
+ zfree(quicklist);
+}
+
+/* Compress the ziplist in 'node' and update encoding details.
+ * Returns 1 if ziplist compressed successfully.
+ * Returns 0 if compression failed or if ziplist too small to compress. */
+REDIS_STATIC int __quicklistCompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+ node->attempted_compress = 1;
+#endif
+
+ /* Don't bother compressing small values */
+ if (node->sz < MIN_COMPRESS_BYTES)
+ return 0;
+
+ quicklistLZF *lzf = zmalloc(sizeof(*lzf) + node->sz);
+
+ /* Cancel if compression fails or doesn't compress small enough */
+ if (((lzf->sz = lzf_compress(node->zl, node->sz, lzf->compressed,
+ node->sz)) == 0) ||
+ lzf->sz + MIN_COMPRESS_IMPROVE >= node->sz) {
+        /* lzf_compress aborts/rejects compression if value not compressible. */
+ zfree(lzf);
+ return 0;
+ }
+ lzf = zrealloc(lzf, sizeof(*lzf) + lzf->sz);
+ zfree(node->zl);
+ node->zl = (unsigned char *)lzf;
+ node->encoding = QUICKLIST_NODE_ENCODING_LZF;
+ node->recompress = 0;
+ return 1;
+}
+
+/* Compress only uncompressed nodes. */
+#define quicklistCompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_RAW) { \
+ __quicklistCompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Uncompress the ziplist in 'node' and update encoding details.
+ * Returns 1 on successful decode, 0 on failure to decode. */
+REDIS_STATIC int __quicklistDecompressNode(quicklistNode *node) {
+#ifdef REDIS_TEST
+ node->attempted_compress = 0;
+#endif
+
+ void *decompressed = zmalloc(node->sz);
+ quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ if (lzf_decompress(lzf->compressed, lzf->sz, decompressed, node->sz) == 0) {
+ /* Someone requested decompress, but we can't decompress. Not good. */
+ zfree(decompressed);
+ return 0;
+ }
+ zfree(lzf);
+ node->zl = decompressed;
+ node->encoding = QUICKLIST_NODE_ENCODING_RAW;
+ return 1;
+}
+
+/* Decompress only compressed nodes. */
+#define quicklistDecompressNode(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ } \
+ } while (0)
+
+/* Force node to not be immediately re-compressible */
+#define quicklistDecompressNodeForUse(_node) \
+ do { \
+ if ((_node) && (_node)->encoding == QUICKLIST_NODE_ENCODING_LZF) { \
+ __quicklistDecompressNode((_node)); \
+ (_node)->recompress = 1; \
+ } \
+ } while (0)
+
+/* Extract the raw LZF data from this quicklistNode.
+ * Pointer to LZF data is assigned to '*data'.
+ * Return value is the length of compressed LZF data. */
+size_t quicklistGetLzf(const quicklistNode *node, void **data) {
+ quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ *data = lzf->compressed;
+ return lzf->sz;
+}
+
+#define quicklistAllowsCompression(_ql) ((_ql)->compress != 0)
+
+/* Force 'quicklist' to meet compression guidelines set by compress depth.
+ * The only way to guarantee interior nodes get compressed is to iterate
+ * to our "interior" compress depth then compress the next node we find.
+ * If compress depth is larger than the entire list, we return immediately. */
+REDIS_STATIC void __quicklistCompress(const quicklist *quicklist,
+ quicklistNode *node) {
+ /* If length is less than our compress depth (from both sides),
+ * we can't compress anything. */
+ if (!quicklistAllowsCompression(quicklist) ||
+ quicklist->len < (unsigned int)(quicklist->compress * 2))
+ return;
+
+#if 0
+ /* Optimized cases for small depth counts */
+ if (quicklist->compress == 1) {
+ quicklistNode *h = quicklist->head, *t = quicklist->tail;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(t);
+ if (h != node && t != node)
+ quicklistCompressNode(node);
+ return;
+ } else if (quicklist->compress == 2) {
+ quicklistNode *h = quicklist->head, *hn = h->next, *hnn = hn->next;
+ quicklistNode *t = quicklist->tail, *tp = t->prev, *tpp = tp->prev;
+ quicklistDecompressNode(h);
+ quicklistDecompressNode(hn);
+ quicklistDecompressNode(t);
+ quicklistDecompressNode(tp);
+ if (h != node && hn != node && t != node && tp != node) {
+ quicklistCompressNode(node);
+ }
+ if (hnn != t) {
+ quicklistCompressNode(hnn);
+ }
+ if (tpp != h) {
+ quicklistCompressNode(tpp);
+ }
+ return;
+ }
+#endif
+
+    /* Iterate until we reach compress depth for both sides of the list.
+ * Note: because we do length checks at the *top* of this function,
+ * we can skip explicit null checks below. Everything exists. */
+ quicklistNode *forward = quicklist->head;
+ quicklistNode *reverse = quicklist->tail;
+ int depth = 0;
+ int in_depth = 0;
+ while (depth++ < quicklist->compress) {
+ quicklistDecompressNode(forward);
+ quicklistDecompressNode(reverse);
+
+ if (forward == node || reverse == node)
+ in_depth = 1;
+
+ if (forward == reverse)
+ return;
+
+ forward = forward->next;
+ reverse = reverse->prev;
+ }
+
+ if (!in_depth)
+ quicklistCompressNode(node);
+
+ if (depth > 2) {
+ /* At this point, forward and reverse are one node beyond depth */
+ quicklistCompressNode(forward);
+ quicklistCompressNode(reverse);
+ }
+}
+
+#define quicklistCompress(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ else \
+ __quicklistCompress((_ql), (_node)); \
+ } while (0)
+
+/* If we previously used quicklistDecompressNodeForUse(), just recompress. */
+#define quicklistRecompressOnly(_ql, _node) \
+ do { \
+ if ((_node)->recompress) \
+ quicklistCompressNode((_node)); \
+ } while (0)
+
+/* Insert 'new_node' after 'old_node' if 'after' is 1.
+ * Insert 'new_node' before 'old_node' if 'after' is 0.
+ * Note: 'new_node' is *always* uncompressed, so if we assign it to
+ * head or tail, we do not need to uncompress it. */
+REDIS_STATIC void __quicklistInsertNode(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node, int after) {
+ if (after) {
+ new_node->prev = old_node;
+ if (old_node) {
+ new_node->next = old_node->next;
+ if (old_node->next)
+ old_node->next->prev = new_node;
+ old_node->next = new_node;
+ }
+ if (quicklist->tail == old_node)
+ quicklist->tail = new_node;
+ } else {
+ new_node->next = old_node;
+ if (old_node) {
+ new_node->prev = old_node->prev;
+ if (old_node->prev)
+ old_node->prev->next = new_node;
+ old_node->prev = new_node;
+ }
+ if (quicklist->head == old_node)
+ quicklist->head = new_node;
+ }
+ /* If this insert creates the only element so far, initialize head/tail. */
+ if (quicklist->len == 0) {
+ quicklist->head = quicklist->tail = new_node;
+ }
+
+ if (old_node)
+ quicklistCompress(quicklist, old_node);
+
+ quicklist->len++;
+}
+
+/* Wrappers for node inserting around existing node. */
+REDIS_STATIC void _quicklistInsertNodeBefore(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 0);
+}
+
+REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist,
+ quicklistNode *old_node,
+ quicklistNode *new_node) {
+ __quicklistInsertNode(quicklist, old_node, new_node, 1);
+}
+
+REDIS_STATIC int
+_quicklistNodeSizeMeetsOptimizationRequirement(const size_t sz,
+ const int fill) {
+ if (fill >= 0)
+ return 0;
+
+ size_t offset = (-fill) - 1;
+ if (offset < (sizeof(optimization_level) / sizeof(*optimization_level))) {
+ if (sz <= optimization_level[offset]) {
+ return 1;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
+
+#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)
+
+REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node,
+ const int fill, const size_t sz) {
+ if (unlikely(!node))
+ return 0;
+
+ int ziplist_overhead;
+ /* size of previous offset */
+ if (sz < 254)
+ ziplist_overhead = 1;
+ else
+ ziplist_overhead = 5;
+
+ /* size of forward offset */
+ if (sz < 64)
+ ziplist_overhead += 1;
+ else if (likely(sz < 16384))
+ ziplist_overhead += 2;
+ else
+ ziplist_overhead += 5;
+
+ /* new_sz overestimates if 'sz' encodes to an integer type */
+ unsigned int new_sz = node->sz + sz + ziplist_overhead;
+ if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(new_sz, fill)))
+ return 1;
+ else if (!sizeMeetsSafetyLimit(new_sz))
+ return 0;
+ else if ((int)node->count < fill)
+ return 1;
+ else
+ return 0;
+}
+
+REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a,
+ const quicklistNode *b,
+ const int fill) {
+ if (!a || !b)
+ return 0;
+
+ /* approximate merged ziplist size (- 11 to remove one ziplist
+ * header/trailer) */
+ unsigned int merge_sz = a->sz + b->sz - 11;
+ if (likely(_quicklistNodeSizeMeetsOptimizationRequirement(merge_sz, fill)))
+ return 1;
+ else if (!sizeMeetsSafetyLimit(merge_sz))
+ return 0;
+ else if ((int)(a->count + b->count) <= fill)
+ return 1;
+ else
+ return 0;
+}
+
+#define quicklistNodeUpdateSz(node) \
+ do { \
+ (node)->sz = ziplistBlobLen((node)->zl); \
+ } while (0)
+
+/* Add new entry to head node of quicklist.
+ *
+ * Returns 0 if used existing head.
+ * Returns 1 if new head created. */
+int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_head = quicklist->head;
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->head, quicklist->fill, sz))) {
+ quicklist->head->zl =
+ ziplistPush(quicklist->head->zl, value, sz, ZIPLIST_HEAD);
+ quicklistNodeUpdateSz(quicklist->head);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeBefore(quicklist, quicklist->head, node);
+ }
+ quicklist->count++;
+ quicklist->head->count++;
+ return (orig_head != quicklist->head);
+}
+
+/* Add new entry to tail node of quicklist.
+ *
+ * Returns 0 if used existing tail.
+ * Returns 1 if new tail created. */
+int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) {
+ quicklistNode *orig_tail = quicklist->tail;
+ if (likely(
+ _quicklistNodeAllowInsert(quicklist->tail, quicklist->fill, sz))) {
+ quicklist->tail->zl =
+ ziplistPush(quicklist->tail->zl, value, sz, ZIPLIST_TAIL);
+ quicklistNodeUpdateSz(quicklist->tail);
+ } else {
+ quicklistNode *node = quicklistCreateNode();
+ node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_TAIL);
+
+ quicklistNodeUpdateSz(node);
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ }
+ quicklist->count++;
+ quicklist->tail->count++;
+ return (orig_tail != quicklist->tail);
+}
+
+/* Create new node consisting of a pre-formed ziplist.
+ * Used for loading RDBs where entire ziplists have been stored
+ * to be retrieved later. */
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl) {
+ quicklistNode *node = quicklistCreateNode();
+
+ node->zl = zl;
+ node->count = ziplistLen(node->zl);
+ node->sz = ziplistBlobLen(zl);
+
+ _quicklistInsertNodeAfter(quicklist, quicklist->tail, node);
+ quicklist->count += node->count;
+}
+
+/* Append all values of ziplist 'zl' individually into 'quicklist'.
+ *
+ * This allows us to restore old RDB ziplists into new quicklists
+ * with smaller ziplist sizes than the saved RDB ziplist.
+ *
+ * Returns 'quicklist' argument. Frees passed-in ziplist 'zl' */
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+ unsigned char *zl) {
+ unsigned char *value;
+ unsigned int sz;
+ long long longval;
+ char longstr[32] = {0};
+
+ unsigned char *p = ziplistIndex(zl, 0);
+ while (ziplistGet(p, &value, &sz, &longval)) {
+ if (!value) {
+ /* Write the longval as a string so we can re-add it */
+ sz = ll2string(longstr, sizeof(longstr), longval);
+ value = (unsigned char *)longstr;
+ }
+ quicklistPushTail(quicklist, value, sz);
+ p = ziplistNext(zl, p);
+ }
+ zfree(zl);
+ return quicklist;
+}
+
+/* Create new (potentially multi-node) quicklist from a single existing ziplist.
+ *
+ * Returns new quicklist. Frees passed-in ziplist 'zl'. */
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+ unsigned char *zl) {
+ return quicklistAppendValuesFromZiplist(quicklistNew(fill, compress), zl);
+}
+
+#define quicklistDeleteIfEmpty(ql, n) \
+ do { \
+ if ((n)->count == 0) { \
+ __quicklistDelNode((ql), (n)); \
+ (n) = NULL; \
+ } \
+ } while (0)
+
+REDIS_STATIC void __quicklistDelNode(quicklist *quicklist,
+ quicklistNode *node) {
+ if (node->next)
+ node->next->prev = node->prev;
+ if (node->prev)
+ node->prev->next = node->next;
+
+ if (node == quicklist->tail) {
+ quicklist->tail = node->prev;
+ }
+
+ if (node == quicklist->head) {
+ quicklist->head = node->next;
+ }
+
+ /* If we deleted a node within our compress depth, we
+ * now have compressed nodes needing to be decompressed. */
+ __quicklistCompress(quicklist, NULL);
+
+ quicklist->count -= node->count;
+
+ zfree(node->zl);
+ zfree(node);
+ quicklist->len--;
+}
+
+/* Delete one entry from list given the node for the entry and a pointer
+ * to the entry in the node.
+ *
+ * Note: quicklistDelIndex() *requires* uncompressed nodes because you
+ * already had to get *p from an uncompressed node somewhere.
+ *
+ * Returns 1 if the entire node was deleted, 0 if node still exists.
+ * Also updates in/out param 'p' with the next offset in the ziplist. */
+REDIS_STATIC int quicklistDelIndex(quicklist *quicklist, quicklistNode *node,
+ unsigned char **p) {
+ int gone = 0;
+
+ node->zl = ziplistDelete(node->zl, p);
+ node->count--;
+ if (node->count == 0) {
+ gone = 1;
+ __quicklistDelNode(quicklist, node);
+ } else {
+ quicklistNodeUpdateSz(node);
+ }
+ quicklist->count--;
+ /* If we deleted the node, the original node is no longer valid */
+ return gone ? 1 : 0;
+}
+
+/* Delete one element represented by 'entry'
+ *
+ * 'entry' stores enough metadata to delete the proper position in
+ * the correct ziplist in the correct quicklist node. */
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry) {
+ quicklistNode *prev = entry->node->prev;
+ quicklistNode *next = entry->node->next;
+ int deleted_node = quicklistDelIndex((quicklist *)entry->quicklist,
+ entry->node, &entry->zi);
+
+ /* after delete, the zi is now invalid for any future usage. */
+ iter->zi = NULL;
+
+ /* If current node is deleted, we must update iterator node and offset. */
+ if (deleted_node) {
+ if (iter->direction == AL_START_HEAD) {
+ iter->current = next;
+ iter->offset = 0;
+ } else if (iter->direction == AL_START_TAIL) {
+ iter->current = prev;
+ iter->offset = -1;
+ }
+ }
+ /* else if (!deleted_node), no changes needed.
+ * we already reset iter->zi above, and the existing iter->offset
+ * doesn't move again because:
+ * - [1, 2, 3] => delete offset 1 => [1, 3]: next element still offset 1
+ * - [1, 2, 3] => delete offset 0 => [2, 3]: next element still offset 0
+     *  if we deleted the last element at offset N and now
+ * length of this ziplist is N-1, the next call into
+ * quicklistNext() will jump to the next node. */
+}
+
+/* Replace quicklist entry at offset 'index' by 'data' with length 'sz'.
+ *
+ * Returns 1 if replace happened.
+ * Returns 0 if replace failed and no changes happened. */
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ int sz) {
+ quicklistEntry entry;
+ if (likely(quicklistIndex(quicklist, index, &entry))) {
+ /* quicklistIndex provides an uncompressed node */
+ /* Delete-then-insert at the same position: after the delete,
+ * entry.zi points at the following entry, so inserting there
+ * places the new value at the original offset. */
+ entry.node->zl = ziplistDelete(entry.node->zl, &entry.zi);
+ entry.node->zl = ziplistInsert(entry.node->zl, entry.zi, data, sz);
+ quicklistNodeUpdateSz(entry.node);
+ /* Re-compress the node quicklistIndex() left uncompressed. */
+ quicklistCompress(quicklist, entry.node);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Given two nodes, try to merge their ziplists.
+ *
+ * This helps us not have a quicklist with 3 element ziplists if
+ * our fill factor can handle much higher levels.
+ *
+ * Note: 'a' must be to the LEFT of 'b'.
+ *
+ * After calling this function, both 'a' and 'b' should be considered
+ * unusable. The return value from this function must be used
+ * instead of re-using any of the quicklistNode input arguments.
+ *
+ * Returns the input node picked to merge against or NULL if
+ * merging was not possible. */
+REDIS_STATIC quicklistNode *_quicklistZiplistMerge(quicklist *quicklist,
+ quicklistNode *a,
+ quicklistNode *b) {
+ D("Requested merge (a,b) (%u, %u)", a->count, b->count);
+
+ /* Both nodes must be raw for ziplistMerge() to read their bytes. */
+ quicklistDecompressNode(a);
+ quicklistDecompressNode(b);
+ if ((ziplistMerge(&a->zl, &b->zl))) {
+ /* We merged ziplists! Now remove the unused quicklistNode. */
+ /* The merged result lives in whichever input still has a non-NULL
+ * zl; the other one was emptied by ziplistMerge(). */
+ quicklistNode *keep = NULL, *nokeep = NULL;
+ if (!a->zl) {
+ nokeep = a;
+ keep = b;
+ } else if (!b->zl) {
+ nokeep = b;
+ keep = a;
+ }
+ keep->count = ziplistLen(keep->zl);
+ quicklistNodeUpdateSz(keep);
+
+ /* Zero the count so __quicklistDelNode() doesn't double-subtract
+ * the merged elements from quicklist->count. */
+ nokeep->count = 0;
+ __quicklistDelNode(quicklist, nokeep);
+ quicklistCompress(quicklist, keep);
+ return keep;
+ } else {
+ /* else, the merge returned NULL and nothing changed. */
+ return NULL;
+ }
+}
+
+/* Attempt to merge ziplists within two nodes on either side of 'center'.
+ *
+ * We attempt to merge:
+ * - (center->prev->prev, center->prev)
+ * - (center->next, center->next->next)
+ * - (center->prev, center)
+ * - (center, center->next)
+ */
+REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist,
+ quicklistNode *center) {
+ int fill = quicklist->fill;
+ quicklistNode *prev, *prev_prev, *next, *next_next, *target;
+ prev = prev_prev = next = next_next = target = NULL;
+
+ /* Snapshot the neighborhood; merges below may delete these nodes,
+ * which is why each local is NULLed once it might be stale. */
+ if (center->prev) {
+ prev = center->prev;
+ if (center->prev->prev)
+ prev_prev = center->prev->prev;
+ }
+
+ if (center->next) {
+ next = center->next;
+ if (center->next->next)
+ next_next = center->next->next;
+ }
+
+ /* Try to merge prev_prev and prev */
+ if (_quicklistNodeAllowMerge(prev, prev_prev, fill)) {
+ _quicklistZiplistMerge(quicklist, prev_prev, prev);
+ prev_prev = prev = NULL; /* they could have moved, invalidate them. */
+ }
+
+ /* Try to merge next and next_next */
+ if (_quicklistNodeAllowMerge(next, next_next, fill)) {
+ _quicklistZiplistMerge(quicklist, next, next_next);
+ next = next_next = NULL; /* they could have moved, invalidate them. */
+ }
+
+ /* Try to merge center node and previous node */
+ if (_quicklistNodeAllowMerge(center, center->prev, fill)) {
+ target = _quicklistZiplistMerge(quicklist, center->prev, center);
+ center = NULL; /* center could have been deleted, invalidate it. */
+ } else {
+ /* else, we didn't merge here, but target needs to be valid below. */
+ target = center;
+ }
+
+ /* Use result of center merge (or original) to merge with next node. */
+ if (_quicklistNodeAllowMerge(target, target->next, fill)) {
+ _quicklistZiplistMerge(quicklist, target, target->next);
+ }
+}
+
+/* Split 'node' into two parts, parameterized by 'offset' and 'after'.
+ *
+ * The 'after' argument controls which quicklistNode gets returned.
+ * If 'after'==1, returned node has elements after 'offset'.
+ * input node keeps elements up to 'offset', including 'offset'.
+ * If 'after'==0, returned node has elements up to 'offset', including 'offset'.
+ * input node keeps elements after 'offset'.
+ *
+ * If 'after'==1, returned node will have elements _after_ 'offset'.
+ * The returned node will have elements [OFFSET+1, END].
+ * The input node keeps elements [0, OFFSET].
+ *
+ * If 'after'==0, returned node will keep elements up to and including 'offset'.
+ * The returned node will have elements [0, OFFSET].
+ * The input node keeps elements [OFFSET+1, END].
+ *
+ * The input node keeps all elements not taken by the returned node.
+ *
+ * Returns newly created node or NULL if split not possible. */
+REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset,
+ int after) {
+ size_t zl_sz = node->sz;
+
+ quicklistNode *new_node = quicklistCreateNode();
+ new_node->zl = zmalloc(zl_sz);
+
+ /* Copy original ziplist so we can split it */
+ /* Both halves start as full copies; each then deletes the range the
+ * other half keeps. */
+ memcpy(new_node->zl, node->zl, zl_sz);
+
+ /* -1 here means "continue deleting until the list ends" */
+ int orig_start = after ? offset + 1 : 0;
+ int orig_extent = after ? -1 : offset;
+ int new_start = after ? 0 : offset;
+ int new_extent = after ? offset + 1 : -1;
+
+ D("After %d (%d); ranges: [%d, %d], [%d, %d]", after, offset, orig_start,
+ orig_extent, new_start, new_extent);
+
+ node->zl = ziplistDeleteRange(node->zl, orig_start, orig_extent);
+ node->count = ziplistLen(node->zl);
+ quicklistNodeUpdateSz(node);
+
+ new_node->zl = ziplistDeleteRange(new_node->zl, new_start, new_extent);
+ new_node->count = ziplistLen(new_node->zl);
+ quicklistNodeUpdateSz(new_node);
+
+ D("After split lengths: orig (%d), new (%d)", node->count, new_node->count);
+ return new_node;
+}
+
+/* Insert a new entry before or after existing entry 'entry'.
+ *
+ * If after==1, the new value is inserted after 'entry', otherwise
+ * the new value is inserted before 'entry'.
+ *
+ * Cases handled, in order:
+ *  - no reference node: create the only node of the list
+ *  - target node has room: insert directly
+ *  - target node full, insertion lands on a node edge and the adjacent
+ *    node has room: insert into the adjacent node instead
+ *  - target and adjacent node both full: create a fresh node between them
+ *  - otherwise: split the full node and insert into the new half */
+REDIS_STATIC void _quicklistInsert(quicklist *quicklist, quicklistEntry *entry,
+                                   void *value, const size_t sz, int after) {
+    int full = 0, at_tail = 0, at_head = 0, full_next = 0, full_prev = 0;
+    int fill = quicklist->fill;
+    quicklistNode *node = entry->node;
+    quicklistNode *new_node = NULL;
+
+    if (!node) {
+        /* we have no reference node, so let's create only node in the list */
+        D("No node given!");
+        new_node = quicklistCreateNode();
+        new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+        __quicklistInsertNode(quicklist, NULL, new_node, after);
+        new_node->count++;
+        quicklist->count++;
+        return;
+    }
+
+    /* Populate accounting flags for easier boolean checks later */
+    if (!_quicklistNodeAllowInsert(node, fill, sz)) {
+        /* BUGFIX: 'fill' is an int, so the conversion must be %d; the
+         * previous %lu was a printf format/argument mismatch (undefined
+         * behavior whenever debug printing is compiled in). */
+        D("Current node is full with count %d with requested fill %d",
+          node->count, fill);
+        full = 1;
+    }
+
+    if (after && (entry->offset == node->count)) {
+        D("At Tail of current ziplist");
+        at_tail = 1;
+        if (!_quicklistNodeAllowInsert(node->next, fill, sz)) {
+            D("Next node is full too.");
+            full_next = 1;
+        }
+    }
+
+    if (!after && (entry->offset == 0)) {
+        D("At Head");
+        at_head = 1;
+        if (!_quicklistNodeAllowInsert(node->prev, fill, sz)) {
+            D("Prev node is full too.");
+            full_prev = 1;
+        }
+    }
+
+    /* Now determine where and how to insert the new element */
+    if (!full && after) {
+        D("Not full, inserting after current position.");
+        quicklistDecompressNodeForUse(node);
+        unsigned char *next = ziplistNext(node->zl, entry->zi);
+        if (next == NULL) {
+            node->zl = ziplistPush(node->zl, value, sz, ZIPLIST_TAIL);
+        } else {
+            node->zl = ziplistInsert(node->zl, next, value, sz);
+        }
+        node->count++;
+        quicklistNodeUpdateSz(node);
+        quicklistRecompressOnly(quicklist, node);
+    } else if (!full && !after) {
+        D("Not full, inserting before current position.");
+        quicklistDecompressNodeForUse(node);
+        node->zl = ziplistInsert(node->zl, entry->zi, value, sz);
+        node->count++;
+        quicklistNodeUpdateSz(node);
+        quicklistRecompressOnly(quicklist, node);
+    } else if (full && at_tail && node->next && !full_next && after) {
+        /* If we are: at tail, next has free space, and inserting after:
+         *   - insert entry at head of next node. */
+        D("Full and tail, but next isn't full; inserting next node head");
+        new_node = node->next;
+        quicklistDecompressNodeForUse(new_node);
+        new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_HEAD);
+        new_node->count++;
+        quicklistNodeUpdateSz(new_node);
+        quicklistRecompressOnly(quicklist, new_node);
+    } else if (full && at_head && node->prev && !full_prev && !after) {
+        /* If we are: at head, previous has free space, and inserting before:
+         *   - insert entry at tail of previous node. */
+        D("Full and head, but prev isn't full, inserting prev node tail");
+        new_node = node->prev;
+        quicklistDecompressNodeForUse(new_node);
+        new_node->zl = ziplistPush(new_node->zl, value, sz, ZIPLIST_TAIL);
+        new_node->count++;
+        quicklistNodeUpdateSz(new_node);
+        quicklistRecompressOnly(quicklist, new_node);
+    } else if (full && ((at_tail && node->next && full_next && after) ||
+                        (at_head && node->prev && full_prev && !after))) {
+        /* If we are: full, and our prev/next is full, then:
+         *   - create new node and attach to quicklist */
+        D("\tprovisioning new node...");
+        new_node = quicklistCreateNode();
+        new_node->zl = ziplistPush(ziplistNew(), value, sz, ZIPLIST_HEAD);
+        new_node->count++;
+        quicklistNodeUpdateSz(new_node);
+        __quicklistInsertNode(quicklist, node, new_node, after);
+    } else if (full) {
+        /* else, node is full we need to split it. */
+        /* covers both after and !after cases */
+        D("\tsplitting node...");
+        quicklistDecompressNodeForUse(node);
+        new_node = _quicklistSplitNode(node, entry->offset, after);
+        new_node->zl = ziplistPush(new_node->zl, value, sz,
+                                   after ? ZIPLIST_HEAD : ZIPLIST_TAIL);
+        new_node->count++;
+        quicklistNodeUpdateSz(new_node);
+        __quicklistInsertNode(quicklist, node, new_node, after);
+        /* Splitting may have produced two undersized nodes; try to
+         * re-merge with the neighbors. */
+        _quicklistMergeNodes(quicklist, node);
+    }
+
+    quicklist->count++;
+}
+
+/* Insert 'value' before the position described by 'entry'.
+ * Thin wrapper over _quicklistInsert() with after==0. */
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 0);
+}
+
+/* Insert 'value' after the position described by 'entry'.
+ * Thin wrapper over _quicklistInsert() with after==1. */
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *entry,
+ void *value, const size_t sz) {
+ _quicklistInsert(quicklist, entry, value, sz, 1);
+}
+
+/* Delete a range of elements from the quicklist.
+ *
+ * elements may span across multiple quicklistNodes, so we
+ * have to be careful about tracking where we start and end.
+ *
+ * 'start' may be negative to count from the tail (-1 == last element).
+ *
+ * Returns 1 if entries were deleted, 0 if nothing was deleted. */
+int quicklistDelRange(quicklist *quicklist, const long start,
+ const long count) {
+ if (count <= 0)
+ return 0;
+
+ unsigned long extent = count; /* range is inclusive of start position */
+
+ if (start >= 0 && extent > (quicklist->count - start)) {
+ /* if requesting delete more elements than exist, limit to list size. */
+ extent = quicklist->count - start;
+ } else if (start < 0 && extent > (unsigned long)(-start)) {
+ /* else, if at negative offset, limit max size to rest of list. */
+ extent = -start; /* c.f. LREM -29 29; just delete until end. */
+ }
+
+ /* Locate the first element of the range (also validates 'start'). */
+ quicklistEntry entry;
+ if (!quicklistIndex(quicklist, start, &entry))
+ return 0;
+
+ D("Quicklist delete request for start %ld, count %ld, extent: %ld", start,
+ count, extent);
+ quicklistNode *node = entry.node;
+
+ /* iterate over next nodes until everything is deleted. */
+ while (extent) {
+ /* Save 'next' first: the current node may be freed below. */
+ quicklistNode *next = node->next;
+
+ unsigned long del;
+ int delete_entire_node = 0;
+ if (entry.offset == 0 && extent >= node->count) {
+ /* If we are deleting more than the count of this node, we
+ * can just delete the entire node without ziplist math. */
+ delete_entire_node = 1;
+ del = node->count;
+ } else if (entry.offset >= 0 && extent >= node->count) {
+ /* If deleting more nodes after this one, calculate delete based
+ * on size of current node. */
+ del = node->count - entry.offset;
+ } else if (entry.offset < 0) {
+ /* If offset is negative, we are in the first run of this loop
+ * and we are deleting the entire range
+ * from this start offset to end of list. Since the Negative
+ * offset is the number of elements until the tail of the list,
+ * just use it directly as the deletion count. */
+ del = -entry.offset;
+
+ /* If the positive offset is greater than the remaining extent,
+ * we only delete the remaining extent, not the entire offset.
+ */
+ if (del > extent)
+ del = extent;
+ } else {
+ /* else, we are deleting less than the extent of this node, so
+ * use extent directly. */
+ del = extent;
+ }
+
+ D("[%ld]: asking to del: %ld because offset: %d; (ENTIRE NODE: %d), "
+ "node count: %u",
+ extent, del, entry.offset, delete_entire_node, node->count);
+
+ if (delete_entire_node) {
+ __quicklistDelNode(quicklist, node);
+ } else {
+ quicklistDecompressNodeForUse(node);
+ node->zl = ziplistDeleteRange(node->zl, entry.offset, del);
+ quicklistNodeUpdateSz(node);
+ node->count -= del;
+ quicklist->count -= del;
+ /* quicklistDeleteIfEmpty() NULLs 'node' when it frees it,
+ * hence the guard before recompressing. */
+ quicklistDeleteIfEmpty(quicklist, node);
+ if (node)
+ quicklistRecompressOnly(quicklist, node);
+ }
+
+ extent -= del;
+
+ node = next;
+
+ /* Every node after the first is consumed from its head. */
+ entry.offset = 0;
+ }
+ return 1;
+}
+
+/* Passthrough to ziplistCompare(): compare the ziplist entry at 'p1'
+ * against the 'p2_len' bytes at 'p2'. */
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len) {
+ return ziplistCompare(p1, p2, p2_len);
+}
+
+/* Returns a new quicklist iterator 'iter'. After the initialization every
+ * call to quicklistNext() will return the next element of the quicklist. */
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction) {
+    quicklistIter *it = zmalloc(sizeof(*it));
+
+    it->quicklist = quicklist;
+    it->direction = direction;
+    it->zi = NULL;
+
+    /* Start at the head for forward walks, at the tail for reverse walks. */
+    if (direction == AL_START_HEAD) {
+        it->current = quicklist->head;
+        it->offset = 0;
+    } else if (direction == AL_START_TAIL) {
+        it->current = quicklist->tail;
+        it->offset = -1;
+    }
+
+    return it;
+}
+
+/* Initialize an iterator at a specific offset 'idx' and make the iterator
+ * return nodes in 'direction' direction.
+ * Returns NULL if 'idx' is out of range. */
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+                                         const int direction,
+                                         const long long idx) {
+    quicklistEntry entry;
+
+    /* Locate the element first; bail out on an out-of-range index. */
+    if (!quicklistIndex(quicklist, idx, &entry))
+        return NULL;
+
+    /* Reposition a fresh iterator onto the located node/offset. */
+    quicklistIter *iter = quicklistGetIterator(quicklist, direction);
+    iter->zi = NULL;
+    iter->current = entry.node;
+    iter->offset = entry.offset;
+    return iter;
+}
+
+/* Release iterator.
+ * If we still have a valid current node, re-compress it before freeing. */
+void quicklistReleaseIterator(quicklistIter *iter) {
+    quicklistNode *node = iter->current;
+    if (node != NULL)
+        quicklistCompress(iter->quicklist, node);
+    zfree(iter);
+}
+
+/* Get next element in iterator.
+ *
+ * Note: You must NOT insert into the list while iterating over it.
+ * You *may* delete from the list while iterating using the
+ * quicklistDelEntry() function.
+ * If you insert into the quicklist while iterating, you should
+ * re-create the iterator after your addition.
+ *
+ * iter = quicklistGetIterator(quicklist,<direction>);
+ * quicklistEntry entry;
+ * while (quicklistNext(iter, &entry)) {
+ * if (entry.value)
+ * [[ use entry.value with entry.sz ]]
+ * else
+ * [[ use entry.longval ]]
+ * }
+ *
+ * Populates 'entry' with values for this iteration.
+ * Returns 0 when iteration is complete or if iteration not possible.
+ * If return value is 0, the contents of 'entry' are not valid.
+ */
+int quicklistNext(quicklistIter *iter, quicklistEntry *entry) {
+ initEntry(entry);
+
+ if (!iter) {
+ D("Returning because no iter!");
+ return 0;
+ }
+
+ /* Populate the entry's back-references before any early return. */
+ entry->quicklist = iter->quicklist;
+ entry->node = iter->current;
+
+ if (!iter->current) {
+ D("Returning because current node is NULL")
+ return 0;
+ }
+
+ unsigned char *(*nextFn)(unsigned char *, unsigned char *) = NULL;
+ int offset_update = 0;
+
+ if (!iter->zi) {
+ /* If !zi, use current index. */
+ quicklistDecompressNodeForUse(iter->current);
+ iter->zi = ziplistIndex(iter->current->zl, iter->offset);
+ } else {
+ /* else, use existing iterator offset and get prev/next as necessary. */
+ if (iter->direction == AL_START_HEAD) {
+ nextFn = ziplistNext;
+ offset_update = 1;
+ } else if (iter->direction == AL_START_TAIL) {
+ nextFn = ziplistPrev;
+ offset_update = -1;
+ }
+ iter->zi = nextFn(iter->current->zl, iter->zi);
+ iter->offset += offset_update;
+ }
+
+ entry->zi = iter->zi;
+ entry->offset = iter->offset;
+
+ if (iter->zi) {
+ /* Populate value from existing ziplist position */
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ return 1;
+ } else {
+ /* We ran out of ziplist entries.
+ * Pick next node, update offset, then re-run retrieval. */
+ /* Re-compress the node we just finished walking. */
+ quicklistCompress(iter->quicklist, iter->current);
+ if (iter->direction == AL_START_HEAD) {
+ /* Forward traversal */
+ D("Jumping to start of next node");
+ iter->current = iter->current->next;
+ iter->offset = 0;
+ } else if (iter->direction == AL_START_TAIL) {
+ /* Reverse traversal */
+ D("Jumping to end of previous node");
+ iter->current = iter->current->prev;
+ iter->offset = -1;
+ }
+ iter->zi = NULL;
+ /* Recurses at most once: either current is now NULL (iteration
+ * done) or the fresh node is read via the !iter->zi path above. */
+ return quicklistNext(iter, entry);
+ }
+}
+
+/* Duplicate the quicklist.
+ * On success a copy of the original quicklist is returned.
+ *
+ * The original quicklist both on success or error is never modified.
+ *
+ * Returns newly allocated quicklist. */
+quicklist *quicklistDup(quicklist *orig) {
+ quicklist *copy;
+
+ copy = quicklistNew(orig->fill, orig->compress);
+
+ for (quicklistNode *current = orig->head; current;
+ current = current->next) {
+ quicklistNode *node = quicklistCreateNode();
+
+ if (current->encoding == QUICKLIST_NODE_ENCODING_LZF) {
+ /* Compressed node: clone the LZF header plus payload verbatim
+ * so the copy stays compressed without re-encoding. */
+ quicklistLZF *lzf = (quicklistLZF *)current->zl;
+ size_t lzf_sz = sizeof(*lzf) + lzf->sz;
+ node->zl = zmalloc(lzf_sz);
+ memcpy(node->zl, current->zl, lzf_sz);
+ } else if (current->encoding == QUICKLIST_NODE_ENCODING_RAW) {
+ /* Raw node: clone the ziplist bytes directly. */
+ node->zl = zmalloc(current->sz);
+ memcpy(node->zl, current->zl, current->sz);
+ }
+
+ node->count = current->count;
+ copy->count += node->count;
+ node->sz = current->sz;
+ node->encoding = current->encoding;
+
+ _quicklistInsertNodeAfter(copy, copy->tail, node);
+ }
+
+ /* copy->count must equal orig->count here */
+ return copy;
+}
+
+/* Populate 'entry' with the element at the specified zero-based index
+ * where 0 is the head, 1 is the element next to head
+ * and so on. Negative integers are used in order to count
+ * from the tail, -1 is the last element, -2 the penultimate
+ * and so on. If the index is out of range 0 is returned.
+ *
+ * Returns 1 if element found
+ * Returns 0 if element not found */
+int quicklistIndex(const quicklist *quicklist, const long long idx,
+ quicklistEntry *entry) {
+ quicklistNode *n;
+ unsigned long long accum = 0;
+ unsigned long long index;
+ int forward = idx < 0 ? 0 : 1; /* < 0 -> reverse, 0+ -> forward */
+
+ initEntry(entry);
+ entry->quicklist = quicklist;
+
+ /* Normalize to a non-negative distance from the chosen starting end. */
+ if (!forward) {
+ index = (-idx) - 1;
+ n = quicklist->tail;
+ } else {
+ index = idx;
+ n = quicklist->head;
+ }
+
+ if (index >= quicklist->count)
+ return 0;
+
+ /* Skip whole nodes, accumulating their counts, until the node
+ * containing 'index' is reached. */
+ while (likely(n)) {
+ if ((accum + n->count) > index) {
+ break;
+ } else {
+ D("Skipping over (%p) %u at accum %lld", (void *)n, n->count,
+ accum);
+ accum += n->count;
+ n = forward ? n->next : n->prev;
+ }
+ }
+
+ if (!n)
+ return 0;
+
+ D("Found node: %p at accum %llu, idx %llu, sub+ %llu, sub- %llu", (void *)n,
+ accum, index, index - accum, (-index) - 1 + accum);
+
+ entry->node = n;
+ if (forward) {
+ /* forward = normal head-to-tail offset. */
+ entry->offset = index - accum;
+ } else {
+ /* reverse = need negative offset for tail-to-head, so undo
+ * the result of the original if (index < 0) above. */
+ entry->offset = (-index) - 1 + accum;
+ }
+
+ quicklistDecompressNodeForUse(entry->node);
+ entry->zi = ziplistIndex(entry->node->zl, entry->offset);
+ ziplistGet(entry->zi, &entry->value, &entry->sz, &entry->longval);
+ /* The caller will use our result, so we don't re-compress here.
+ * The caller can recompress or delete the node as needed. */
+ return 1;
+}
+
+/* Rotate quicklist by moving the tail element to the head. */
+void quicklistRotate(quicklist *quicklist) {
+ if (quicklist->count <= 1)
+ return;
+
+ /* First, get the tail entry */
+ unsigned char *p = ziplistIndex(quicklist->tail->zl, -1);
+ unsigned char *value;
+ long long longval;
+ unsigned int sz;
+ char longstr[32] = {0};
+ ziplistGet(p, &value, &sz, &longval);
+
+ /* If value found is NULL, then ziplistGet populated longval instead */
+ if (!value) {
+ /* Write the longval as a string so we can re-add it */
+ sz = ll2string(longstr, sizeof(longstr), longval);
+ value = (unsigned char *)longstr;
+ }
+
+ /* Add tail entry to head (must happen before tail is deleted). */
+ quicklistPushHead(quicklist, value, sz);
+
+ /* If quicklist has only one node, the head ziplist is also the
+ * tail ziplist and PushHead() could have reallocated our single ziplist,
+ * which would make our pre-existing 'p' unusable. */
+ if (quicklist->len == 1) {
+ /* Re-derive 'p' from the (possibly reallocated) ziplist. */
+ p = ziplistIndex(quicklist->tail->zl, -1);
+ }
+
+ /* Remove tail entry. */
+ quicklistDelIndex(quicklist, quicklist->tail, &p);
+}
+
+/* pop from quicklist and return result in 'data' ptr. Value of 'data'
+ * is the return value of 'saver' function pointer if the data is NOT a number.
+ *
+ * If the quicklist element is a long long, then the return value is returned in
+ * 'sval'.
+ *
+ * Return value of 0 means no elements available.
+ * Return value of 1 means check 'data' and 'sval' for values.
+ * If 'data' is set, use 'data' and 'sz'. Otherwise, use 'sval'. */
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *sval,
+ void *(*saver)(unsigned char *data, unsigned int sz)) {
+ unsigned char *p;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ /* HEAD pops element 0; TAIL pops element -1 (last). */
+ int pos = (where == QUICKLIST_HEAD) ? 0 : -1;
+
+ if (quicklist->count == 0)
+ return 0;
+
+ /* Pre-initialize every out param so callers see defined values even
+ * when we return 0 below; -123456789 is a recognizable sentinel. */
+ if (data)
+ *data = NULL;
+ if (sz)
+ *sz = 0;
+ if (sval)
+ *sval = -123456789;
+
+ quicklistNode *node;
+ if (where == QUICKLIST_HEAD && quicklist->head) {
+ node = quicklist->head;
+ } else if (where == QUICKLIST_TAIL && quicklist->tail) {
+ node = quicklist->tail;
+ } else {
+ return 0;
+ }
+
+ p = ziplistIndex(node->zl, pos);
+ if (ziplistGet(p, &vstr, &vlen, &vlong)) {
+ if (vstr) {
+ /* String element: hand it to 'saver' to copy it out before
+ * the delete below frees the underlying ziplist bytes. */
+ if (data)
+ *data = saver(vstr, vlen);
+ if (sz)
+ *sz = vlen;
+ } else {
+ /* Integer element: report it through 'sval'. */
+ if (data)
+ *data = NULL;
+ if (sval)
+ *sval = vlong;
+ }
+ quicklistDelIndex(quicklist, node, &p);
+ return 1;
+ }
+ return 0;
+}
+
+/* Return a malloc'd copy of data passed in, or NULL when data is NULL. */
+REDIS_STATIC void *_quicklistSaver(unsigned char *data, unsigned int sz) {
+    if (!data)
+        return NULL;
+
+    unsigned char *copy = zmalloc(sz);
+    memcpy(copy, data, sz);
+    return copy;
+}
+
+/* Default pop function
+ *
+ * Returns malloc'd value from quicklist */
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *slong) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ if (quicklist->count == 0)
+ return 0;
+ /* quicklistPopCustom() initializes all three locals through the
+ * non-NULL pointers we pass, so the copies below are always defined. */
+ int ret = quicklistPopCustom(quicklist, where, &vstr, &vlen, &vlong,
+ _quicklistSaver);
+ if (data)
+ *data = vstr;
+ if (slong)
+ *slong = vlong;
+ if (sz)
+ *sz = vlen;
+ return ret;
+}
+
+/* Wrapper to allow argument-based switching between HEAD/TAIL push.
+ * An unrecognized 'where' is a silent no-op. */
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+                   int where) {
+    if (where == QUICKLIST_TAIL) {
+        quicklistPushTail(quicklist, value, sz);
+    } else if (where == QUICKLIST_HEAD) {
+        quicklistPushHead(quicklist, value, sz);
+    }
+}
+
+/* The rest of this file is test cases and test helpers. */
+#ifdef REDIS_TEST
+#include <stdint.h>
+#include <sys/time.h>
+
+/* Soft assertion: on failure, print the location and the failed
+ * expression, bump the enclosing function's local 'err' counter, and
+ * keep running (unlike the libc assert() this shadows). */
+#define assert(_e) \
+ do { \
+ if (!(_e)) { \
+ printf("\n\n=== ASSERTION FAILED ===\n"); \
+ printf("==> %s:%d '%s' is not true\n", __FILE__, __LINE__, #_e); \
+ err++; \
+ } \
+ } while (0)
+
+/* Print a formatted error message (requires at least one vararg). */
+#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
+
+#define OK printf("\tOK\n")
+
+/* Record an anonymous test failure in the local 'err' counter. */
+#define ERROR \
+ do { \
+ printf("\tERROR!\n"); \
+ err++; \
+ } while (0)
+
+/* Record a test failure with file/function/line context and a message. */
+#define ERR(x, ...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf("ERROR! " x "\n", __VA_ARGS__); \
+ err++; \
+ } while (0)
+
+/* Announce a test case by name before its body runs. */
+#define TEST(name) printf("test — %s\n", name);
+#define TEST_DESC(name, ...) printf("test — " name "\n", __VA_ARGS__);
+
+/* Set to 1 to make ql_info() print list internals during tests. */
+#define QL_TEST_VERBOSE 0
+
+#define UNUSED(x) (void)(x)
+/* Print quicklist summary stats (len, count, head/tail ziplist sizes)
+ * when QL_TEST_VERBOSE is enabled; otherwise a no-op. */
+static void ql_info(quicklist *ql) {
+#if QL_TEST_VERBOSE
+ printf("Container length: %lu\n", ql->len);
+ printf("Container size: %lu\n", ql->count);
+ if (ql->head)
+ printf("\t(zsize head: %d)\n", ziplistLen(ql->head->zl));
+ if (ql->tail)
+ printf("\t(zsize tail: %d)\n", ziplistLen(ql->tail->zl));
+ printf("\n");
+#else
+ UNUSED(ql);
+#endif
+}
+
+/* Return the UNIX time in microseconds */
+static long long ustime(void) {
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    /* Fold seconds and microseconds into one microsecond count. */
+    return ((long long)tv.tv_sec) * 1000000LL + (long long)tv.tv_usec;
+}
+
+/* Return the UNIX time in milliseconds (truncated from ustime()). */
+static long long mstime(void) { return ustime() / 1000; }
+
+/* Iterate over an entire quicklist.
+ * Print the list if 'print' == 1.
+ * 'forward' selects head-to-tail (1) or tail-to-head (0) traversal.
+ *
+ * Returns physical count of elements found by iterating over the list. */
+static int _itrprintr(quicklist *ql, int print, int forward) {
+ quicklistIter *iter =
+ quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ int p = 0;
+ quicklistNode *prev = NULL;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.node != prev) {
+ /* Count the number of list nodes too */
+ p++;
+ prev = entry.node;
+ }
+ if (print) {
+ printf("[%3d (%2d)]: [%.*s] (%lld)\n", i, p, entry.sz,
+ (char *)entry.value, entry.longval);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ return i;
+}
+/* Forward (head-to-tail) count/print helper. */
+static int itrprintr(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 1);
+}
+
+/* Reverse (tail-to-head) count/print helper. */
+static int itrprintr_rev(quicklist *ql, int print) {
+ return _itrprintr(ql, print, 0);
+}
+
+/* Wrapper so each verification failure is added into the enclosing
+ * function's 'err' counter. */
+#define ql_verify(a, b, c, d, e) \
+ do { \
+ err += _ql_verify((a), (b), (c), (d), (e)); \
+ } while (0)
+
+/* Verify list metadata matches physical list contents.
+ *
+ * Checks: cached len/count vs. expectations, forward vs. reverse
+ * iteration counts, head/tail node counts, and compression-depth
+ * invariants. Returns the number of mismatches found. */
+static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count,
+ uint32_t head_count, uint32_t tail_count) {
+ int errors = 0;
+
+ ql_info(ql);
+ if (len != ql->len) {
+ yell("quicklist length wrong: expected %d, got %u", len, ql->len);
+ errors++;
+ }
+
+ if (count != ql->count) {
+ yell("quicklist count wrong: expected %d, got %lu", count, ql->count);
+ errors++;
+ }
+
+ /* Cross-check the cached count against a physical forward walk. */
+ int loopr = itrprintr(ql, 0);
+ if (loopr != (int)ql->count) {
+ yell("quicklist cached count not match actual count: expected %lu, got "
+ "%d",
+ ql->count, loopr);
+ errors++;
+ }
+
+ /* A reverse walk must see exactly the same number of elements. */
+ int rloopr = itrprintr_rev(ql, 0);
+ if (loopr != rloopr) {
+ yell("quicklist has different forward count than reverse count! "
+ "Forward count is %d, reverse count is %d.",
+ loopr, rloopr);
+ errors++;
+ }
+
+ if (ql->len == 0 && !errors) {
+ OK;
+ return errors;
+ }
+
+ /* Only complain when BOTH the cached count and the actual ziplist
+ * length disagree with the expectation. */
+ if (ql->head && head_count != ql->head->count &&
+ head_count != ziplistLen(ql->head->zl)) {
+ yell("quicklist head count wrong: expected %d, "
+ "got cached %d vs. actual %d",
+ head_count, ql->head->count, ziplistLen(ql->head->zl));
+ errors++;
+ }
+
+ if (ql->tail && tail_count != ql->tail->count &&
+ tail_count != ziplistLen(ql->tail->zl)) {
+ yell("quicklist tail count wrong: expected %d, "
+ "got cached %u vs. actual %d",
+ tail_count, ql->tail->count, ziplistLen(ql->tail->zl));
+ errors++;
+ }
+
+ if (quicklistAllowsCompression(ql)) {
+ quicklistNode *node = ql->head;
+ /* Nodes with index in [low_raw, high_raw) must be compressed;
+ * the 'compress' outermost nodes at each end stay raw. */
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len; at++, node = node->next) {
+ if (node && (at < low_raw || at >= high_raw)) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ yell("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress);
+ errors++;
+ }
+ } else {
+ /* attempted_compress marks nodes too small to compress,
+ * which are legitimately left raw. */
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF &&
+ !node->attempted_compress) {
+ yell("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; recompress: %d; attempted: %d)",
+ at, ql->compress, low_raw, high_raw, ql->len, node->sz,
+ node->recompress, node->attempted_compress);
+ errors++;
+ }
+ }
+ }
+ }
+
+ if (!errors)
+ OK;
+ return errors;
+}
+
+/* Generate new string concatenating integer i against string 'prefix'.
+ * NOTE: returns a pointer to a shared static buffer, so every call
+ * overwrites the result of the previous one. */
+static char *genstr(char *prefix, int i) {
+    static char buf[64];
+    (void)snprintf(buf, sizeof(buf), "%s%d", prefix, i);
+    return buf;
+}
+
+/* main test, but callable from other files */
+int quicklistTest(int argc, char *argv[]) {
+ UNUSED(argc);
+ UNUSED(argv);
+
+ unsigned int err = 0;
+ int optimize_start =
+ -(int)(sizeof(optimization_level) / sizeof(*optimization_level));
+
+ printf("Starting optimization offset at: %d\n", optimize_start);
+
+ int options[] = {0, 1, 2, 3, 4, 5, 6, 10};
+ size_t option_count = sizeof(options) / sizeof(*options);
+ long long runtime[option_count];
+
+ for (int _i = 0; _i < (int)option_count; _i++) {
+ printf("Testing Option %d\n", options[_i]);
+ long long start = mstime();
+
+ TEST("create list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to tail of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "hello", 6);
+ /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("add to head of empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to tail 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST_DESC("add to head 5x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 5; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 5)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 1, 5, 5, 5);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to tail 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 64);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("add to head 500x at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ if (ql->count != 500)
+ ERROR;
+ if (f == 32)
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("rotate empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistRotate(ql);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 32; f++) {
+ TEST("rotate one val once") {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistRotate(ql);
+ /* Ignore compression verify because ziplist is
+ * too small to compress. */
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 3; f++) {
+ TEST_DESC("rotate 500 val 5000 times at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushHead(ql, "900", 3);
+ quicklistPushHead(ql, "7000", 4);
+ quicklistPushHead(ql, "-1200", 5);
+ quicklistPushHead(ql, "42", 2);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 64);
+ ql_info(ql);
+ for (int i = 0; i < 5000; i++) {
+ ql_info(ql);
+ quicklistRotate(ql);
+ }
+ if (f == 1)
+ ql_verify(ql, 504, 504, 1, 1);
+ else if (f == 2)
+ ql_verify(ql, 252, 504, 2, 2);
+ else if (f == 32)
+ ql_verify(ql, 16, 504, 32, 24);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("pop empty") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop 1 string from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ char *populate = genstr("hello", 331);
+ quicklistPushHead(ql, populate, 32);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(populate, (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)", sz,
+ data, populate);
+ zfree(data);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 1 number from 1") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "55513", 5);
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ ql_info(ql);
+ quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(data == NULL);
+ assert(lv == 55513);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 500 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_info(ql);
+ for (int i = 0; i < 500; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)",
+ sz, data, genstr("hello", 499 - i));
+ zfree(data);
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("pop head 5000 from 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 5000; i++) {
+ unsigned char *data;
+ unsigned int sz;
+ long long lv;
+ int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
+ if (i < 500) {
+ assert(ret == 1);
+ assert(data != NULL);
+ assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value "
+ "(%s)",
+ sz, data, genstr("hello", 499 - i));
+ zfree(data);
+ } else {
+ assert(ret == 0);
+ }
+ }
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate forward over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 499, count = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i--;
+ count++;
+ }
+ if (count != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("iterate reverse over 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ char *h = genstr("hello", i);
+ if (strcmp((char *)entry.value, h))
+ ERR("value [%s] didn't match [%s] at position %d",
+ entry.value, h, i);
+ i++;
+ }
+ if (i != 500)
+ ERR("Didn't iterate over exactly 500 elements (%d)", i);
+ ql_verify(ql, 16, 500, 20, 32);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertBefore(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after with 0 elements") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert after 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ TEST("insert before 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, "hello", 6);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ quicklistInsertAfter(ql, &entry, "abc", 4);
+ ql_verify(ql, 1, 2, 2, 2);
+ quicklistRelease(ql);
+ }
+
+ for (int f = optimize_start; f < 12; f++) {
+ TEST_DESC("insert once in elements while iterating at fill %d at "
+ "compress %d\n",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistSetFill(ql, 1);
+ quicklistPushTail(ql, "def", 3); /* force to unique node */
+ quicklistSetFill(ql, f);
+ quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */
+ quicklistPushTail(ql, "foo", 3);
+ quicklistPushTail(ql, "zoo", 3);
+
+ itrprintr(ql, 0);
+ /* insert "bar" before "bob" while iterating over list. */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ while (quicklistNext(iter, &entry)) {
+ if (!strncmp((char *)entry.value, "bob", 3)) {
+ /* Insert as fill = 1 so it spills into new node. */
+ quicklistInsertBefore(ql, &entry, "bar", 3);
+ break; /* didn't we fix insert-while-iterating? */
+ }
+ }
+ itrprintr(ql, 0);
+
+ /* verify results */
+ quicklistIndex(ql, 0, &entry);
+ if (strncmp((char *)entry.value, "abc", 3))
+ ERR("Value 0 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 1, &entry);
+ if (strncmp((char *)entry.value, "def", 3))
+ ERR("Value 1 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 2, &entry);
+ if (strncmp((char *)entry.value, "bar", 3))
+ ERR("Value 2 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 3, &entry);
+ if (strncmp((char *)entry.value, "bob", 3))
+ ERR("Value 3 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 4, &entry);
+ if (strncmp((char *)entry.value, "foo", 3))
+ ERR("Value 4 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistIndex(ql, 5, &entry);
+ if (strncmp((char *)entry.value, "zoo", 3))
+ ERR("Value 5 didn't match, instead got: %.*s", entry.sz,
+ entry.value);
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC(
+ "insert [before] 250 new in middle of 500 elements at fill"
+ " %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertBefore(ql, &entry, genstr("abc", i), 32);
+ }
+ if (f == 32)
+ ql_verify(ql, 25, 750, 32, 20);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 1024; f++) {
+ TEST_DESC("insert [after] 250 new in middle of 500 elements at "
+ "fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ for (int i = 0; i < 250; i++) {
+ quicklistEntry entry;
+ quicklistIndex(ql, 250, &entry);
+ quicklistInsertAfter(ql, &entry, genstr("abc", i), 32);
+ }
+
+ if (ql->count != 750)
+ ERR("List size not 750, but rather %ld", ql->count);
+
+ if (f == 32)
+ ql_verify(ql, 26, 750, 20, 32);
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("duplicate empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 1 element") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushHead(ql, genstr("hello", 3), 32);
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 1, 1, 1, 1);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ TEST("duplicate list of 500") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 16, 500, 20, 32);
+
+ quicklist *copy = quicklistDup(ql);
+ ql_verify(copy, 16, 500, 20, 32);
+ quicklistRelease(ql);
+ quicklistRelease(copy);
+ }
+
+ for (int f = optimize_start; f < 512; f++) {
+ TEST_DESC("index 1,200 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, 1, &entry);
+ if (!strcmp((char *)entry.value, "hello2"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, 200, &entry);
+ if (!strcmp((char *)entry.value, "hello201"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -1,-2 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -1, &entry);
+ if (!strcmp((char *)entry.value, "hello500"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistIndex(ql, -2, &entry);
+ if (!strcmp((char *)entry.value, "hello499"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index -100 from 500 list at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ quicklistIndex(ql, -100, &entry);
+ if (!strcmp((char *)entry.value, "hello401"))
+ OK;
+ else
+ ERR("Value: %s", entry.value);
+ quicklistRelease(ql);
+ }
+
+ TEST_DESC("index too big +1 from 50 list at fill %d at compress %d",
+ f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistEntry entry;
+ if (quicklistIndex(ql, 50, &entry))
+ ERR("Index found at 50 with 50 list: %.*s", entry.sz,
+ entry.value);
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ TEST("delete range empty list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistDelRange(ql, 5, 20);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node in list of one node") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 32);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete range of entire node with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ for (int i = 0; i < 32; i++)
+ quicklistPushHead(ql, genstr("hello", i), 32);
+ ql_verify(ql, 1, 32, 32, 32);
+ quicklistDelRange(ql, 0, 128);
+ ql_verify(ql, 0, 0, 0, 0);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete middle 100 of 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, 200, 100);
+ ql_verify(ql, 14, 400, 32, 20);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 1);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 1 from 500 list with overflow counts") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 16, 500, 32, 20);
+ quicklistDelRange(ql, -1, 128);
+ ql_verify(ql, 16, 499, 32, 19);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete negative 100 from 500 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 500; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ quicklistDelRange(ql, -100, 100);
+ ql_verify(ql, 13, 400, 32, 16);
+ quicklistRelease(ql);
+ }
+
+ TEST("delete -10 count 5 from 50 list") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ for (int i = 0; i < 50; i++)
+ quicklistPushTail(ql, genstr("hello", i + 1), 32);
+ ql_verify(ql, 2, 50, 32, 18);
+ quicklistDelRange(ql, -10, 5);
+ ql_verify(ql, 2, 45, 32, 13);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers only list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "1111", 4);
+ quicklistPushTail(ql, "2222", 4);
+ quicklistPushTail(ql, "3333", 4);
+ quicklistPushTail(ql, "4444", 4);
+ ql_verify(ql, 1, 4, 4, 4);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111, %lld", entry.longval);
+ quicklistIndex(ql, 1, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222, %lld", entry.longval);
+ quicklistIndex(ql, 2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333, %lld", entry.longval);
+ quicklistIndex(ql, 3, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444, %lld", entry.longval);
+ if (quicklistIndex(ql, 4, &entry))
+ ERR("Index past elements: %lld", entry.longval);
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 4444)
+ ERR("Not 4444 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -2, &entry);
+ if (entry.longval != 3333)
+ ERR("Not 3333 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -3, &entry);
+ if (entry.longval != 2222)
+ ERR("Not 2222 (reverse), %lld", entry.longval);
+ quicklistIndex(ql, -4, &entry);
+ if (entry.longval != 1111)
+ ERR("Not 1111 (reverse), %lld", entry.longval);
+ if (quicklistIndex(ql, -5, &entry))
+ ERR("Index past elements (reverse), %lld", entry.longval);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistSetFill(ql, 32);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 5000; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistEntry entry;
+ for (int i = 0; i < 5000; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[i])
+ ERR("[%d] Not longval %lld but rather %lld", i, nums[i],
+ entry.longval);
+ entry.longval = 0xdeadbeef;
+ }
+ quicklistIndex(ql, 5000, &entry);
+ if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20))
+ ERR("String val not match: %s", entry.value);
+ ql_verify(ql, 157, 5001, 32, 9);
+ quicklistRelease(ql);
+ }
+
+ TEST("numbers larger list read B") {
+ quicklist *ql = quicklistNew(-2, options[_i]);
+ quicklistPushTail(ql, "99", 2);
+ quicklistPushTail(ql, "98", 2);
+ quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20);
+ quicklistPushTail(ql, "96", 2);
+ quicklistPushTail(ql, "95", 2);
+ quicklistReplaceAtIndex(ql, 1, "foo", 3);
+ quicklistReplaceAtIndex(ql, -1, "bar", 3);
+ quicklistRelease(ql);
+ OK;
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("lrem test at fill %d at compress %d", f, options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char *words[] = {"abc", "foo", "bar", "foobar", "foobared",
+ "zap", "bar", "test", "foo"};
+ char *result[] = {"abc", "foo", "foobar", "foobared",
+ "zap", "test", "foo"};
+ char *resultB[] = {"abc", "foo", "foobar",
+ "foobared", "zap", "test"};
+ for (int i = 0; i < 9; i++)
+ quicklistPushTail(ql, words[i], strlen(words[i]));
+
+ /* lrem 0 bar */
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD);
+ quicklistEntry entry;
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"bar", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem 0 bar */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ int ok = 1;
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, result[i], entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, result[i]);
+ ok = 0;
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ quicklistPushTail(ql, "foo", 3);
+
+ /* lrem -2 foo */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ int del = 2;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"foo", 3)) {
+ quicklistDelEntry(iter, &entry);
+ del--;
+ }
+ if (!del)
+ break;
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ /* check result of lrem -2 foo */
+ /* (we're ignoring the '2' part and still deleting all foo
+ * because
+ * we only have two foo) */
+ iter = quicklistGetIterator(ql, AL_START_TAIL);
+ i = 0;
+ size_t resB = sizeof(resultB) / sizeof(*resultB);
+ while (quicklistNext(iter, &entry)) {
+ /* Result must be: abc, foo, foobar, foobared, zap, test,
+ * foo */
+ if (strncmp((char *)entry.value, resultB[resB - 1 - i],
+ entry.sz)) {
+ ERR("No match at position %d, got %.*s instead of %s",
+ i, entry.sz, entry.value, resultB[resB - 1 - i]);
+ ok = 0;
+ }
+ i++;
+ }
+
+ quicklistReleaseIterator(iter);
+ /* final result of all tests */
+ if (ok)
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 16; f++) {
+ TEST_DESC("iterate reverse + delete at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ quicklistPushTail(ql, "abc", 3);
+ quicklistPushTail(ql, "def", 3);
+ quicklistPushTail(ql, "hij", 3);
+ quicklistPushTail(ql, "jkl", 3);
+ quicklistPushTail(ql, "oop", 3);
+
+ quicklistEntry entry;
+ quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
+ int i = 0;
+ while (quicklistNext(iter, &entry)) {
+ if (quicklistCompare(entry.zi, (unsigned char *)"hij", 3)) {
+ quicklistDelEntry(iter, &entry);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+
+ if (i != 5)
+ ERR("Didn't iterate 5 times, iterated %d times.", i);
+
+ /* Check results after deletion of "hij" */
+ iter = quicklistGetIterator(ql, AL_START_HEAD);
+ i = 0;
+ char *vals[] = {"abc", "def", "jkl", "oop"};
+ while (quicklistNext(iter, &entry)) {
+ if (!quicklistCompare(entry.zi, (unsigned char *)vals[i],
+ 3)) {
+ ERR("Value at %d didn't match %s\n", i, vals[i]);
+ }
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 800; f++) {
+ TEST_DESC("iterator at index test at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 760; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+
+ quicklistEntry entry;
+ quicklistIter *iter =
+ quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437);
+ int i = 437;
+ while (quicklistNext(iter, &entry)) {
+ if (entry.longval != nums[i])
+ ERR("Expected %lld, but got %lld", entry.longval,
+ nums[i]);
+ i++;
+ }
+ quicklistReleaseIterator(iter);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test A at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 32; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 32, 32, 32);
+            /* ltrim 25 53 (keep [25,31] inclusive = 7 remaining) */
+ quicklistDelRange(ql, 0, 25);
+ quicklistDelRange(ql, 0, 0);
+ quicklistEntry entry;
+ for (int i = 0; i < 7; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[25 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[25 + i]);
+ }
+ if (f == 32)
+ ql_verify(ql, 1, 7, 7, 7);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test B at fill %d at compress %d", f,
+ options[_i]) {
+ /* Force-disable compression because our 33 sequential
+ * integers don't compress and the check always fails. */
+ quicklist *ql = quicklistNew(f, QUICKLIST_NOCOMPRESS);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */
+ quicklistDelRange(ql, 0, 5);
+ quicklistDelRange(ql, -16, 16);
+ if (f == 32)
+ ql_verify(ql, 1, 12, 12, 12);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != 5)
+ ERR("A: longval not 5, but %lld", entry.longval);
+ else
+ OK;
+ quicklistIndex(ql, -1, &entry);
+ if (entry.longval != 16)
+ ERR("B! got instead: %lld", entry.longval);
+ else
+ OK;
+ quicklistPushTail(ql, "bobobob", 7);
+ quicklistIndex(ql, -1, &entry);
+ if (strncmp((char *)entry.value, "bobobob", 7))
+ ERR("Tail doesn't match bobobob, it's %.*s instead",
+ entry.sz, entry.value);
+ for (int i = 0; i < 12; i++) {
+ quicklistIndex(ql, i, &entry);
+ if (entry.longval != nums[5 + i])
+ ERR("Deleted invalid range! Expected %lld but got "
+ "%lld",
+ entry.longval, nums[5 + i]);
+ }
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test C at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */
+ quicklistDelRange(ql, 0, 3);
+ quicklistDelRange(ql, -29,
+ 4000); /* make sure not loop forever */
+ if (f == 32)
+ ql_verify(ql, 1, 1, 1, 1);
+ quicklistEntry entry;
+ quicklistIndex(ql, 0, &entry);
+ if (entry.longval != -5157318210846258173)
+ ERROR;
+ else
+ OK;
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 40; f++) {
+ TEST_DESC("ltrim test D at fill %d at compress %d", f,
+ options[_i]) {
+ quicklist *ql = quicklistNew(f, options[_i]);
+ char num[32];
+ long long nums[5000];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ quicklistPushTail(ql, num, sz);
+ }
+ if (f == 32)
+ ql_verify(ql, 2, 33, 32, 1);
+ quicklistDelRange(ql, -12, 3);
+ if (ql->count != 30)
+ ERR("Didn't delete exactly three elements! Count is: %lu",
+ ql->count);
+ quicklistRelease(ql);
+ }
+ }
+
+ for (int f = optimize_start; f < 72; f++) {
+ TEST_DESC("create quicklist from ziplist at fill %d at compress %d",
+ f, options[_i]) {
+ unsigned char *zl = ziplistNew();
+ long long nums[64];
+ char num[64];
+ for (int i = 0; i < 33; i++) {
+ nums[i] = -5157318210846258176 + i;
+ int sz = ll2string(num, sizeof(num), nums[i]);
+ zl =
+ ziplistPush(zl, (unsigned char *)num, sz, ZIPLIST_TAIL);
+ }
+ for (int i = 0; i < 33; i++) {
+ zl = ziplistPush(zl, (unsigned char *)genstr("hello", i),
+ 32, ZIPLIST_TAIL);
+ }
+ quicklist *ql = quicklistCreateFromZiplist(f, options[_i], zl);
+ if (f == 1)
+ ql_verify(ql, 66, 66, 1, 1);
+ else if (f == 32)
+ ql_verify(ql, 3, 66, 32, 2);
+ else if (f == 66)
+ ql_verify(ql, 1, 66, 66, 66);
+ quicklistRelease(ql);
+ }
+ }
+
+ long long stop = mstime();
+ runtime[_i] = stop - start;
+ }
+
+ /* Run a longer test of compression depth outside of primary test loop. */
+ int list_sizes[] = {250, 251, 500, 999, 1000};
+ long long start = mstime();
+ for (int list = 0; list < (int)(sizeof(list_sizes) / sizeof(*list_sizes));
+ list++) {
+ for (int f = optimize_start; f < 128; f++) {
+ for (int depth = 1; depth < 40; depth++) {
+ /* skip over many redundant test cases */
+ TEST_DESC("verify specific compression of interior nodes with "
+ "%d list "
+ "at fill %d at compress %d",
+ list_sizes[list], f, depth) {
+ quicklist *ql = quicklistNew(f, depth);
+ for (int i = 0; i < list_sizes[list]; i++) {
+ quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64);
+ quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64);
+ }
+
+ quicklistNode *node = ql->head;
+ unsigned int low_raw = ql->compress;
+ unsigned int high_raw = ql->len - ql->compress;
+
+ for (unsigned int at = 0; at < ql->len;
+ at++, node = node->next) {
+ if (at < low_raw || at >= high_raw) {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) {
+ ERR("Incorrect compression: node %d is "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz);
+ }
+ } else {
+ if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) {
+ ERR("Incorrect non-compression: node %d is NOT "
+ "compressed at depth %d ((%u, %u); total "
+ "nodes: %u; size: %u; attempted: %d)",
+ at, depth, low_raw, high_raw, ql->len,
+ node->sz, node->attempted_compress);
+ }
+ }
+ }
+ quicklistRelease(ql);
+ }
+ }
+ }
+ }
+ long long stop = mstime();
+
+ printf("\n");
+ for (size_t i = 0; i < option_count; i++)
+ printf("Test Loop %02d: %0.2f seconds.\n", options[i],
+ (float)runtime[i] / 1000);
+ printf("Compressions: %0.2f seconds.\n", (float)(stop - start) / 1000);
+ printf("\n");
+
+ if (!err)
+ printf("ALL TESTS PASSED!\n");
+ else
+ ERR("Sorry, not all tests passed! In fact, %d tests failed.", err);
+
+ return err;
+}
+#endif
diff --git a/src/quicklist.h b/src/quicklist.h
new file mode 100644
index 000000000..8f3875900
--- /dev/null
+++ b/src/quicklist.h
@@ -0,0 +1,169 @@
+/* quicklist.h - A generic doubly linked quicklist implementation
+ *
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this quicklist of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this quicklist of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __QUICKLIST_H__
+#define __QUICKLIST_H__
+
+/* Node, quicklist, and Iterator are the only data structures used currently. */
+
+/* quicklistNode is a 32 byte struct describing a ziplist for a quicklist.
+ * We use bit fields to keep the quicklistNode at 32 bytes.
+ * count: 16 bits, max 65535 (max zl bytes is 65k, so max count actually < 32k).
+ * encoding: 2 bits, RAW=1, LZF=2.
+ * container: 2 bits, NONE=1, ZIPLIST=2.
+ * recompress: 1 bit, bool, true if node is temporarily decompressed for usage.
+ * attempted_compress: 1 bit, boolean, used for verifying during testing.
+ * extra: 10 bits, free for future use; pads out the remainder of 32 bits */
+typedef struct quicklistNode {
+ struct quicklistNode *prev;
+ struct quicklistNode *next;
+ unsigned char *zl;
+ unsigned int sz; /* ziplist size in bytes */
+ unsigned int count : 16; /* count of items in ziplist */
+ unsigned int encoding : 2; /* RAW==1 or LZF==2 */
+ unsigned int container : 2; /* NONE==1 or ZIPLIST==2 */
+    unsigned int recompress : 1; /* was this node previously compressed? */
+ unsigned int attempted_compress : 1; /* node can't compress; too small */
+ unsigned int extra : 10; /* more bits to steal for future usage */
+} quicklistNode;
+
+/* quicklistLZF is a 4+N byte struct holding 'sz' followed by 'compressed'.
+ * 'sz' is byte length of 'compressed' field.
+ * 'compressed' is LZF data with total (compressed) length 'sz'
+ * NOTE: uncompressed length is stored in quicklistNode->sz.
+ * When quicklistNode->zl is compressed, node->zl points to a quicklistLZF */
+typedef struct quicklistLZF {
+    unsigned int sz; /* LZF size in bytes */
+ char compressed[];
+} quicklistLZF;
+
+/* quicklist is a 32 byte struct (on 64-bit systems) describing a quicklist.
+ * 'count' is the number of total entries.
+ * 'len' is the number of quicklist nodes.
+ * 'compress' is: -1 if compression disabled, otherwise it's the number
+ * of quicklistNodes to leave uncompressed at ends of quicklist.
+ * 'fill' is the user-requested (or default) fill factor. */
+typedef struct quicklist {
+ quicklistNode *head;
+ quicklistNode *tail;
+ unsigned long count; /* total count of all entries in all ziplists */
+ unsigned int len; /* number of quicklistNodes */
+ int fill : 16; /* fill factor for individual nodes */
+    unsigned int compress : 16; /* depth of end nodes not to compress; 0=off */
+} quicklist;
+
+typedef struct quicklistIter {
+ const quicklist *quicklist;
+ quicklistNode *current;
+ unsigned char *zi;
+ long offset; /* offset in current ziplist */
+ int direction;
+} quicklistIter;
+
+typedef struct quicklistEntry {
+ const quicklist *quicklist;
+ quicklistNode *node;
+ unsigned char *zi;
+ unsigned char *value;
+ long long longval;
+ unsigned int sz;
+ int offset;
+} quicklistEntry;
+
+#define QUICKLIST_HEAD 0
+#define QUICKLIST_TAIL -1
+
+/* quicklist node encodings */
+#define QUICKLIST_NODE_ENCODING_RAW 1
+#define QUICKLIST_NODE_ENCODING_LZF 2
+
+/* quicklist compression disable */
+#define QUICKLIST_NOCOMPRESS 0
+
+/* quicklist container formats */
+#define QUICKLIST_NODE_CONTAINER_NONE 1
+#define QUICKLIST_NODE_CONTAINER_ZIPLIST 2
+
+#define quicklistNodeIsCompressed(node) \
+ ((node)->encoding == QUICKLIST_NODE_ENCODING_LZF)
+
+/* Prototypes */
+quicklist *quicklistCreate(void);
+quicklist *quicklistNew(int fill, int compress);
+void quicklistSetCompressDepth(quicklist *quicklist, int depth);
+void quicklistSetFill(quicklist *quicklist, int fill);
+void quicklistSetOptions(quicklist *quicklist, int fill, int depth);
+void quicklistRelease(quicklist *quicklist);
+int quicklistPushHead(quicklist *quicklist, void *value, const size_t sz);
+int quicklistPushTail(quicklist *quicklist, void *value, const size_t sz);
+void quicklistPush(quicklist *quicklist, void *value, const size_t sz,
+ int where);
+void quicklistAppendZiplist(quicklist *quicklist, unsigned char *zl);
+quicklist *quicklistAppendValuesFromZiplist(quicklist *quicklist,
+ unsigned char *zl);
+quicklist *quicklistCreateFromZiplist(int fill, int compress,
+ unsigned char *zl);
+void quicklistInsertAfter(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistInsertBefore(quicklist *quicklist, quicklistEntry *node,
+ void *value, const size_t sz);
+void quicklistDelEntry(quicklistIter *iter, quicklistEntry *entry);
+int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
+ int sz);
+int quicklistDelRange(quicklist *quicklist, const long start, const long stop);
+quicklistIter *quicklistGetIterator(const quicklist *quicklist, int direction);
+quicklistIter *quicklistGetIteratorAtIdx(const quicklist *quicklist,
+ int direction, const long long idx);
+int quicklistNext(quicklistIter *iter, quicklistEntry *node);
+void quicklistReleaseIterator(quicklistIter *iter);
+quicklist *quicklistDup(quicklist *orig);
+int quicklistIndex(const quicklist *quicklist, const long long index,
+ quicklistEntry *entry);
+void quicklistRewind(quicklist *quicklist, quicklistIter *li);
+void quicklistRewindTail(quicklist *quicklist, quicklistIter *li);
+void quicklistRotate(quicklist *quicklist);
+int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *sval,
+ void *(*saver)(unsigned char *data, unsigned int sz));
+int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
+ unsigned int *sz, long long *slong);
+unsigned int quicklistCount(const quicklist *ql);
+int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len);
+size_t quicklistGetLzf(const quicklistNode *node, void **data);
+
+#ifdef REDIS_TEST
+int quicklistTest(int argc, char *argv[]);
+#endif
+
+/* Directions for iterators */
+#define AL_START_HEAD 0
+#define AL_START_TAIL 1
+
+#endif /* __QUICKLIST_H__ */
diff --git a/src/rand.c b/src/rand.c
index 36cb417cf..09b0508f1 100644
--- a/src/rand.c
+++ b/src/rand.c
@@ -66,7 +66,7 @@
#define HI_BIT (1L << (2 * N - 1))
static uint32_t x[3] = { X0, X1, X2 }, a[3] = { A0, A1, A2 }, c = C;
-static void next();
+static void next(void);
int32_t redisLrand48() {
next();
@@ -77,7 +77,7 @@ void redisSrand48(int32_t seedval) {
SEED(X0, LOW(seedval), HIGH(seedval));
}
-static void next() {
+static void next(void) {
uint32_t p[2], q[2], r[2], carry0, carry1;
MUL(a[0], x[0], p);
diff --git a/src/rax.c b/src/rax.c
new file mode 100644
index 000000000..dda008dff
--- /dev/null
+++ b/src/rax.c
@@ -0,0 +1,1733 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <math.h>
+#include "rax.h"
+
+#ifndef RAX_MALLOC_INCLUDE
+#define RAX_MALLOC_INCLUDE "rax_malloc.h"
+#endif
+
+#include RAX_MALLOC_INCLUDE
+
+/* This is a special pointer that is guaranteed to never have the same value
+ * of a radix tree node. It's used in order to report "not found" error without
+ * requiring the function to have multiple return values. */
+void *raxNotFound = (void*)"rax-not-found-pointer";
+
+/* -------------------------------- Debugging ------------------------------ */
+
+void raxDebugShowNode(const char *msg, raxNode *n);
+
+/* Turn debugging messages on/off. */
+#if 0
+#define debugf(...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+ fflush(stdout); \
+ } while (0);
+
+#define debugnode(msg,n) raxDebugShowNode(msg,n)
+#else
+#define debugf(...)
+#define debugnode(msg,n)
+#endif
+
+/* ------------------------- raxStack functions --------------------------
+ * The raxStack is a simple stack of pointers that is capable of switching
+ * from using a stack-allocated array to dynamic heap once a given number of
+ * items are reached. It is used in order to retain the list of parent nodes
+ * while walking the radix tree in order to implement certain operations that
+ * need to navigate the tree upward.
+ * ------------------------------------------------------------------------- */
+
+/* Initialize the stack. */
+static inline void raxStackInit(raxStack *ts) {
+ ts->stack = ts->static_items;
+ ts->items = 0;
+ ts->maxitems = RAX_STACK_STATIC_ITEMS;
+ ts->oom = 0;
+}
+
+/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
+static inline int raxStackPush(raxStack *ts, void *ptr) {
+ if (ts->items == ts->maxitems) {
+ if (ts->stack == ts->static_items) {
+ ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
+ if (ts->stack == NULL) {
+ ts->stack = ts->static_items;
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
+ } else {
+ void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
+ if (newalloc == NULL) {
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ ts->stack = newalloc;
+ }
+ ts->maxitems *= 2;
+ }
+ ts->stack[ts->items] = ptr;
+ ts->items++;
+ return 1;
+}
+
+/* Pop an item from the stack, the function returns NULL if there are no
+ * items to pop. */
+static inline void *raxStackPop(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ ts->items--;
+ return ts->stack[ts->items];
+}
+
+/* Return the stack item at the top of the stack without actually consuming
+ * it. */
+static inline void *raxStackPeek(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ return ts->stack[ts->items-1];
+}
+
+/* Free the stack in case we used heap allocation. */
+static inline void raxStackFree(raxStack *ts) {
+ if (ts->stack != ts->static_items) rax_free(ts->stack);
+}
+
+/* ----------------------------------------------------------------------------
+ * Radis tree implementation
+ * --------------------------------------------------------------------------*/
+
+/* Allocate a new non compressed node with the specified number of children.
+ * If datafiled is true, the allocation is made large enough to hold the
+ * associated data pointer.
+ * Returns the new node pointer. On out of memory NULL is returned. */
+raxNode *raxNewNode(size_t children, int datafield) {
+ size_t nodesize = sizeof(raxNode)+children+
+ sizeof(raxNode*)*children;
+ if (datafield) nodesize += sizeof(void*);
+ raxNode *node = rax_malloc(nodesize);
+ if (node == NULL) return NULL;
+ node->iskey = 0;
+ node->isnull = 0;
+ node->iscompr = 0;
+ node->size = children;
+ return node;
+}
+
+/* Allocate a new rax and return its pointer. On out of memory the function
+ * returns NULL. */
+rax *raxNew(void) {
+ rax *rax = rax_malloc(sizeof(*rax));
+ if (rax == NULL) return NULL;
+ rax->numele = 0;
+ rax->numnodes = 1;
+ rax->head = raxNewNode(0,0);
+ if (rax->head == NULL) {
+ rax_free(rax);
+ return NULL;
+ } else {
+ return rax;
+ }
+}
+
+/* Return the current total size of the node. */
+#define raxNodeCurrentLength(n) ( \
+ sizeof(raxNode)+(n)->size+ \
+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
+ (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
+)
+
+/* realloc the node to make room for auxiliary data in order
+ * to store an item in that node. On out of memory NULL is returned. */
+raxNode *raxReallocForData(raxNode *n, void *data) {
+ if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
+ size_t curlen = raxNodeCurrentLength(n);
+ return rax_realloc(n,curlen+sizeof(void*));
+}
+
+/* Set the node auxiliary data to the specified pointer. */
+void raxSetData(raxNode *n, void *data) {
+ n->iskey = 1;
+ if (data != NULL) {
+ n->isnull = 0;
+ void **ndata = (void**)
+ ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ memcpy(ndata,&data,sizeof(data));
+ } else {
+ n->isnull = 1;
+ }
+}
+
+/* Get the node auxiliary data. */
+void *raxGetData(raxNode *n) {
+ if (n->isnull) return NULL;
+ void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ void *data;
+ memcpy(&data,ndata,sizeof(data));
+ return data;
+}
+
+/* Add a new child to the node 'n' representing the character 'c' and return
+ * its new pointer, as well as the child pointer by reference. Additionally
+ * '***parentlink' is populated with the raxNode pointer-to-pointer of where
+ * the new child was stored, which is useful for the caller to replace the
+ * child pointer if it gets reallocated.
+ *
+ * On success the new parent node pointer is returned (it may change because
+ * of the realloc, so the caller should discard 'n' and use the new value).
+ * On out of memory NULL is returned, and the old node is still valid. */
+raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
+ assert(n->iscompr == 0);
+
+ size_t curlen = sizeof(raxNode)+
+ n->size+
+ sizeof(raxNode*)*n->size;
+ size_t newlen;
+
+ /* Alloc the new child we will link to 'n'. */
+ raxNode *child = raxNewNode(0,0);
+ if (child == NULL) return NULL;
+
+ /* Make space in the original node. */
+ if (n->iskey) curlen += sizeof(void*);
+ newlen = curlen+sizeof(raxNode*)+1; /* Add 1 char and 1 pointer. */
+ raxNode *newn = rax_realloc(n,newlen);
+ if (newn == NULL) {
+ rax_free(child);
+ return NULL;
+ }
+ n = newn;
+
+ /* After the reallocation, we have 5/9 (depending on the system
+ * pointer size) bytes at the end, that is, the additional char
+ * in the 'data' section, plus one pointer to the new child:
+ *
+ * [numc][abx][ap][bp][xp]|auxp|.....
+ *
+ * Let's find where to insert the new child in order to make sure
+ * it is inserted in-place lexicographically. */
+ int pos;
+ for (pos = 0; pos < n->size; pos++) {
+ if (n->data[pos] > c) break;
+ }
+
+ /* Now, if present, move auxiliary data pointer at the end
+ * so that we can mess with the other data without overwriting it.
+ * We will obtain something like that:
+ *
+ * [numc][abx][ap][bp][xp].....|auxp| */
+ unsigned char *src;
+ if (n->iskey && !n->isnull) {
+ src = n->data+n->size+sizeof(raxNode*)*n->size;
+ memmove(src+1+sizeof(raxNode*),src,sizeof(void*));
+ }
+
+ /* Now imagine we are adding a node with edge 'c'. The insertion
+ * point is between 'b' and 'x', so the 'pos' variable value is
+ * To start, move all the child pointers after the insertion point
+ * of 1+sizeof(pointer) bytes on the right, to obtain:
+ *
+ * [numc][abx][ap][bp].....[xp]|auxp| */
+ src = n->data+n->size+sizeof(raxNode*)*pos;
+ memmove(src+1+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
+
+ /* Now make the space for the additional char in the data section,
+ * but also move the pointers before the insertion point in the right
+ * by 1 byte, in order to obtain the following:
+ *
+ * [numc][ab.x][ap][bp]....[xp]|auxp| */
+ src = n->data+pos;
+ memmove(src+1,src,n->size-pos+sizeof(raxNode*)*pos);
+
+ /* We can now set the character and its child node pointer to get:
+ *
+ * [numc][abcx][ap][bp][cp]....|auxp|
+ * [numc][abcx][ap][bp][cp][xp]|auxp| */
+ n->data[pos] = c;
+ n->size++;
+ raxNode **childfield = (raxNode**)(n->data+n->size+sizeof(raxNode*)*pos);
+ memcpy(childfield,&child,sizeof(child));
+ *childptr = child;
+ *parentlink = childfield;
+ return n;
+}
+
+/* Return the pointer to the last child pointer in a node. For the compressed
+ * nodes this is the only child pointer. */
+#define raxNodeLastChildPtr(n) ((raxNode**) ( \
+ ((char*)(n)) + \
+ raxNodeCurrentLength(n) - \
+ sizeof(raxNode*) - \
+ (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
+))
+
+/* Return the pointer to the first child pointer. */
+#define raxNodeFirstChildPtr(n) ((raxNode**)((n)->data+(n)->size))
+
+/* Turn the node 'n', that must be a node without any children, into a
+ * compressed node representing a set of nodes linked one after the other
+ * and having exactly one child each. The node can be a key or not: this
+ * property and the associated value if any will be preserved.
+ *
+ * The function also returns a child node, since the last node of the
+ * compressed chain cannot be part of the chain: it has zero children while
+ * we can only compress inner nodes with exactly one child each. */
+raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
+ assert(n->size == 0 && n->iscompr == 0);
+ void *data = NULL; /* Initialized only to avoid warnings. */
+ size_t newsize;
+
+ debugf("Compress node: %.*s\n", (int)len,s);
+
+ /* Allocate the child to link to this node. */
+ *child = raxNewNode(0,0);
+ if (*child == NULL) return NULL;
+
+ /* Make space in the parent node. */
+ newsize = sizeof(raxNode)+len+sizeof(raxNode*);
+ if (n->iskey) {
+ data = raxGetData(n); /* To restore it later. */
+ if (!n->isnull) newsize += sizeof(void*);
+ }
+ raxNode *newn = rax_realloc(n,newsize);
+ if (newn == NULL) {
+ rax_free(*child);
+ return NULL;
+ }
+ n = newn;
+
+ n->iscompr = 1;
+ n->size = len;
+ memcpy(n->data,s,len);
+ if (n->iskey) raxSetData(n,data);
+ raxNode **childfield = raxNodeLastChildPtr(n);
+ memcpy(childfield,child,sizeof(*child));
+ return n;
+}
+
+/* Low level function that walks the tree looking for the string
+ * 's' of 'len' bytes. The function returns the number of characters
+ * of the key that was possible to process: if the returned integer
+ * is the same as 'len', then it means that the node corresponding to the
+ * string was found (however it may not be a key in case the node->iskey is
+ * zero or if simply we stopped in the middle of a compressed node, so that
+ * 'splitpos' is non zero).
+ *
+ * Otherwise if the returned integer is not the same as 'len', there was an
+ * early stop during the tree walk because of a character mismatch.
+ *
+ * The node where the search ended (because the full string was processed
+ * or because there was an early stop) is returned by reference as
+ * '*stopnode' if the passed pointer is not NULL. This node link in the
+ * parent's node is returned as '*plink' if not NULL. Finally, if the
+ * search stopped in a compressed node, '*splitpos' returns the index
+ * inside the compressed node where the search ended. This is useful to
+ * know where to split the node for insertion. */
+static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
+ raxNode *h = rax->head;
+ raxNode **parentlink = &rax->head;
+
+ size_t i = 0; /* Position in the string. */
+ size_t j = 0; /* Position in the node children (or bytes if compressed).*/
+ while(h->size && i < len) {
+ debugnode("Lookup current node",h);
+ unsigned char *v = h->data;
+
+ if (h->iscompr) {
+ for (j = 0; j < h->size && i < len; j++, i++) {
+ if (v[j] != s[i]) break;
+ }
+ if (j != h->size) break;
+ } else {
+ /* Even when h->size is large, linear scan provides good
+ * performances compared to other approaches that are in theory
+ * more sounding, like performing a binary search. */
+ for (j = 0; j < h->size; j++) {
+ if (v[j] == s[i]) break;
+ }
+ if (j == h->size) break;
+ i++;
+ }
+
+ if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
+ raxNode **children = raxNodeFirstChildPtr(h);
+ if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
+ memcpy(&h,children+j,sizeof(h));
+ parentlink = children+j;
+ j = 0; /* If the new node is compressed and we do not
+ iterate again (since i == l) set the split
+ position to 0 to signal this node represents
+ the searched key. */
+ }
+ debugnode("Lookup stop node is",h);
+ if (stopnode) *stopnode = h;
+ if (plink) *plink = parentlink;
+ if (splitpos && h->iscompr) *splitpos = j;
+ return i;
+}
+
+/* Insert the element 's' of size 'len', setting as auxiliary data
+ * the pointer 'data'. If the element is already present, the associated
+ * data is updated, and 0 is returned, otherwise the element is inserted
+ * and 1 is returned. On out of memory the function returns 0 as well but
+ * sets errno to ENOMEM, otherwise errno will be set to 0. */
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+ size_t i;
+ int j = 0; /* Split position. If raxLowWalk() stops in a compressed
+ node, the index 'j' represents the char we stopped within the
+ compressed node, that is, the position where to split the
+ node for insertion. */
+ raxNode *h, **parentlink;
+
+ debugf("### Insert %.*s with value %p\n", (int)len, s, data);
+ i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
+
+ /* If i == len we walked following the whole string. If we are not
+ * in the middle of a compressed node, the string is either already
+ * inserted or this middle node is currently not a key, but can represent
+ * our key. We have just to reallocate the node and make space for the
+ * data pointer. */
+ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
+ debugf("### Insert: node representing key exists\n");
+ if (!h->iskey || h->isnull) {
+ h = raxReallocForData(h,data);
+ if (h) memcpy(parentlink,&h,sizeof(h));
+ }
+ if (h == NULL) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (h->iskey) {
+ if (old) *old = raxGetData(h);
+ raxSetData(h,data);
+ errno = 0;
+ return 0; /* Element already exists. */
+ }
+ raxSetData(h,data);
+ rax->numele++;
+ return 1; /* Element inserted. */
+ }
+
+ /* If the node we stopped at is a compressed node, we need to
+ * split it before to continue.
+ *
+ * Splitting a compressed node have a few possibile cases.
+ * Imagine that the node 'h' we are currently at is a compressed
+ * node contaning the string "ANNIBALE" (it means that it represents
+ * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
+ * pointer of this node pointing at the 'E' node, because remember that
+ * we have characters at the edges of the graph, not inside the nodes
+ * themselves.
+ *
+ * In order to show a real case imagine our node to also point to
+ * another compressed node, that finally points at the node without
+ * children, representing 'O':
+ *
+ * "ANNIBALE" -> "SCO" -> []
+ *
+ * When inserting we may face the following cases. Note that all the cases
+ * require the insertion of a non compressed node with exactly two
+ * children, except for the last case which just requires splitting a
+ * compressed node.
+ *
+ * 1) Inserting "ANNIENTARE"
+ *
+ * |B| -> "ALE" -> "SCO" -> []
+ * "ANNI" -> |-|
+ * |E| -> (... continue algo ...) "NTARE" -> []
+ *
+ * 2) Inserting "ANNIBALI"
+ *
+ * |E| -> "SCO" -> []
+ * "ANNIBAL" -> |-|
+ * |I| -> (... continue algo ...) []
+ *
+ * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
+ *
+ * |N| -> "NIBALE" -> "SCO" -> []
+ * |A| -> |-|
+ * |G| -> (... continue algo ...) |O| -> []
+ *
+ * 4) Inserting "CIAO"
+ *
+ * |A| -> "NNIBALE" -> "SCO" -> []
+ * |-|
+ * |C| -> (... continue algo ...) "IAO" -> []
+ *
+ * 5) Inserting "ANNI"
+ *
+ * "ANNI" -> "BALE" -> "SCO" -> []
+ *
+ * The final algorithm for insertion covering all the above cases is as
+ * follows.
+ *
+ * ============================= ALGO 1 =============================
+ *
+ * For the above cases 1 to 4, that is, all cases where we stopped in
+ * the middle of a compressed node for a character mismatch, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we found the mismatching
+ * character. For example if the node contains "ANNIBALE" and we add
+ * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
+ * mismatching character is found.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create "split node" having as child the non common letter
+ * at the compressed node. The other non common letter (at the key)
+ * will be added later as we continue the normal insertion algorithm
+ * at step "6".
+ *
+ * 3a. IF $SPLITPOS == 0:
+ * Replace the old node with the split node, by copying the auxiliary
+ * data if any. Fix parent's reference. Free old node eventually
+ * (we still need its data for the next steps of the algorithm).
+ *
+ * 3b. IF $SPLITPOS != 0:
+ * Trim the compressed node (reallocating it as well) in order to
+ * contain $splitpos characters. Change chilid pointer in order to link
+ * to the split node. If new compressed node len is just 1, set
+ * iscompr to 0 (layout is the same). Fix parent's reference.
+ *
+ * 4a. IF the postfix len (the length of the remaining string of the
+ * original compressed node after the split character) is non zero,
+ * create a "postfix node". If the postfix node has just one character
+ * set iscompr to 0, otherwise iscompr to 1. Set the postfix node
+ * child pointer to $NEXT.
+ *
+ * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
+ *
+ * 5. Set child[0] of split node to postfix node.
+ *
+ * 6. Set the split node as the current node, set current index at child[1]
+ * and continue insertion algorithm as usually.
+ *
+ * ============================= ALGO 2 =============================
+ *
+ * For case 5, that is, if we stopped in the middle of a compressed
+ * node but no mismatch was found, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we stopped iterating because
+ * there were no more keys character to match. So in the example of
+ * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create a "postfix node" containing all the characters from $SPLITPOS
+ * to the end. Use $NEXT as the postfix node child pointer.
+ * If the postfix node length is 1, set iscompr to 0.
+ * Set the node as a key with the associated value of the new
+ * inserted key.
+ *
+ * 3. Trim the current node to contain the first $SPLITPOS characters.
+ * As usually if the new node length is just 1, set iscompr to 0.
+ * Take the iskey / associated value as it was in the orignal node.
+ * Fix the parent's reference.
+ *
+ * 4. Set the postfix node as the only child pointer of the trimmed
+ * node created at step 1.
+ */
+
+ /* ------------------------- ALGORITHM 1 --------------------------- */
+ if (h->iscompr && i != len) {
+ debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
+ h->size, h->data, (void*)h);
+ debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
+ debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
+ debugf("Other (key) letter is '%c'\n", s[i]);
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+ debugf("Next is %p\n", (void*)next);
+ debugf("iskey %d\n", h->iskey);
+ if (h->iskey) {
+ debugf("key value is %p\n", raxGetData(h));
+ }
+
+ /* Set the length of the additional nodes we will need. */
+ size_t trimmedlen = j;
+ size_t postfixlen = h->size - j - 1;
+ int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
+ size_t nodesize;
+
+ /* 2: Create the split node. Also allocate the other nodes we'll need
+ * ASAP, so that it will be simpler to handle OOM. */
+ raxNode *splitnode = raxNewNode(1, split_node_is_key);
+ raxNode *trimmed = NULL;
+ raxNode *postfix = NULL;
+
+ if (trimmedlen) {
+ nodesize = sizeof(raxNode)+trimmedlen+sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ trimmed = rax_malloc(nodesize);
+ }
+
+ if (postfixlen) {
+ nodesize = sizeof(raxNode)+postfixlen+
+ sizeof(raxNode*);
+ postfix = rax_malloc(nodesize);
+ }
+
+ /* OOM? Abort now that the tree is untouched. */
+ if (splitnode == NULL ||
+ (trimmedlen && trimmed == NULL) ||
+ (postfixlen && postfix == NULL))
+ {
+ rax_free(splitnode);
+ rax_free(trimmed);
+ rax_free(postfix);
+ errno = ENOMEM;
+ return 0;
+ }
+ splitnode->data[0] = h->data[j];
+
+ if (j == 0) {
+ /* 3a: Replace the old node with the split node. */
+ if (h->iskey) {
+ void *ndata = raxGetData(h);
+ raxSetData(splitnode,ndata);
+ }
+ memcpy(parentlink,&splitnode,sizeof(splitnode));
+ } else {
+ /* 3b: Trim the compressed node. */
+ trimmed->size = j;
+ memcpy(trimmed->data,h->data,j);
+ trimmed->iscompr = j > 1 ? 1 : 0;
+ trimmed->iskey = h->iskey;
+ trimmed->isnull = h->isnull;
+ if (h->iskey && !h->isnull) {
+ void *ndata = raxGetData(h);
+ raxSetData(trimmed,ndata);
+ }
+ raxNode **cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&splitnode,sizeof(splitnode));
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ parentlink = cp; /* Set parentlink to splitnode parent. */
+ rax->numnodes++;
+ }
+
+ /* 4: Create the postfix node: what remains of the original
+ * compressed node after the split. */
+ if (postfixlen) {
+ /* 4a: create a postfix node. */
+ postfix->iskey = 0;
+ postfix->isnull = 0;
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ memcpy(postfix->data,h->data+j+1,postfixlen);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+ } else {
+ /* 4b: just use next as postfix node. */
+ postfix = next;
+ }
+
+ /* 5: Set splitnode first child as the postfix node. */
+ raxNode **splitchild = raxNodeLastChildPtr(splitnode);
+ memcpy(splitchild,&postfix,sizeof(postfix));
+
+ /* 6. Continue insertion: this will cause the splitnode to
+ * get a new child (the non common character at the currently
+ * inserted key). */
+ rax_free(h);
+ h = splitnode;
+ } else if (h->iscompr && i == len) {
+ /* ------------------------- ALGORITHM 2 --------------------------- */
+ debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
+ h->size, h->data, (void*)h, j);
+
+ /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
+ size_t postfixlen = h->size - j;
+ size_t nodesize = sizeof(raxNode)+postfixlen+sizeof(raxNode*);
+ if (data != NULL) nodesize += sizeof(void*);
+ raxNode *postfix = rax_malloc(nodesize);
+
+ nodesize = sizeof(raxNode)+j+sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ raxNode *trimmed = rax_malloc(nodesize);
+
+ if (postfix == NULL || trimmed == NULL) {
+ rax_free(postfix);
+ rax_free(trimmed);
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+
+ /* 2: Create the postfix node. */
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ postfix->iskey = 1;
+ postfix->isnull = 0;
+ memcpy(postfix->data,h->data+j,postfixlen);
+ raxSetData(postfix,data);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+
+ /* 3: Trim the compressed node. */
+ trimmed->size = j;
+ trimmed->iscompr = j > 1;
+ trimmed->iskey = 0;
+ trimmed->isnull = 0;
+ memcpy(trimmed->data,h->data,j);
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ if (h->iskey) {
+ void *aux = raxGetData(h);
+ raxSetData(trimmed,aux);
+ }
+
+ /* Fix the trimmed node child pointer to point to
+ * the postfix node. */
+ cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&postfix,sizeof(postfix));
+
+ /* Finish! We don't need to contine with the insertion
+ * algorithm for ALGO 2. The key is already inserted. */
+ rax->numele++;
+ rax_free(h);
+ return 1; /* Key inserted. */
+ }
+
+ /* We walked the radix tree as far as we could, but still there are left
+ * chars in our string. We need to insert the missing nodes. */
+ while(i < len) {
+ raxNode *child;
+
+ /* If this node is going to have a single child, and there
+ * are other characters, so that that would result in a chain
+ * of single-childed nodes, turn it into a compressed node. */
+ if (h->size == 0 && len-i > 1) {
+ debugf("Inserting compressed node\n");
+ size_t comprsize = len-i;
+ if (comprsize > RAX_NODE_MAX_SIZE)
+ comprsize = RAX_NODE_MAX_SIZE;
+ raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = raxNodeLastChildPtr(h);
+ i += comprsize;
+ } else {
+ debugf("Inserting normal node\n");
+ raxNode **new_parentlink;
+ raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = new_parentlink;
+ i++;
+ }
+ rax->numnodes++;
+ h = child;
+ }
+ raxNode *newh = raxReallocForData(h,data);
+ if (newh == NULL) goto oom;
+ h = newh;
+ if (!h->iskey) rax->numele++;
+ raxSetData(h,data);
+ memcpy(parentlink,&h,sizeof(h));
+ return 1; /* Element inserted. */
+
+oom:
+ /* This code path handles out of memory after part of the sub-tree was
+ * already modified. Set the node as a key, and then remove it. However we
+ * do that only if the node is a terminal node, otherwise if the OOM
+ * happened reallocating a node in the middle, we don't need to free
+ * anything. */
+ if (h->size == 0) {
+ h->isnull = 1;
+ h->iskey = 1;
+ rax->numele++; /* Compensate the next remove. */
+ assert(raxRemove(rax,s,i,NULL) != 0);
+ }
+ errno = ENOMEM;
+ return 0;
+}
+
+/* Find a key in the rax, returns raxNotFound special void pointer value
+ * if the item was not found, otherwise the value associated with the
+ * item is returned. */
+void *raxFind(rax *rax, unsigned char *s, size_t len) {
+ raxNode *h;
+
+ debugf("### Lookup: %.*s\n", (int)len, s);
+ int splitpos = 0;
+ size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
+ if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
+ return raxNotFound;
+ return raxGetData(h);
+}
+
+/* Return the memory address where the 'parent' node stores the specified
+ * 'child' pointer, so that the caller can update the pointer with another
+ * one if needed. The function assumes it will find a match, otherwise the
+ * operation is an undefined behavior (it will continue scanning the
+ * memory without any bound checking). */
+raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ raxNode *c;
+ while(1) {
+ memcpy(&c,cp,sizeof(c));
+ if (c == child) break;
+ cp++;
+ }
+ return cp;
+}
+
+/* Low level child removal from node. The new node pointer (after the child
+ * removal) is returned. Note that this function does not fix the pointer
+ * of the parent node in its parent, so this task is up to the caller.
+ * The function never fails for out of memory. */
+raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
+ debugnode("raxRemoveChild before", parent);
+ /* If parent is a compressed node (having a single child, as for definition
+ * of the data structure), the removal of the child consists into turning
+ * it into a normal node without children. */
+ if (parent->iscompr) {
+ void *data = NULL;
+ if (parent->iskey) data = raxGetData(parent);
+ parent->isnull = 0;
+ parent->iscompr = 0;
+ parent->size = 0;
+ if (parent->iskey) raxSetData(parent,data);
+ debugnode("raxRemoveChild after", parent);
+ return parent;
+ }
+
+ /* Otherwise we need to scan for the children pointer and memmove()
+ * accordingly.
+ *
+ * 1. To start we seek the first element in both the children
+ * pointers and edge bytes in the node. */
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ raxNode **c = cp;
+ unsigned char *e = parent->data;
+
+ /* 2. Search the child pointer to remove inside the array of children
+ * pointers. */
+ while(1) {
+ raxNode *aux;
+ memcpy(&aux,c,sizeof(aux));
+ if (aux == child) break;
+ c++;
+ e++;
+ }
+
+ /* 3. Remove the edge and the pointer by memmoving the remaining children
+ * pointer and edge bytes one position before. */
+ int taillen = parent->size - (e - parent->data) - 1;
+ debugf("raxRemoveChild tail len: %d\n", taillen);
+ memmove(e,e+1,taillen);
+
+ /* Since we have one data byte less, also child pointers start one byte
+ * before now. */
+ memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**));
+
+ /* Move the remaining "tail" pointer at the right position as well. */
+ memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)+parent->iskey*sizeof(void*));
+
+ /* 4. Update size. */
+ parent->size--;
+
+ /* realloc the node according to the theoretical memory usage, to free
+ * data if we are over-allocating right now. */
+ raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
+ if (newnode) {
+ debugnode("raxRemoveChild after", newnode);
+ }
+ /* Note: if rax_realloc() fails we just return the old address, which
+ * is valid. */
+ return newnode ? newnode : parent;
+}
+
+/* Remove the specified item. Returns 1 if the item was found and
+ * deleted, 0 otherwise.
+ *
+ * 's' is the key of 'len' bytes to remove. If 'old' is not NULL it is
+ * set to the value that was associated with the deleted key. */
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
+    raxNode *h;
+    raxStack ts;
+
+    debugf("### Delete: %.*s\n", (int)len, s);
+    raxStackInit(&ts);
+    int splitpos = 0;
+    size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
+    /* The key exists only if the walk consumed it entirely, stopped on a
+     * node boundary (not in the middle of a compressed node), and the
+     * node reached is flagged as a key. */
+    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
+        raxStackFree(&ts);
+        return 0;
+    }
+    if (old) *old = raxGetData(h);
+    h->iskey = 0;
+    rax->numele--;
+
+    /* If this node has no children, the deletion needs to reclaim the
+     * no longer used nodes. This is an iterative process that needs to
+     * walk the tree upward, deleting all the nodes with just one child
+     * that are not keys, until the head of the rax is reached or the first
+     * node with more than one child is found. */
+
+    int trycompress = 0; /* Will be set to 1 if we should try to optimize the
+                            tree resulting from the deletion. */
+
+    if (h->size == 0) {
+        debugf("Key deleted in node without children. Cleanup needed.\n");
+        raxNode *child = NULL;
+        while(h != rax->head) {
+            child = h;
+            debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
+                (int)child->size, (char*)child->data, child->iskey);
+            rax_free(child);
+            rax->numnodes--;
+            h = raxStackPop(&ts);
+            /* If this node has more than one child, or actually holds
+             * a key, stop here. */
+            if (h->iskey || (!h->iscompr && h->size != 1)) break;
+        }
+        if (child) {
+            debugf("Unlinking child %p from parent %p\n",
+                (void*)child, (void*)h);
+            /* raxRemoveChild() may realloc to a smaller node, so the
+             * parent pointer can change: fix the grandparent's link. */
+            raxNode *new = raxRemoveChild(h,child);
+            if (new != h) {
+                raxNode *parent = raxStackPeek(&ts);
+                raxNode **parentlink;
+                if (parent == NULL) {
+                    parentlink = &rax->head;
+                } else {
+                    parentlink = raxFindParentLink(parent,h);
+                }
+                memcpy(parentlink,&new,sizeof(new));
+            }
+
+            /* If after the removal the node has just a single child
+             * and is not a key, we need to try to compress it. */
+            if (new->size == 1 && new->iskey == 0) {
+                trycompress = 1;
+                h = new;
+            }
+        }
+    } else if (h->size == 1) {
+        /* If the node had just one child, after the removal of the key
+         * further compression with adjacent nodes is potentially possible. */
+        trycompress = 1;
+    }
+
+    /* Don't try node compression if our nodes pointers stack is not
+     * complete because of OOM while executing raxLowWalk() */
+    if (trycompress && ts.oom) trycompress = 0;
+
+    /* Recompression: if trycompress is true, 'h' points to a radix tree node
+     * that changed in a way that could allow to compress nodes in this
+     * sub-branch. Compressed nodes represent chains of nodes that are not
+     * keys and have a single child, so there are two deletion events that
+     * may alter the tree so that further compression is needed:
+     *
+     * 1) A node with a single child was a key and now no longer is a key.
+     * 2) A node with two children now has just one child.
+     *
+     * We try to navigate upward till there are other nodes that can be
+     * compressed, when we reach the upper node which is not a key and has
+     * a single child, we scan the chain of children to collect the
+     * compressible part of the tree, and replace the current node with the
+     * new one, fixing the child pointer to reference the first non
+     * compressible node.
+     *
+     * Example of case "1". A tree stores the keys "FOO" = 1 and
+     * "FOOBAR" = 2:
+     *
+     *
+     * "FOO" -> "BAR" -> [] (2)
+     *           (1)
+     *
+     * After the removal of "FOO" the tree can be compressed as:
+     *
+     * "FOOBAR" -> [] (2)
+     *
+     *
+     * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
+     * "FOOTER" = 2:
+     *
+     *          |B| -> "AR" -> [] (1)
+     * "FOO" -> |-|
+     *          |T| -> "ER" -> [] (2)
+     *
+     * After the removal of "FOOTER" the resulting tree is:
+     *
+     * "FOO" -> |B| -> "AR" -> [] (1)
+     *
+     * That can be compressed into:
+     *
+     * "FOOBAR" -> [] (1)
+     */
+    if (trycompress) {
+        debugf("After removing %.*s:\n", (int)len, s);
+        debugnode("Compression may be needed",h);
+        debugf("Seek start node\n");
+
+        /* Try to reach the upper node that is compressible.
+         * At the end of the loop 'h' will point to the first node we
+         * can try to compress and 'parent' to its parent. */
+        raxNode *parent;
+        while(1) {
+            parent = raxStackPop(&ts);
+            if (!parent || parent->iskey ||
+                (!parent->iscompr && parent->size != 1)) break;
+            h = parent;
+            debugnode("Going up to",h);
+        }
+        raxNode *start = h; /* Compression starting node. */
+
+        /* Scan chain of nodes we can compress. */
+        size_t comprsize = h->size;
+        int nodes = 1;
+        while(h->size != 0) {
+            raxNode **cp = raxNodeLastChildPtr(h);
+            memcpy(&h,cp,sizeof(h));
+            if (h->iskey || (!h->iscompr && h->size != 1)) break;
+            /* Stop here if going to the next node would result into
+             * a compressed node larger than h->size can hold. */
+            if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
+            nodes++;
+            comprsize += h->size;
+        }
+        if (nodes > 1) {
+            /* If we can compress, create the new node and populate it. */
+            size_t nodesize =
+                sizeof(raxNode)+comprsize+sizeof(raxNode*);
+            raxNode *new = rax_malloc(nodesize);
+            /* An out of memory here just means we cannot optimize this
+             * node, but the tree is left in a consistent state. */
+            if (new == NULL) {
+                raxStackFree(&ts);
+                return 1;
+            }
+            new->iskey = 0;
+            new->isnull = 0;
+            new->iscompr = 1;
+            new->size = comprsize;
+            rax->numnodes++;
+
+            /* Scan again, this time to populate the new node content and
+             * to fix the new node child pointer. At the same time we free
+             * all the nodes that we'll no longer use. */
+            comprsize = 0;
+            h = start;
+            while(h->size != 0) {
+                memcpy(new->data+comprsize,h->data,h->size);
+                comprsize += h->size;
+                raxNode **cp = raxNodeLastChildPtr(h);
+                raxNode *tofree = h;
+                memcpy(&h,cp,sizeof(h));
+                rax_free(tofree); rax->numnodes--;
+                if (h->iskey || (!h->iscompr && h->size != 1)) break;
+            }
+            debugnode("New node",new);
+
+            /* Now 'h' points to the first node that we still need to use,
+             * so our new node child pointer will point to it. */
+            raxNode **cp = raxNodeLastChildPtr(new);
+            memcpy(cp,&h,sizeof(h));
+
+            /* Fix parent link. */
+            if (parent) {
+                raxNode **parentlink = raxFindParentLink(parent,start);
+                memcpy(parentlink,&new,sizeof(new));
+            } else {
+                rax->head = new;
+            }
+
+            debugf("Compressed %d nodes, %d total bytes\n",
+                nodes, (int)comprsize);
+        }
+    }
+    raxStackFree(&ts);
+    return 1;
+}
+
+/* This is the core of raxFree(): performs a depth-first scan of the
+ * tree and releases all the nodes found. */
+void raxRecursiveFree(rax *rax, raxNode *n) {
+    debugnode("free traversing",n);
+    /* A compressed node links exactly one child; a normal node links
+     * 'size' of them. Visit the child pointers starting from the last
+     * one and moving backward, recursing into each subtree. */
+    int children = n->iscompr ? 1 : n->size;
+    raxNode **childptr = raxNodeLastChildPtr(n);
+    for (int left = children; left > 0; left--) {
+        raxNode *kid;
+        memcpy(&kid,childptr,sizeof(kid));
+        raxRecursiveFree(rax,kid);
+        childptr--;
+    }
+    debugnode("free depth-first",n);
+    rax_free(n);
+    rax->numnodes--;
+}
+
+/* Free a whole radix tree: releases every node via a recursive
+ * depth-first scan, then the rax handle itself. The assert checks that
+ * the bookkeeping counter reached zero (active only in assert builds). */
+void raxFree(rax *rax) {
+    raxRecursiveFree(rax,rax->head);
+    assert(rax->numnodes == 0);
+    rax_free(rax);
+}
+
+/* ------------------------------- Iterator --------------------------------- */
+
+/* Initialize a Rax iterator. This call should be performed a single time
+ * to initialize the iterator, and must be followed by a raxSeek() call,
+ * otherwise the raxPrev()/raxNext() functions will just return EOF. */
+void raxStart(raxIterator *it, rax *rt) {
+    /* Start flagged as EOF so that an unseeked iterator does not crash:
+     * raxNext()/raxPrev() will just report end of iteration. */
+    it->flags = RAX_ITER_EOF;
+    it->rt = rt;
+    it->data = NULL;
+    /* The key buffer initially points at the static array embedded in
+     * the iterator; it is moved to the heap only if the key outgrows
+     * RAX_ITER_STATIC_LEN bytes (see raxIteratorAddChars()). */
+    it->key = it->key_static_string;
+    it->key_max = RAX_ITER_STATIC_LEN;
+    it->key_len = 0;
+    raxStackInit(&it->stack);
+}
+
+/* Append characters at the current key string of the iterator 'it'. This
+ * is a low level function used to implement the iterator, not callable by
+ * the user. Returns 0 on out of memory (errno set to ENOMEM), otherwise
+ * 1 is returned. */
+int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
+    size_t needed = it->key_len + len;
+    if (needed > it->key_max) {
+        /* Grow the buffer. If we are still on the static buffer we must
+         * realloc from NULL and copy the old content by hand. */
+        int on_static = (it->key == it->key_static_string);
+        unsigned char *old = on_static ? NULL : it->key;
+        size_t new_max = needed*2;
+        unsigned char *newkey = rax_realloc(old,new_max);
+        if (newkey == NULL) {
+            /* On failure 'it->key' is left untouched and still valid. */
+            errno = ENOMEM;
+            return 0;
+        }
+        if (on_static) memcpy(newkey,it->key_static_string,it->key_len);
+        it->key = newkey;
+        it->key_max = new_max;
+    }
+    /* Use memmove since 's' may point inside 'it->key' itself, when the
+     * current key is used in order to re-seek. */
+    memmove(it->key+it->key_len,s,len);
+    it->key_len += len;
+    return 1;
+}
+
+/* Remove the specified number of chars from the right of the current
+ * iterator key. Only the logical length is reduced; the buffer is not
+ * shrunk. NOTE: 'count' must not exceed it->key_len (unchecked here,
+ * otherwise the size_t subtraction would wrap around). */
+void raxIteratorDelChars(raxIterator *it, size_t count) {
+    it->key_len -= count;
+}
+
+/* Do an iteration step towards the next element. At the end of the step the
+ * iterator key will represent the (new) current key. If it is not possible
+ * to step in the specified direction since there are no longer elements, the
+ * iterator is flagged with RAX_ITER_EOF.
+ *
+ * If 'noup' is true the function starts directly scanning for the next
+ * lexicographically smaller children, and the current node is already assumed
+ * to be the parent of the last key node, so the first operation to go back to
+ * the parent will be skipped. This option is used by raxSeek() when
+ * implementing seeking a non existing element with the ">" or "<" options:
+ * the starting node is not a key in that particular case, so we start the scan
+ * from a node that does not represent the key set.
+ *
+ * The function returns 1 on success or 0 on out of memory. */
+int raxIteratorNextStep(raxIterator *it, int noup) {
+    if (it->flags & RAX_ITER_EOF) {
+        return 0;
+    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+        /* The iterator was just seeked: the current element is the one to
+         * report, so just clear the flag and return success. */
+        it->flags &= ~RAX_ITER_JUST_SEEKED;
+        return 1;
+    }
+
+    /* Save key len, stack items and the node where we are currently
+     * so that on iterator EOF we can restore the current key and state. */
+    size_t orig_key_len = it->key_len;
+    size_t orig_stack_items = it->stack.items;
+    raxNode *orig_node = it->node;
+
+    /* Clear the EOF flag: it will be set again if the EOF condition
+     * is still valid. */
+    it->flags &= ~RAX_ITER_EOF;
+
+    while(1) {
+        int children = it->node->iscompr ? 1 : it->node->size;
+        if (!noup && children) {
+            debugf("GO DEEPER\n");
+            /* Seek the lexicographically smaller key in this subtree, which
+             * is the first one found always going towards the first child
+             * of every successive node. */
+            if (!raxStackPush(&it->stack,it->node)) return 0;
+            raxNode **cp = raxNodeFirstChildPtr(it->node);
+            if (!raxIteratorAddChars(it,it->node->data,
+                it->node->iscompr ? it->node->size : 1)) return 0;
+            memcpy(&it->node,cp,sizeof(it->node));
+            /* For "next" step, stop every time we find a key along the
+             * way, since the key is lexicographically smaller compared to
+             * what follows in the sub-children. */
+            if (it->node->iskey) {
+                it->data = raxGetData(it->node);
+                return 1;
+            }
+        } else {
+            /* If we finished exploring the previous sub-tree, switch to the
+             * new one: go upper until a node is found where there are
+             * children representing keys lexicographically greater than the
+             * current key. */
+            while(1) {
+                int old_noup = noup;
+
+                /* Already on head? Can't go up, iteration finished. */
+                if (!noup && it->node == it->rt->head) {
+                    it->flags |= RAX_ITER_EOF;
+                    it->stack.items = orig_stack_items;
+                    it->key_len = orig_key_len;
+                    it->node = orig_node;
+                    return 1;
+                }
+                /* If there are no children at the current node, try parent's
+                 * next child. */
+                unsigned char prevchild = it->key[it->key_len-1];
+                if (!noup) {
+                    it->node = raxStackPop(&it->stack);
+                } else {
+                    noup = 0;
+                }
+                /* Adjust the current key to represent the node we are
+                 * at. */
+                int todel = it->node->iscompr ? it->node->size : 1;
+                raxIteratorDelChars(it,todel);
+
+                /* Try visiting the next child if there was at least one
+                 * additional child. */
+                if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+                    raxNode **cp = raxNodeFirstChildPtr(it->node);
+                    int i = 0;
+                    while (i < it->node->size) {
+                        debugf("SCAN NEXT %c\n", it->node->data[i]);
+                        if (it->node->data[i] > prevchild) break;
+                        i++;
+                        cp++;
+                    }
+                    if (i != it->node->size) {
+                        debugf("SCAN found a new node\n");
+                        /* Fix: propagate out of memory. The previous code
+                         * ignored the raxIteratorAddChars() return value at
+                         * this call site, unlike every other call site. */
+                        if (!raxIteratorAddChars(it,it->node->data+i,1))
+                            return 0;
+                        if (!raxStackPush(&it->stack,it->node)) return 0;
+                        memcpy(&it->node,cp,sizeof(it->node));
+                        if (it->node->iskey) {
+                            it->data = raxGetData(it->node);
+                            return 1;
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/* Seek the greatest key in the subtree at the current node. Return 0 on
+ * out of memory, otherwise 1. This is a helper function for different
+ * iteration functions below. */
+int raxSeekGreatest(raxIterator *it) {
+    for (;;) {
+        raxNode *n = it->node;
+        if (n->size == 0) break; /* Leaf reached: this is the max. */
+        /* Append the edge characters leading to the last child: the whole
+         * string for a compressed node, the last edge byte otherwise. */
+        unsigned char *chars = n->iscompr ? n->data : n->data + n->size - 1;
+        size_t nchars = n->iscompr ? n->size : 1;
+        if (!raxIteratorAddChars(it,chars,nchars)) return 0;
+        /* Descend into the last (greatest) child. */
+        raxNode **last = raxNodeLastChildPtr(n);
+        if (!raxStackPush(&it->stack,n)) return 0;
+        memcpy(&it->node,last,sizeof(it->node));
+    }
+    return 1;
+}
+
+/* Like raxIteratorNextStep() but implements an iteration step moving
+ * to the lexicographically previous element. The 'noup' option has a similar
+ * effect to the one of raxIteratorNextStep(). */
+int raxIteratorPrevStep(raxIterator *it, int noup) {
+    if (it->flags & RAX_ITER_EOF) {
+        return 0;
+    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+        /* The iterator was just seeked: the current element is the one to
+         * report, so just clear the flag and return success. */
+        it->flags &= ~RAX_ITER_JUST_SEEKED;
+        return 1;
+    }
+
+    /* Save key len, stack items and the node where we are currently
+     * so that on iterator EOF we can restore the current key and state. */
+    size_t orig_key_len = it->key_len;
+    size_t orig_stack_items = it->stack.items;
+    raxNode *orig_node = it->node;
+
+    while(1) {
+        int old_noup = noup;
+
+        /* Already on head? Can't go up, iteration finished. */
+        if (!noup && it->node == it->rt->head) {
+            it->flags |= RAX_ITER_EOF;
+            it->stack.items = orig_stack_items;
+            it->key_len = orig_key_len;
+            it->node = orig_node;
+            return 1;
+        }
+
+        /* Last edge byte of the current key: used below to find the
+         * rightmost child that is still smaller than it. */
+        unsigned char prevchild = it->key[it->key_len-1];
+        if (!noup) {
+            it->node = raxStackPop(&it->stack);
+        } else {
+            noup = 0;
+        }
+
+        /* Adjust the current key to represent the node we are
+         * at. */
+        int todel = it->node->iscompr ? it->node->size : 1;
+        raxIteratorDelChars(it,todel);
+
+        /* Try visiting the prev child if there is at least one
+         * child. */
+        if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+            raxNode **cp = raxNodeLastChildPtr(it->node);
+            int i = it->node->size-1;
+            while (i >= 0) {
+                debugf("SCAN PREV %c\n", it->node->data[i]);
+                if (it->node->data[i] < prevchild) break;
+                i--;
+                cp--;
+            }
+            /* If we found a new subtree to explore in this node,
+             * go deeper following all the last children in order to
+             * find the key lexicographically greater. */
+            if (i != -1) {
+                debugf("SCAN found a new node\n");
+                /* Enter the node we just found. */
+                if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
+                if (!raxStackPush(&it->stack,it->node)) return 0;
+                memcpy(&it->node,cp,sizeof(it->node));
+                /* Seek sub-tree max. */
+                if (!raxSeekGreatest(it)) return 0;
+            }
+        }
+
+        /* Return the key: this could be the key we found scanning a new
+         * subtree, or if we did not find a new subtree to explore here,
+         * before giving up with this node, check if it's a key itself. */
+        if (it->node->iskey) {
+            it->data = raxGetData(it->node);
+            return 1;
+        }
+    }
+}
+
+/* Seek an iterator at the specified element.
+ * Return 0 if the seek failed for syntax error or out of memory. Otherwise
+ * 1 is returned. When 0 is returned for out of memory, errno is set to
+ * the ENOMEM value.
+ *
+ * 'op' is one of: "==", ">", ">=", "<", "<=", "^" (first element) or
+ * "$" (last element). 'ele'/'len' are not used for "^" and "$". */
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
+    int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
+
+    it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */
+    it->flags |= RAX_ITER_JUST_SEEKED;
+    it->flags &= ~RAX_ITER_EOF;
+    it->key_len = 0;
+    it->node = NULL;
+
+    /* Set flags according to the operator used to perform the seek. */
+    if (op[0] == '>') {
+        gt = 1;
+        if (op[1] == '=') eq = 1;
+    } else if (op[0] == '<') {
+        lt = 1;
+        if (op[1] == '=') eq = 1;
+    } else if (op[0] == '=') {
+        eq = 1;
+    } else if (op[0] == '^') {
+        first = 1;
+    } else if (op[0] == '$') {
+        last = 1;
+    } else {
+        errno = 0;
+        return 0; /* Error. */
+    }
+
+    /* If there are no elements, set the EOF condition immediately and
+     * return. */
+    if (it->rt->numele == 0) {
+        it->flags |= RAX_ITER_EOF;
+        return 1;
+    }
+
+    if (first) {
+        /* Seeking the first key greater or equal to the empty string
+         * is equivalent to seeking the smaller key available. */
+        return raxSeek(it,">=",NULL,0);
+    }
+
+    if (last) {
+        /* Find the greatest key taking always the last child till a
+         * final node is found. */
+        it->node = it->rt->head;
+        if (!raxSeekGreatest(it)) return 0;
+        assert(it->node->iskey);
+        return 1;
+    }
+
+    /* We need to seek the specified key. What we do here is to actually
+     * perform a lookup, and later invoke the prev/next key code that
+     * we already use for iteration. */
+    int splitpos = 0;
+    size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
+
+    /* Return OOM on incomplete stack info. */
+    if (it->stack.oom) return 0;
+
+    if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
+        it->node->iskey)
+    {
+        /* We found our node, since the key matches and we have an
+         * "equal" condition. */
+        if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
+    } else if (lt || gt) {
+        /* Exact key not found or eq flag not set. We have to set as current
+         * key the one represented by the node we stopped at, and perform
+         * a next/prev operation to seek. To reconstruct the key at this node
+         * we start from the parent and go to the current node, accumulating
+         * the characters found along the way. */
+        if (!raxStackPush(&it->stack,it->node)) return 0;
+        for (size_t j = 1; j < it->stack.items; j++) {
+            raxNode *parent = it->stack.stack[j-1];
+            raxNode *child = it->stack.stack[j];
+            if (parent->iscompr) {
+                if (!raxIteratorAddChars(it,parent->data,parent->size))
+                    return 0;
+            } else {
+                /* Scan the parent's child pointers to find which edge
+                 * byte leads to 'child', and append that single byte. */
+                raxNode **cp = raxNodeFirstChildPtr(parent);
+                unsigned char *p = parent->data;
+                while(1) {
+                    raxNode *aux;
+                    memcpy(&aux,cp,sizeof(aux));
+                    if (aux == child) break;
+                    cp++;
+                    p++;
+                }
+                if (!raxIteratorAddChars(it,p,1)) return 0;
+            }
+        }
+        raxStackPop(&it->stack);
+
+        /* We need to set the iterator in the correct state to call next/prev
+         * step in order to seek the desired element. */
+        debugf("After initial seek: i=%d len=%d key=%.*s\n",
+            (int)i, (int)len, (int)it->key_len, it->key);
+        if (i != len && !it->node->iscompr) {
+            /* If we stopped in the middle of a normal node because of a
+             * mismatch, add the mismatching character to the current key
+             * and call the iterator with the 'noup' flag so that it will try
+             * to seek the next/prev child in the current node directly based
+             * on the mismatching character. */
+            if (!raxIteratorAddChars(it,ele+i,1)) return 0;
+            debugf("Seek normal node on mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (lt && !raxIteratorPrevStep(it,1)) return 0;
+            if (gt && !raxIteratorNextStep(it,1)) return 0;
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        } else if (i != len && it->node->iscompr) {
+            debugf("Compressed mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+            /* In case of a mismatch within a compressed node. */
+            int nodechar = it->node->data[splitpos];
+            int keychar = ele[i];
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (gt) {
+                /* If the key the compressed node represents is greater
+                 * than our seek element, continue forward, otherwise set the
+                 * state in order to go back to the next sub-tree. */
+                if (nodechar > keychar) {
+                    if (!raxIteratorNextStep(it,0)) return 0;
+                } else {
+                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+                        return 0;
+                    if (!raxIteratorNextStep(it,1)) return 0;
+                }
+            }
+            if (lt) {
+                /* If the key the compressed node represents is smaller
+                 * than our seek element, seek the greater key in this
+                 * subtree, otherwise set the state in order to go back to
+                 * the previous sub-tree. */
+                if (nodechar < keychar) {
+                    if (!raxSeekGreatest(it)) return 0;
+                } else {
+                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+                        return 0;
+                    if (!raxIteratorPrevStep(it,1)) return 0;
+                }
+            }
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        } else {
+            debugf("No mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+            /* If there was no mismatch we are into a node representing the
+             * key, (but which is not a key or the seek operator does not
+             * include 'eq'), or we stopped in the middle of a compressed node
+             * after processing all the key. Continue iterating as this was
+             * a legitimate key we stopped at. */
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (gt && !raxIteratorNextStep(it,0)) return 0;
+            if (lt && !raxIteratorPrevStep(it,0)) return 0;
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        }
+    } else {
+        /* If we are here just eq was set but no match was found. */
+        it->flags |= RAX_ITER_EOF;
+        return 1;
+    }
+    return 1;
+}
+
+/* Go to the next element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM.
+ *
+ * NOTE(review): if the iterator is already flagged EOF when this is
+ * called, raxIteratorNextStep() returns 0 immediately and errno is set
+ * to ENOMEM here even though no allocation failed. */
+int raxNext(raxIterator *it) {
+    if (!raxIteratorNextStep(it,0)) {
+        errno = ENOMEM;
+        return 0;
+    }
+    /* The step succeeded but found no further element: report EOF with
+     * errno cleared so callers can tell it apart from OOM. */
+    if (it->flags & RAX_ITER_EOF) {
+        errno = 0;
+        return 0;
+    }
+    return 1;
+}
+
+/* Go to the previous element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM.
+ *
+ * NOTE(review): as with raxNext(), calling this on an already-EOF
+ * iterator sets errno to ENOMEM even though no allocation failed. */
+int raxPrev(raxIterator *it) {
+    if (!raxIteratorPrevStep(it,0)) {
+        errno = ENOMEM;
+        return 0;
+    }
+    /* The step succeeded but found no further element: report EOF with
+     * errno cleared so callers can tell it apart from OOM. */
+    if (it->flags & RAX_ITER_EOF) {
+        errno = 0;
+        return 0;
+    }
+    return 1;
+}
+
+/* Perform a random walk starting in the current position of the iterator.
+ * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
+ * and the iterator is set to the node reached after doing a random walk
+ * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
+ * using a random number of steps between 1 and two times the logarithm of
+ * the number of elements.
+ *
+ * NOTE: if you use this function to generate random elements from the radix
+ * tree, expect a disappointing distribution. A random walk produces good
+ * random elements if the tree is not sparse, however in the case of a radix
+ * tree certain keys will be reported much more often than others. At least
+ * this function should be able to explore every possible element eventually. */
+int raxRandomWalk(raxIterator *it, size_t steps) {
+    if (it->rt->numele == 0) {
+        it->flags |= RAX_ITER_EOF;
+        return 0;
+    }
+
+    if (steps == 0) {
+        /* Fix: use 1+floor(log(numele)) so 'fle' is never zero. With a
+         * single element log(1) == 0 and the modulo below would divide
+         * by zero. */
+        size_t fle = 1+floor(log(it->rt->numele));
+        fle *= 2;
+        steps = 1 + rand() % fle;
+    }
+
+    raxNode *n = it->node;
+    while(steps > 0 || !n->iskey) {
+        int numchildren = n->iscompr ? 1 : n->size;
+        /* One extra outcome ("go to the parent") is possible whenever we
+         * are not at the root node. */
+        int r = rand() % (numchildren+(n != it->rt->head));
+
+        if (r == numchildren) {
+            /* Go up to parent. */
+            n = raxStackPop(&it->stack);
+            int todel = n->iscompr ? n->size : 1;
+            raxIteratorDelChars(it,todel);
+        } else {
+            /* Select a random child. */
+            if (n->iscompr) {
+                if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
+            } else {
+                if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
+            }
+            raxNode **cp = raxNodeFirstChildPtr(n)+r;
+            if (!raxStackPush(&it->stack,n)) return 0;
+            memcpy(&n,cp,sizeof(n));
+        }
+        if (n->iskey) steps--;
+    }
+    it->node = n;
+    return 1;
+}
+
+/* Compare the key currently pointed by the iterator to the specified
+ * key according to the specified operator ("==", "=", ">", ">=", "<",
+ * "<="). Returns 1 if the comparison is true, otherwise 0 is returned
+ * (0 is also returned on operator syntax error). */
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
+    int eq = 0, lt = 0, gt = 0;
+
+    /* Fix: parse the relational operator from op[0], consistently with
+     * raxSeek(). The previous code looked at op[1], so ">" and "<" were
+     * rejected as syntax errors and ">="/"<=" degraded to plain equality. */
+    if (op[0] == '=' || op[1] == '=') eq = 1;
+    if (op[0] == '>') gt = 1;
+    else if (op[0] == '<') lt = 1;
+    else if (!eq) return 0; /* Syntax error. */
+
+    size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;
+    int cmp = memcmp(iter->key,key,minlen);
+
+    /* Handle == */
+    if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;
+
+    /* Handle >, >=, <, <= */
+    if (cmp == 0) {
+        /* Same prefix: longer wins. */
+        if (eq && key_len == iter->key_len) return 1;
+        else if (lt) return iter->key_len < key_len;
+        else return iter->key_len > key_len;
+    } else if (cmp > 0) { /* Fix: 'else' was missing before this branch. */
+        return gt ? 1 : 0;
+    } else { /* (cmp < 0) */
+        return lt ? 1 : 0;
+    }
+}
+
+/* Free the iterator. Safe to call after raxStart() regardless of whether
+ * the iterator was ever seeked. */
+void raxStop(raxIterator *it) {
+    /* Release the key buffer only if it was moved to the heap: the
+     * static buffer lives inside the iterator itself. */
+    if (it->key != it->key_static_string) rax_free(it->key);
+    raxStackFree(&it->stack);
+}
+
+/* ----------------------------- Introspection ------------------------------ */
+
+/* This function is mostly used for debugging and learning purposes.
+ * It shows an ASCII representation of a tree on standard output, outlining
+ * all the nodes and the contained keys.
+ *
+ * The representation is as follows:
+ *
+ * "foobar" (compressed node)
+ * [abc] (normal node with three children)
+ * [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
+ * [] (a normal empty node)
+ *
+ * Children are represented in new indented lines, each child prefixed by
+ * the "`-(x)" string, where "x" is the edge byte.
+ *
+ * [abc]
+ * `-(a) "ladin"
+ * `-(b) [kj]
+ * `-(c) []
+ *
+ * However when a node has a single child the following representation
+ * is used instead:
+ *
+ * [abc] -> "ladin" -> []
+ */
+
+/* The actual implementation of raxShow(). 'level' is the recursion depth
+ * (0 at the root) and 'lpad' is the left padding, in spaces, applied to
+ * branches printed for multi-child nodes of this subtree. */
+void raxRecursiveShow(int level, int lpad, raxNode *n) {
+    /* Compressed nodes are quoted, normal nodes are bracketed. */
+    char s = n->iscompr ? '"' : '[';
+    char e = n->iscompr ? '"' : ']';
+
+    /* printf() returns the number of characters emitted: used below to
+     * keep single-child chains aligned on the same output line. */
+    int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
+    if (n->iskey) {
+        numchars += printf("=%p",raxGetData(n));
+    }
+
+    int numchildren = n->iscompr ? 1 : n->size;
+    /* Note that 7 and 4 magic constants are the string length
+     * of " `-(x) " and " -> " respectively. */
+    if (level) {
+        lpad += (numchildren > 1) ? 7 : 4;
+        if (numchildren == 1) lpad += numchars;
+    }
+    raxNode **cp = raxNodeFirstChildPtr(n);
+    for (int i = 0; i < numchildren; i++) {
+        char *branch = " `-(%c) ";
+        if (numchildren > 1) {
+            printf("\n");
+            for (int j = 0; j < lpad; j++) putchar(' ');
+            printf(branch,n->data[i]);
+        } else {
+            printf(" -> ");
+        }
+        raxNode *child;
+        memcpy(&child,cp,sizeof(child));
+        raxRecursiveShow(level+1,lpad,child);
+        cp++;
+    }
+}
+
+/* Show a tree, as outlined in the comment above: entry point that starts
+ * the recursive dump at the root node and terminates the output line. */
+void raxShow(rax *rax) {
+    raxRecursiveShow(0,0,rax->head);
+    putchar('\n');
+}
+
+/* Used by debugnode() macro to show info about a given node: its flags,
+ * edge bytes and the list of child pointers. */
+void raxDebugShowNode(const char *msg, raxNode *n) {
+    printf("%s: %p [%.*s] key:%d size:%d children:",
+        msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
+    int numcld = n->iscompr ? 1 : n->size;
+    /* Child pointers live at the end of the data section: rewind from the
+     * last one to the first, then print them in order. */
+    raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
+    while(numcld--) {
+        raxNode *child;
+        memcpy(&child,cldptr,sizeof(child));
+        cldptr++;
+        printf("%p ", (void*)child);
+    }
+    printf("\n");
+    fflush(stdout); /* Flush so debug output interleaves correctly. */
+}
+
+
diff --git a/src/rax.h b/src/rax.h
new file mode 100644
index 000000000..6f91f4c1b
--- /dev/null
+++ b/src/rax.h
@@ -0,0 +1,160 @@
+#ifndef RAX_H
+#define RAX_H
+
+#include <stdint.h>
+
+/* Representation of a radix tree as implemented in this file, that contains
+ * the strings "foo", "foobar" and "footer" after the insertion of each
+ * word. When the node represents a key inside the radix tree, we write it
+ * between [], otherwise it is written between ().
+ *
+ * This is the vanilla representation:
+ *
+ * (f) ""
+ * \
+ * (o) "f"
+ * \
+ * (o) "fo"
+ * \
+ * [t b] "foo"
+ * / \
+ * "foot" (e) (a) "foob"
+ * / \
+ * "foote" (r) (r) "fooba"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However, this implementation implements a very common optimization where
+ * successive nodes having a single child are "compressed" into the node
+ * itself as a string of characters, each representing a next-level child,
+ * and only the link to the node representing the last character node is
+ * provided inside the representation. So the above representation is turned
+ * into:
+ *
+ * ["foo"] ""
+ * |
+ * [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However this optimization makes the implementation a bit more complex.
+ * For instance if a key "first" is added in the above radix tree, a
+ * "node splitting" operation is needed, since the "foo" prefix is no longer
+ * composed of nodes having a single child one after the other. This is the
+ * above tree and the resulting node splitting after this event happens:
+ *
+ *
+ * (f) ""
+ * /
+ * (i o) "f"
+ * / \
+ * "firs" ("rst") (o) "fo"
+ * / \
+ * "first" [] [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * Similarly after deletion, if a new chain of nodes having a single child
+ * is created (the chain must also not include nodes that represent keys),
+ * it must be compressed back into a single node.
+ *
+ */
+
+/* Maximum number of children (or compressed string length): bounded by
+ * the 29 bits of the 'size' bitfield below. */
+#define RAX_NODE_MAX_SIZE ((1<<29)-1)
+typedef struct raxNode {
+    uint32_t iskey:1;   /* Does this node contain a key? */
+    uint32_t isnull:1;  /* Associated value is NULL (don't store it). */
+    uint32_t iscompr:1; /* Node is compressed. */
+    uint32_t size:29;   /* Number of children, or compressed string len. */
+    /* Data layout is as follows:
+     *
+     * If node is not compressed we have 'size' bytes, one for each children
+     * character, and 'size' raxNode pointers, pointing to each child node.
+     * Note how the character is not stored in the children but in the
+     * edge of the parents:
+     *
+     * [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
+     *
+     * if node is compressed (strlen != 0) the node has a single child.
+     * In that case the 'size' bytes of the string stored immediately at
+     * the start of the data section, represent a sequence of successive
+     * nodes linked one after the other, for which only the last one in
+     * the sequence is actually represented as a node, and pointed to by
+     * the current compressed node.
+     *
+     * [header strlen=3][xyz][z-ptr](value-ptr?)
+     *
+     * Both compressed and not compressed nodes can represent a key
+     * with associated data in the radix tree at any level (not just terminal
+     * nodes).
+     *
+     * If the node has an associated key (iskey=1) and is not NULL
+     * (isnull=0), then after the raxNode pointers pointing to the
+     * children, an additional value pointer is present (as you can see
+     * in the representation above as "value-ptr" field).
+     */
+    unsigned char data[];
+} raxNode;
+
+typedef struct rax {
+    raxNode *head;     /* Root node of the radix tree. */
+    uint64_t numele;   /* Number of elements (keys) stored. */
+    uint64_t numnodes; /* Number of raxNode structures allocated. */
+} rax;
+
+/* Stack data structure used by raxLowWalk() in order to, optionally, return
+ * a list of parent nodes to the caller. The nodes do not have a "parent"
+ * field for space concerns, so we use the auxiliary stack when needed. */
+#define RAX_STACK_STATIC_ITEMS 32
+typedef struct raxStack {
+    void **stack; /* Points to static_items or a heap allocated array. */
+    size_t items, maxitems; /* Number of items contained and total space. */
+    /* Up to RAX_STACK_STATIC_ITEMS items we avoid to allocate on the heap
+     * and use this static array of pointers instead. */
+    void *static_items[RAX_STACK_STATIC_ITEMS];
+    int oom; /* True if pushing into this stack failed for OOM at some point. */
+} raxStack;
+
+/* Radix tree iterator state is encapsulated into this data structure. */
+#define RAX_ITER_STATIC_LEN 128
+#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
+                                       element for the first iteration and
+                                       clear the flag. */
+#define RAX_ITER_EOF (1<<1)    /* End of iteration reached. */
+#define RAX_ITER_SAFE (1<<2)   /* Safe iterator, allows operations while
+                                  iterating. But it is slower. */
+typedef struct raxIterator {
+    int flags;              /* RAX_ITER_* flags defined above. */
+    rax *rt;                /* Radix tree we are iterating. */
+    unsigned char *key;     /* The current string. */
+    void *data;             /* Data associated to this key. */
+    size_t key_len;         /* Current key length. */
+    size_t key_max;         /* Max key len the current key buffer can hold. */
+    /* Static buffer holding keys up to RAX_ITER_STATIC_LEN bytes, to
+     * avoid a heap allocation for short keys. */
+    unsigned char key_static_string[RAX_ITER_STATIC_LEN];
+    raxNode *node;          /* Current node. Only for unsafe iteration. */
+    raxStack stack;         /* Stack used for unsafe iteration. */
+} raxIterator;
+
+/* A special pointer returned for not found items. */
+extern void *raxNotFound;
+
+/* Exported API. */
+rax *raxNew(void);
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
+void *raxFind(rax *rax, unsigned char *s, size_t len);
+void raxFree(rax *rax);
+/* Iterator API: call raxStart(), then raxSeek(), then raxNext()/raxPrev()
+ * until they return 0, and finally raxStop() to release resources. */
+void raxStart(raxIterator *it, rax *rt);
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
+int raxNext(raxIterator *it);
+int raxPrev(raxIterator *it);
+int raxRandomWalk(raxIterator *it, size_t steps);
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
+void raxStop(raxIterator *it);
+/* Debugging: print an ASCII representation of the tree on stdout. */
+void raxShow(rax *rax);
+
+#endif
diff --git a/src/rax_malloc.h b/src/rax_malloc.h
new file mode 100644
index 000000000..9295985c6
--- /dev/null
+++ b/src/rax_malloc.h
@@ -0,0 +1,44 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the Rax allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef RAX_ALLOC_H
+#define RAX_ALLOC_H
+#include "zmalloc.h"
+#define rax_malloc zmalloc
+#define rax_realloc zrealloc
+#define rax_free zfree
+#endif
diff --git a/src/rdb.c b/src/rdb.c
index ce5f99c9a..792c8ff94 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "lzf.h" /* LZF compression library */
#include "zipmap.h"
#include "endianconv.h"
@@ -39,6 +39,34 @@
#include <sys/wait.h>
#include <arpa/inet.h>
#include <sys/stat.h>
+#include <sys/param.h>
+
+#define rdbExitReportCorruptRDB(...) rdbCheckThenExit(__LINE__,__VA_ARGS__)
+
+extern int rdbCheckMode;
+void rdbCheckError(const char *fmt, ...);
+void rdbCheckSetError(const char *fmt, ...);
+
+void rdbCheckThenExit(int linenum, char *reason, ...) {
+ va_list ap;
+ char msg[1024];
+ int len;
+
+ len = snprintf(msg,sizeof(msg),
+ "Internal error in RDB reading function at rdb.c:%d -> ", linenum);
+ va_start(ap,reason);
+ vsnprintf(msg+len,sizeof(msg)-len,reason,ap);
+ va_end(ap);
+
+ if (!rdbCheckMode) {
+ serverLog(LL_WARNING, "%s", msg);
+ char *argv[2] = {"",server.rdb_filename};
+ redis_check_rdb_main(2,argv,NULL);
+ } else {
+ rdbCheckError("%s",msg);
+ }
+ exit(1);
+}
static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
if (rdb && rioWrite(rdb,p,len) == 0)
@@ -77,61 +105,96 @@ long long rdbLoadMillisecondTime(rio *rdb) {
}
/* Saves an encoded length. The first two bits in the first byte are used to
- * hold the encoding type. See the REDIS_RDB_* definitions for more information
+ * hold the encoding type. See the RDB_* definitions for more information
* on the types of encoding. */
-int rdbSaveLen(rio *rdb, uint32_t len) {
+int rdbSaveLen(rio *rdb, uint64_t len) {
unsigned char buf[2];
size_t nwritten;
if (len < (1<<6)) {
/* Save a 6 bit len */
- buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
+ buf[0] = (len&0xFF)|(RDB_6BITLEN<<6);
if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
nwritten = 1;
} else if (len < (1<<14)) {
/* Save a 14 bit len */
- buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
+ buf[0] = ((len>>8)&0xFF)|(RDB_14BITLEN<<6);
buf[1] = len&0xFF;
if (rdbWriteRaw(rdb,buf,2) == -1) return -1;
nwritten = 2;
- } else {
+ } else if (len <= UINT32_MAX) {
/* Save a 32 bit len */
- buf[0] = (REDIS_RDB_32BITLEN<<6);
+ buf[0] = RDB_32BITLEN;
if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
- len = htonl(len);
- if (rdbWriteRaw(rdb,&len,4) == -1) return -1;
+ uint32_t len32 = htonl(len);
+ if (rdbWriteRaw(rdb,&len32,4) == -1) return -1;
nwritten = 1+4;
+ } else {
+ /* Save a 64 bit len */
+ buf[0] = RDB_64BITLEN;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ len = htonu64(len);
+ if (rdbWriteRaw(rdb,&len,8) == -1) return -1;
+ nwritten = 1+8;
}
return nwritten;
}
-/* Load an encoded length. The "isencoded" argument is set to 1 if the length
- * is not actually a length but an "encoding type". See the REDIS_RDB_ENC_*
- * definitions in rdb.h for more information. */
-uint32_t rdbLoadLen(rio *rdb, int *isencoded) {
+
+/* Load an encoded length. If the loaded length is a normal length as stored
+ * with rdbSaveLen(), the read length is set to '*lenptr'. If instead the
+ * loaded length describes a special encoding that follows, then '*isencoded'
+ * is set to 1 and the encoding format is stored at '*lenptr'.
+ *
+ * See the RDB_ENC_* definitions in rdb.h for more information on special
+ * encodings.
+ *
+ * The function returns -1 on error, 0 on success. */
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr) {
unsigned char buf[2];
- uint32_t len;
int type;
if (isencoded) *isencoded = 0;
- if (rioRead(rdb,buf,1) == 0) return REDIS_RDB_LENERR;
+ if (rioRead(rdb,buf,1) == 0) return -1;
type = (buf[0]&0xC0)>>6;
- if (type == REDIS_RDB_ENCVAL) {
+ if (type == RDB_ENCVAL) {
/* Read a 6 bit encoding type. */
if (isencoded) *isencoded = 1;
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_6BITLEN) {
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_6BITLEN) {
/* Read a 6 bit len. */
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_14BITLEN) {
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_14BITLEN) {
/* Read a 14 bit len. */
- if (rioRead(rdb,buf+1,1) == 0) return REDIS_RDB_LENERR;
- return ((buf[0]&0x3F)<<8)|buf[1];
- } else {
+ if (rioRead(rdb,buf+1,1) == 0) return -1;
+ *lenptr = ((buf[0]&0x3F)<<8)|buf[1];
+ } else if (buf[0] == RDB_32BITLEN) {
/* Read a 32 bit len. */
- if (rioRead(rdb,&len,4) == 0) return REDIS_RDB_LENERR;
- return ntohl(len);
+ uint32_t len;
+ if (rioRead(rdb,&len,4) == 0) return -1;
+ *lenptr = ntohl(len);
+ } else if (buf[0] == RDB_64BITLEN) {
+ /* Read a 64 bit len. */
+ uint64_t len;
+ if (rioRead(rdb,&len,8) == 0) return -1;
+ *lenptr = ntohu64(len);
+ } else {
+ rdbExitReportCorruptRDB(
+ "Unknown length encoding %d in rdbLoadLen()",type);
+ return -1; /* Never reached. */
}
+ return 0;
+}
+
+/* This is like rdbLoadLenByRef() but directly returns the value read
+ * from the RDB stream, signaling an error by returning RDB_LENERR
+ * (since it is a too large count to be applicable in any Redis data
+ * structure). */
+uint64_t rdbLoadLen(rio *rdb, int *isencoded) {
+ uint64_t len;
+
+ if (rdbLoadLenByRef(rdb,isencoded,&len) == -1) return RDB_LENERR;
+ return len;
}
/* Encodes the "value" argument as integer when it fits in the supported ranges
@@ -140,16 +203,16 @@ uint32_t rdbLoadLen(rio *rdb, int *isencoded) {
* length is returned. Otherwise 0 is returned. */
int rdbEncodeInteger(long long value, unsigned char *enc) {
if (value >= -(1<<7) && value <= (1<<7)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT8;
enc[1] = value&0xFF;
return 2;
} else if (value >= -(1<<15) && value <= (1<<15)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT16;
enc[1] = value&0xFF;
enc[2] = (value>>8)&0xFF;
return 3;
} else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT32;
enc[1] = value&0xFF;
enc[2] = (value>>8)&0xFF;
enc[3] = (value>>16)&0xFF;
@@ -161,33 +224,44 @@ int rdbEncodeInteger(long long value, unsigned char *enc) {
}
/* Loads an integer-encoded object with the specified encoding type "enctype".
- * If the "encode" argument is set the function may return an integer-encoded
- * string object, otherwise it always returns a raw string object. */
-robj *rdbLoadIntegerObject(rio *rdb, int enctype, int encode) {
+ * The returned value changes according to the flags, see
+ * rdbGenerincLoadStringObject() for more info. */
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
+ int encode = flags & RDB_LOAD_ENC;
unsigned char enc[4];
long long val;
- if (enctype == REDIS_RDB_ENC_INT8) {
+ if (enctype == RDB_ENC_INT8) {
if (rioRead(rdb,enc,1) == 0) return NULL;
val = (signed char)enc[0];
- } else if (enctype == REDIS_RDB_ENC_INT16) {
+ } else if (enctype == RDB_ENC_INT16) {
uint16_t v;
if (rioRead(rdb,enc,2) == 0) return NULL;
v = enc[0]|(enc[1]<<8);
val = (int16_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT32) {
+ } else if (enctype == RDB_ENC_INT32) {
uint32_t v;
if (rioRead(rdb,enc,4) == 0) return NULL;
v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
val = (int32_t)v;
} else {
val = 0; /* anti-warning */
- redisPanic("Unknown RDB integer encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB integer encoding type %d",enctype);
}
- if (encode)
+ if (plain || sds) {
+ char buf[LONG_STR_SIZE], *p;
+ int len = ll2string(buf,sizeof(buf),val);
+ if (lenptr) *lenptr = len;
+ p = plain ? zmalloc(len) : sdsnewlen(NULL,len);
+ memcpy(p,buf,len);
+ return p;
+ } else if (encode) {
return createStringObjectFromLongLong(val);
- else
- return createObject(REDIS_STRING,sdsfromlonglong(val));
+ } else {
+ return createObject(OBJ_STRING,sdsfromlonglong(val));
+ }
}
/* String objects in the form "2391" "-100" without any space and with a
@@ -209,67 +283,98 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}
-int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
- size_t comprlen, outlen;
+ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+ size_t original_len) {
unsigned char byte;
- int n, nwritten = 0;
- void *out;
+ ssize_t n, nwritten = 0;
- /* We require at least four bytes compression for this to be worth it */
- if (len <= 4) return 0;
- outlen = len-4;
- if ((out = zmalloc(outlen+1)) == NULL) return 0;
- comprlen = lzf_compress(s, len, out, outlen);
- if (comprlen == 0) {
- zfree(out);
- return 0;
- }
/* Data compressed! Let's save it on disk */
- byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
+ byte = (RDB_ENCVAL<<6)|RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,comprlen)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,compress_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbSaveLen(rdb,len)) == -1) goto writeerr;
+ if ((n = rdbSaveLen(rdb,original_len)) == -1) goto writeerr;
nwritten += n;
- if ((n = rdbWriteRaw(rdb,out,comprlen)) == -1) goto writeerr;
+ if ((n = rdbWriteRaw(rdb,data,compress_len)) == -1) goto writeerr;
nwritten += n;
- zfree(out);
return nwritten;
writeerr:
- zfree(out);
return -1;
}
-robj *rdbLoadLzfStringObject(rio *rdb) {
- unsigned int len, clen;
+ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+ size_t comprlen, outlen;
+ void *out;
+
+ /* We require at least four bytes compression for this to be worth it */
+ if (len <= 4) return 0;
+ outlen = len-4;
+ if ((out = zmalloc(outlen+1)) == NULL) return 0;
+ comprlen = lzf_compress(s, len, out, outlen);
+ if (comprlen == 0) {
+ zfree(out);
+ return 0;
+ }
+ ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+ zfree(out);
+ return nwritten;
+}
+
+/* Load an LZF compressed string in RDB format. The returned value
+ * changes according to 'flags'. For more info check the
+ * rdbGenericLoadStringObject() function. */
+void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
+ uint64_t len, clen;
unsigned char *c = NULL;
- sds val = NULL;
+ char *val = NULL;
- if ((clen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((clen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
if ((c = zmalloc(clen)) == NULL) goto err;
- if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+
+ /* Allocate our target according to the uncompressed size. */
+ if (plain) {
+ val = zmalloc(len);
+ if (lenptr) *lenptr = len;
+ } else {
+ val = sdsnewlen(NULL,len);
+ }
+
+ /* Load the compressed representation and uncompress it to target. */
if (rioRead(rdb,c,clen) == 0) goto err;
- if (lzf_decompress(c,clen,val,len) == 0) goto err;
+ if (lzf_decompress(c,clen,val,len) == 0) {
+ if (rdbCheckMode) rdbCheckSetError("Invalid LZF compressed string");
+ goto err;
+ }
zfree(c);
- return createObject(REDIS_STRING,val);
+
+ if (plain || sds) {
+ return val;
+ } else {
+ return createObject(OBJ_STRING,val);
+ }
err:
zfree(c);
- sdsfree(val);
+ if (plain)
+ zfree(val);
+ else
+ sdsfree(val);
return NULL;
}
/* Save a string object as [len][data] on disk. If the object is a string
* representation of an integer value we try to save it in a special form */
-int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
int enclen;
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
/* Try integer encoding */
if (len <= 11) {
@@ -300,16 +405,16 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
}
/* Save a long long value as either an encoded string or a string. */
-int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
+ssize_t rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
unsigned char buf[32];
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
int enclen = rdbEncodeInteger(value,buf);
if (enclen > 0) {
return rdbWriteRaw(rdb,buf,enclen);
} else {
/* Encode as string */
enclen = ll2string((char*)buf,32,value);
- redisAssert(enclen < 32);
+ serverAssert(enclen < 32);
if ((n = rdbSaveLen(rdb,enclen)) == -1) return -1;
nwritten += n;
if ((n = rdbWriteRaw(rdb,buf,enclen)) == -1) return -1;
@@ -318,53 +423,81 @@ int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
return nwritten;
}
-/* Like rdbSaveStringObjectRaw() but handle encoded objects */
+/* Like rdbSaveRawString() gets a Redis object instead. */
int rdbSaveStringObject(rio *rdb, robj *obj) {
/* Avoid to decode the object, then encode it again, if the
* object is already integer encoded. */
- if (obj->encoding == REDIS_ENCODING_INT) {
+ if (obj->encoding == OBJ_ENCODING_INT) {
return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
} else {
- redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
+ serverAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
}
}
-robj *rdbGenericLoadStringObject(rio *rdb, int encode) {
+/* Load a string object from an RDB file according to flags:
+ *
+ * RDB_LOAD_NONE (no flags): load an RDB object, unencoded.
+ * RDB_LOAD_ENC: If the returned type is a Redis object, try to
+ * encode it in a special way to be more memory
+ * efficient. When this flag is passed the function
+ * no longer guarantees that obj->ptr is an SDS string.
+ * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
+ * instead of a Redis object with an sds in it.
+ * RDB_LOAD_SDS: Return an SDS string instead of a Redis object.
+ *
+ * On I/O error NULL is returned.
+ */
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
+ int encode = flags & RDB_LOAD_ENC;
+ int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
int isencoded;
- uint32_t len;
- robj *o;
+ uint64_t len;
len = rdbLoadLen(rdb,&isencoded);
if (isencoded) {
switch(len) {
- case REDIS_RDB_ENC_INT8:
- case REDIS_RDB_ENC_INT16:
- case REDIS_RDB_ENC_INT32:
- return rdbLoadIntegerObject(rdb,len,encode);
- case REDIS_RDB_ENC_LZF:
- return rdbLoadLzfStringObject(rdb);
+ case RDB_ENC_INT8:
+ case RDB_ENC_INT16:
+ case RDB_ENC_INT32:
+ return rdbLoadIntegerObject(rdb,len,flags,lenptr);
+ case RDB_ENC_LZF:
+ return rdbLoadLzfStringObject(rdb,flags,lenptr);
default:
- redisPanic("Unknown RDB encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB string encoding type %d",len);
}
}
- if (len == REDIS_RDB_LENERR) return NULL;
- o = encode ? createStringObject(NULL,len) :
- createRawStringObject(NULL,len);
- if (len && rioRead(rdb,o->ptr,len) == 0) {
- decrRefCount(o);
- return NULL;
+ if (len == RDB_LENERR) return NULL;
+ if (plain || sds) {
+ void *buf = plain ? zmalloc(len) : sdsnewlen(NULL,len);
+ if (lenptr) *lenptr = len;
+ if (len && rioRead(rdb,buf,len) == 0) {
+ if (plain)
+ zfree(buf);
+ else
+ sdsfree(buf);
+ return NULL;
+ }
+ return buf;
+ } else {
+ robj *o = encode ? createStringObject(NULL,len) :
+ createRawStringObject(NULL,len);
+ if (len && rioRead(rdb,o->ptr,len) == 0) {
+ decrRefCount(o);
+ return NULL;
+ }
+ return o;
}
- return o;
}
robj *rdbLoadStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,0);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
}
robj *rdbLoadEncodedStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,1);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC,NULL);
}
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
@@ -427,41 +560,72 @@ int rdbLoadDoubleValue(rio *rdb, double *val) {
}
}
+/* Saves a double for RDB 8 or greater, where IE754 binary64 format is assumed.
+ * We just make sure the integer is always stored in little endian, otherwise
+ * the value is copied verbatim from memory to disk.
+ *
+ * Return -1 on error, the size of the serialized value on success. */
+int rdbSaveBinaryDoubleValue(rio *rdb, double val) {
+ memrev64ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Loads a double from RDB 8 or greater. See rdbSaveBinaryDoubleValue() for
+ * more info. On error -1 is returned, otherwise 0. */
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev64ifbe(val);
+ return 0;
+}
+
+/* Like rdbSaveBinaryDoubleValue() but single precision. */
+int rdbSaveBinaryFloatValue(rio *rdb, float val) {
+ memrev32ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Like rdbLoadBinaryDoubleValue() but single precision. */
+int rdbLoadBinaryFloatValue(rio *rdb, float *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev32ifbe(val);
+ return 0;
+}
+
/* Save the object type of object "o". */
int rdbSaveObjectType(rio *rdb, robj *o) {
switch (o->type) {
- case REDIS_STRING:
- return rdbSaveType(rdb,REDIS_RDB_TYPE_STRING);
- case REDIS_LIST:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_LINKEDLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST);
+ case OBJ_STRING:
+ return rdbSaveType(rdb,RDB_TYPE_STRING);
+ case OBJ_LIST:
+ if (o->encoding == OBJ_ENCODING_QUICKLIST)
+ return rdbSaveType(rdb,RDB_TYPE_LIST_QUICKLIST);
else
- redisPanic("Unknown list encoding");
- case REDIS_SET:
- if (o->encoding == REDIS_ENCODING_INTSET)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_SET_INTSET);
- else if (o->encoding == REDIS_ENCODING_HT)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_SET);
+ serverPanic("Unknown list encoding");
+ case OBJ_SET:
+ if (o->encoding == OBJ_ENCODING_INTSET)
+ return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_SET);
else
- redisPanic("Unknown set encoding");
- case REDIS_ZSET:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_SKIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET);
+ serverPanic("Unknown set encoding");
+ case OBJ_ZSET:
+ if (o->encoding == OBJ_ENCODING_ZIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_ZIPLIST);
+ else if (o->encoding == OBJ_ENCODING_SKIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_2);
else
- redisPanic("Unknown sorted set encoding");
- case REDIS_HASH:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_HT)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH);
+ serverPanic("Unknown sorted set encoding");
+ case OBJ_HASH:
+ if (o->encoding == OBJ_ENCODING_ZIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_HASH_ZIPLIST);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_HASH);
else
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
+ case OBJ_MODULE:
+ return rdbSaveType(rdb,RDB_TYPE_MODULE_2);
default:
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
return -1; /* avoid warning */
}
@@ -475,41 +639,40 @@ int rdbLoadObjectType(rio *rdb) {
return type;
}
-/* Save a Redis object. Returns -1 on error, 0 on success. */
-int rdbSaveObject(rio *rdb, robj *o) {
- int n, nwritten = 0;
+/* Save a Redis object. Returns -1 on error, number of bytes written on success. */
+ssize_t rdbSaveObject(rio *rdb, robj *o) {
+ ssize_t n = 0, nwritten = 0;
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
/* Save a string value */
if ((n = rdbSaveStringObject(rdb,o)) == -1) return -1;
nwritten += n;
- } else if (o->type == REDIS_LIST) {
+ } else if (o->type == OBJ_LIST) {
/* Save a list value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- size_t l = ziplistBlobLen((unsigned char*)o->ptr);
-
- if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
- nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listIter li;
- listNode *ln;
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
- if ((n = rdbSaveLen(rdb,listLength(list))) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
nwritten += n;
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
- nwritten += n;
- }
+ do {
+ if (quicklistNodeIsCompressed(node)) {
+ void *data;
+ size_t compress_len = quicklistGetLzf(node, &data);
+ if ((n = rdbSaveLzfBlob(rdb,data,compress_len,node->sz)) == -1) return -1;
+ nwritten += n;
+ } else {
+ if ((n = rdbSaveRawString(rdb,node->zl,node->sz)) == -1) return -1;
+ nwritten += n;
+ }
+ } while ((node = node->next));
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
- } else if (o->type == REDIS_SET) {
+ } else if (o->type == OBJ_SET) {
/* Save a set value */
- if (o->encoding == REDIS_ENCODING_HT) {
+ if (o->encoding == OBJ_ENCODING_HT) {
dict *set = o->ptr;
dictIterator *di = dictGetIterator(set);
dictEntry *de;
@@ -518,56 +681,65 @@ int rdbSaveObject(rio *rdb, robj *o) {
nwritten += n;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
+ sds ele = dictGetKey(de);
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele)))
+ == -1) return -1;
nwritten += n;
}
dictReleaseIterator(di);
- } else if (o->encoding == REDIS_ENCODING_INTSET) {
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
size_t l = intsetBlobLen((intset*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_ZSET) {
/* Save a sorted set value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
size_t l = ziplistBlobLen((unsigned char*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
+ zskiplist *zsl = zs->zsl;
- if ((n = rdbSaveLen(rdb,dictSize(zs->dict))) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1;
nwritten += n;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
- double *score = dictGetVal(de);
-
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
+ /* We save the skiplist elements from the greatest to the smallest
+ * (that's trivial since the elements are already ordered in the
+ * skiplist): this improves the load process, since the next loaded
+ * element will always be the smaller, so adding to the skiplist
+ * will always immediately stop at the head, making the insertion
+ * O(1) instead of O(log(N)). */
+ zskiplistNode *zn = zsl->tail;
+ while (zn != NULL) {
+ if ((n = rdbSaveRawString(rdb,
+ (unsigned char*)zn->ele,sdslen(zn->ele))) == -1)
+ {
+ return -1;
+ }
nwritten += n;
- if ((n = rdbSaveDoubleValue(rdb,*score)) == -1) return -1;
+ if ((n = rdbSaveBinaryDoubleValue(rdb,zn->score)) == -1)
+ return -1;
nwritten += n;
+ zn = zn->backward;
}
- dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
- } else if (o->type == REDIS_HASH) {
+ } else if (o->type == OBJ_HASH) {
/* Save a hash value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
size_t l = ziplistBlobLen((unsigned char*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
dictIterator *di = dictGetIterator(o->ptr);
dictEntry *de;
@@ -575,22 +747,47 @@ int rdbSaveObject(rio *rdb, robj *o) {
nwritten += n;
while((de = dictNext(di)) != NULL) {
- robj *key = dictGetKey(de);
- robj *val = dictGetVal(de);
+ sds field = dictGetKey(de);
+ sds value = dictGetVal(de);
- if ((n = rdbSaveStringObject(rdb,key)) == -1) return -1;
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)field,
+ sdslen(field))) == -1) return -1;
nwritten += n;
- if ((n = rdbSaveStringObject(rdb,val)) == -1) return -1;
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)value,
+ sdslen(value))) == -1) return -1;
nwritten += n;
}
dictReleaseIterator(di);
-
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
+ } else if (o->type == OBJ_MODULE) {
+ /* Save a module-specific value. */
+ RedisModuleIO io;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitIOContext(io,mt,rdb);
+
+ /* Write the "module" identifier as prefix, so that we'll be able
+ * to call the right module during loading. */
+ int retval = rdbSaveLen(rdb,mt->id);
+ if (retval == -1) return -1;
+ io.bytes += retval;
+
+ /* Then write the module-specific representation + EOF marker. */
+ mt->rdb_save(&io,mv->value);
+ retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
+ if (retval == -1) return -1;
+ io.bytes += retval;
+
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return io.error ? -1 : (ssize_t)io.bytes;
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
return nwritten;
}
@@ -599,9 +796,9 @@ int rdbSaveObject(rio *rdb, robj *o) {
* the rdbSaveObject() function. Currently we use a trick to get
* this length with very little changes to the code. In the future
* we could switch to a faster solution. */
-off_t rdbSavedObjectLen(robj *o) {
- int len = rdbSaveObject(NULL,o);
- redisAssertWithInfo(NULL,o,len != -1);
+size_t rdbSavedObjectLen(robj *o) {
+ ssize_t len = rdbSaveObject(NULL,o);
+ serverAssertWithInfo(NULL,o,len != -1);
return len;
}
@@ -616,7 +813,7 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
if (expiretime != -1) {
/* If this key is already expired skip it */
if (expiretime < now) return 0;
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
+ if (rdbSaveType(rdb,RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
if (rdbSaveMillisecondTime(rdb,expiretime) == -1) return -1;
}
@@ -627,45 +824,101 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
return 1;
}
-/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
-int rdbSave(char *filename) {
+/* Save an AUX field. */
+int rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ if (rdbSaveType(rdb,RDB_OPCODE_AUX) == -1) return -1;
+ if (rdbSaveRawString(rdb,key,keylen) == -1) return -1;
+ if (rdbSaveRawString(rdb,val,vallen) == -1) return -1;
+ return 1;
+}
+
+/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
+ * with strlen(). */
+int rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
+}
+
+/* Wrapper for strlen(key) + integer type (up to long long range). */
+int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[LONG_STR_SIZE];
+ int vlen = ll2string(buf,sizeof(buf),val);
+ return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
+}
+
+/* Save a few default AUX fields with information about the RDB generated. */
+int rdbSaveInfoAuxFields(rio *rdb, int flags, rdbSaveInfo *rsi) {
+ int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
+ int aof_preamble = (flags & RDB_SAVE_AOF_PREAMBLE) != 0;
+
+ /* Add a few fields about the state when the RDB was created. */
+ if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;
+
+ /* Handle saving options that generate aux fields. */
+ if (rsi) {
+ if (rsi->repl_stream_db &&
+ rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db)
+ == -1)
+ {
+ return -1;
+ }
+ }
+ if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1;
+ if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid) == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset) == -1) return -1;
+ return 1;
+}
+
+/* Produces a dump of the database in RDB format sending it to the specified
+ * Redis I/O channel. On success C_OK is returned, otherwise C_ERR
+ * is returned and part of the output, or all the output, can be
+ * missing because of I/O errors.
+ *
+ * When the function returns C_ERR and if 'error' is not NULL, the
+ * integer pointed by 'error' is set to the value of errno just after the I/O
+ * error. */
+int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) {
dictIterator *di = NULL;
dictEntry *de;
- char tmpfile[256];
char magic[10];
int j;
long long now = mstime();
- FILE *fp;
- rio rdb;
uint64_t cksum;
+ size_t processed = 0;
- snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
- fp = fopen(tmpfile,"w");
- if (!fp) {
- redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",
- strerror(errno));
- return REDIS_ERR;
- }
-
- rioInitWithFile(&rdb,fp);
if (server.rdb_checksum)
- rdb.update_cksum = rioGenericUpdateChecksum;
- snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
- if (rdbWriteRaw(&rdb,magic,9) == -1) goto werr;
+ rdb->update_cksum = rioGenericUpdateChecksum;
+ snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION);
+ if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
+ if (rdbSaveInfoAuxFields(rdb,flags,rsi) == -1) goto werr;
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
dict *d = db->dict;
if (dictSize(d) == 0) continue;
di = dictGetSafeIterator(d);
- if (!di) {
- fclose(fp);
- return REDIS_ERR;
- }
+ if (!di) return C_ERR;
/* Write the SELECT DB opcode */
- if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
- if (rdbSaveLen(&rdb,j) == -1) goto werr;
+ if (rdbSaveType(rdb,RDB_OPCODE_SELECTDB) == -1) goto werr;
+ if (rdbSaveLen(rdb,j) == -1) goto werr;
+
+ /* Write the RESIZE DB opcode. We trim the size to UINT32_MAX, which
+ * is currently the largest type we are able to represent in RDB sizes.
+ * However this does not limit the actual size of the DB to load since
+ * these sizes are just hints to resize the hash tables. */
+ uint32_t db_size, expires_size;
+ db_size = (dictSize(db->dict) <= UINT32_MAX) ?
+ dictSize(db->dict) :
+ UINT32_MAX;
+ expires_size = (dictSize(db->expires) <= UINT32_MAX) ?
+ dictSize(db->expires) :
+ UINT32_MAX;
+ if (rdbSaveType(rdb,RDB_OPCODE_RESIZEDB) == -1) goto werr;
+ if (rdbSaveLen(rdb,db_size) == -1) goto werr;
+ if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
@@ -675,20 +928,90 @@ int rdbSave(char *filename) {
initStaticStringObject(key,keystr);
expire = getExpire(db,&key);
- if (rdbSaveKeyValuePair(&rdb,&key,o,expire,now) == -1) goto werr;
+ if (rdbSaveKeyValuePair(rdb,&key,o,expire,now) == -1) goto werr;
+
+ /* When this RDB is produced as part of an AOF rewrite, move
+ * accumulated diff from parent to child while rewriting in
+ * order to have a smaller final write. */
+ if (flags & RDB_SAVE_AOF_PREAMBLE &&
+ rdb->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES)
+ {
+ processed = rdb->processed_bytes;
+ aofReadDiffFromParent();
+ }
}
dictReleaseIterator(di);
}
di = NULL; /* So that we don't release it again on error. */
/* EOF opcode */
- if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;
+ if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;
/* CRC64 checksum. It will be zero if checksum computation is disabled, the
* loading code skips the check in this case. */
- cksum = rdb.cksum;
+ cksum = rdb->cksum;
memrev64ifbe(&cksum);
- rioWrite(&rdb,&cksum,8);
+ if (rioWrite(rdb,&cksum,8) == 0) goto werr;
+ return C_OK;
+
+werr:
+ if (error) *error = errno;
+ if (di) dictReleaseIterator(di);
+ return C_ERR;
+}
+
+/* This is just a wrapper to rdbSaveRio() that additionally adds a prefix
+ * and a suffix to the generated RDB dump. The prefix is:
+ *
+ * $EOF:<40 bytes unguessable hex string>\r\n
+ *
+ * While the suffix is the 40 bytes hex string we announced in the prefix.
+ * This way processes receiving the payload can understand when it ends
+ * without doing any processing of the content. */
+int rdbSaveRioWithEOFMark(rio *rdb, int *error, rdbSaveInfo *rsi) {
+ char eofmark[RDB_EOF_MARK_SIZE];
+
+ getRandomHexChars(eofmark,RDB_EOF_MARK_SIZE);
+ if (error) *error = 0;
+ if (rioWrite(rdb,"$EOF:",5) == 0) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
+ if (rioWrite(rdb,"\r\n",2) == 0) goto werr;
+ if (rdbSaveRio(rdb,error,RDB_SAVE_NONE,rsi) == C_ERR) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
+ return C_OK;
+
+werr: /* Write error. */
+ /* Set 'error' only if not already set by rdbSaveRio() call. */
+ if (error && *error == 0) *error = errno;
+ return C_ERR;
+}
+
+/* Save the DB on disk. Return C_ERR on error, C_OK on success. */
+int rdbSave(char *filename, rdbSaveInfo *rsi) {
+ char tmpfile[256];
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+ FILE *fp;
+ rio rdb;
+ int error = 0;
+
+ snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Failed opening the RDB file %s (in server root dir %s) "
+ "for saving: %s",
+ filename,
+ cwdp ? cwdp : "unknown",
+ strerror(errno));
+ return C_ERR;
+ }
+
+ rioInitWithFile(&rdb,fp);
+ if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE,rsi) == C_ERR) {
+ errno = error;
+ goto werr;
+ }
/* Make sure data will not remain on the OS's output buffers */
if (fflush(fp) == EOF) goto werr;
@@ -698,32 +1021,40 @@ int rdbSave(char *filename) {
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Error moving temp DB file %s on the final "
+ "destination %s (in server root dir %s): %s",
+ tmpfile,
+ filename,
+ cwdp ? cwdp : "unknown",
+ strerror(errno));
unlink(tmpfile);
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"DB saved on disk");
+
+ serverLog(LL_NOTICE,"DB saved on disk");
server.dirty = 0;
server.lastsave = time(NULL);
- server.lastbgsave_status = REDIS_OK;
- return REDIS_OK;
+ server.lastbgsave_status = C_OK;
+ return C_OK;
werr:
+ serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
- if (di) dictReleaseIterator(di);
- return REDIS_ERR;
+ return C_ERR;
}
-int rdbSaveBackground(char *filename) {
+int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
pid_t childpid;
long long start;
- if (server.rdb_child_pid != -1) return REDIS_ERR;
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
server.dirty_before_bgsave = server.dirty;
server.lastbgsave_try = time(NULL);
+ openChildInfoPipe();
start = ustime();
if ((childpid = fork()) == 0) {
@@ -732,90 +1063,119 @@ int rdbSaveBackground(char *filename) {
/* Child */
closeListeningSockets(0);
redisSetProcTitle("redis-rdb-bgsave");
- retval = rdbSave(filename);
- if (retval == REDIS_OK) {
- size_t private_dirty = zmalloc_get_private_dirty();
+ retval = rdbSave(filename,rsi);
+ if (retval == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"RDB: %zu MB of memory used by copy-on-write",
private_dirty/(1024*1024));
}
+
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_RDB);
}
- exitFromChild((retval == REDIS_OK) ? 0 : 1);
+ exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
server.stat_fork_time = ustime()-start;
server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
- server.lastbgsave_status = REDIS_ERR;
- redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
+ closeChildInfoPipe();
+ server.lastbgsave_status = C_ERR;
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
+ serverLog(LL_NOTICE,"Background saving started by pid %d",childpid);
server.rdb_save_time_start = time(NULL);
server.rdb_child_pid = childpid;
+ server.rdb_child_type = RDB_CHILD_TYPE_DISK;
updateDictResizePolicy();
- return REDIS_OK;
+ return C_OK;
}
- return REDIS_OK; /* unreached */
+ return C_OK; /* unreached */
}
void rdbRemoveTempFile(pid_t childpid) {
char tmpfile[256];
- snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
+ snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) childpid);
unlink(tmpfile);
}
+/* This function is called by rdbLoadObject() when the code is in RDB-check
+ * mode and we find a module value of type 2 that can be parsed without
+ * the need of the actual module. The value is parsed for errors, finally
+ * a dummy redis object is returned just to conform to the API. */
+robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) {
+ uint64_t opcode;
+ while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) {
+ if (opcode == RDB_MODULE_OPCODE_SINT ||
+ opcode == RDB_MODULE_OPCODE_UINT)
+ {
+ uint64_t len;
+ if (rdbLoadLenByRef(rdb,NULL,&len) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading integer from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_STRING) {
+ robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+ if (o == NULL) {
+ rdbExitReportCorruptRDB(
+ "Error reading string from module %s value", modulename);
+ }
+ decrRefCount(o);
+ } else if (opcode == RDB_MODULE_OPCODE_FLOAT) {
+ float val;
+ if (rdbLoadBinaryFloatValue(rdb,&val) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading float from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) {
+ double val;
+ if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading double from module %s value", modulename);
+ }
+ }
+ }
+ return createStringObject("module-dummy-value",18);
+}
+
/* Load a Redis object of the specified type from the specified file.
* On success a newly allocated object is returned, otherwise NULL. */
robj *rdbLoadObject(int rdbtype, rio *rdb) {
- robj *o, *ele, *dec;
- size_t len;
+ robj *o = NULL, *ele, *dec;
+ uint64_t len;
unsigned int i;
- if (rdbtype == REDIS_RDB_TYPE_STRING) {
+ if (rdbtype == RDB_TYPE_STRING) {
/* Read string value */
if ((o = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
o = tryObjectEncoding(o);
- } else if (rdbtype == REDIS_RDB_TYPE_LIST) {
+ } else if (rdbtype == RDB_TYPE_LIST) {
/* Read list value */
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
- /* Use a real list when there are too many entries */
- if (len > server.list_max_ziplist_entries) {
- o = createListObject();
- } else {
- o = createZiplistObject();
- }
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
/* Load every single element of the list */
while(len--) {
if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
-
- /* If we are using a ziplist and the value is too big, convert
- * the object to a real list. */
- if (o->encoding == REDIS_ENCODING_ZIPLIST &&
- sdsEncodedObject(ele) &&
- sdslen(ele->ptr) > server.list_max_ziplist_value)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
-
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- dec = getDecodedObject(ele);
- o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
- decrRefCount(dec);
- decrRefCount(ele);
- } else {
- ele = tryObjectEncoding(ele);
- listAddNodeTail(o->ptr,ele);
- }
+ dec = getDecodedObject(ele);
+ size_t len = sdslen(dec->ptr);
+ quicklistPushTail(o->ptr, dec->ptr, len);
+ decrRefCount(dec);
+ decrRefCount(ele);
}
- } else if (rdbtype == REDIS_RDB_TYPE_SET) {
- /* Read list/set value */
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ } else if (rdbtype == RDB_TYPE_SET) {
+ /* Read Set value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
/* Use a regular set when there are too many entries. */
if (len > server.set_max_intset_entries) {
@@ -828,140 +1188,150 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
o = createIntsetObject();
}
- /* Load every single element of the list/set */
+ /* Load every single element of the set */
for (i = 0; i < len; i++) {
long long llval;
- if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
+ sds sdsele;
+
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
- if (o->encoding == REDIS_ENCODING_INTSET) {
- /* Fetch integer value from element */
- if (isObjectRepresentableAsLongLong(ele,&llval) == REDIS_OK) {
+ if (o->encoding == OBJ_ENCODING_INTSET) {
+ /* Fetch integer value from element. */
+ if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) {
o->ptr = intsetAdd(o->ptr,llval,NULL);
} else {
- setTypeConvert(o,REDIS_ENCODING_HT);
+ setTypeConvert(o,OBJ_ENCODING_HT);
dictExpand(o->ptr,len);
}
}
/* This will also be called when the set was just converted
- * to a regular hash table encoded set */
- if (o->encoding == REDIS_ENCODING_HT) {
- dictAdd((dict*)o->ptr,ele,NULL);
+ * to a regular hash table encoded set. */
+ if (o->encoding == OBJ_ENCODING_HT) {
+ dictAdd((dict*)o->ptr,sdsele,NULL);
} else {
- decrRefCount(ele);
+ sdsfree(sdsele);
}
}
- } else if (rdbtype == REDIS_RDB_TYPE_ZSET) {
- /* Read list/set value */
- size_t zsetlen;
+ } else if (rdbtype == RDB_TYPE_ZSET_2 || rdbtype == RDB_TYPE_ZSET) {
+ /* Read list/set value. */
+ uint64_t zsetlen;
size_t maxelelen = 0;
zset *zs;
- if ((zsetlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((zsetlen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
o = createZsetObject();
zs = o->ptr;
- /* Load every single element of the list/set */
+ /* Load every single element of the sorted set. */
while(zsetlen--) {
- robj *ele;
+ sds sdsele;
double score;
zskiplistNode *znode;
- if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
- if (rdbLoadDoubleValue(rdb,&score) == -1) return NULL;
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+
+ if (rdbtype == RDB_TYPE_ZSET_2) {
+ if (rdbLoadBinaryDoubleValue(rdb,&score) == -1) return NULL;
+ } else {
+ if (rdbLoadDoubleValue(rdb,&score) == -1) return NULL;
+ }
/* Don't care about integer-encoded strings. */
- if (sdsEncodedObject(ele) && sdslen(ele->ptr) > maxelelen)
- maxelelen = sdslen(ele->ptr);
+ if (sdslen(sdsele) > maxelelen) maxelelen = sdslen(sdsele);
- znode = zslInsert(zs->zsl,score,ele);
- dictAdd(zs->dict,ele,&znode->score);
- incrRefCount(ele); /* added to skiplist */
+ znode = zslInsert(zs->zsl,score,sdsele);
+ dictAdd(zs->dict,sdsele,&znode->score);
}
/* Convert *after* loading, since sorted sets are not stored ordered. */
if (zsetLength(o) <= server.zset_max_ziplist_entries &&
maxelelen <= server.zset_max_ziplist_value)
- zsetConvert(o,REDIS_ENCODING_ZIPLIST);
- } else if (rdbtype == REDIS_RDB_TYPE_HASH) {
- size_t len;
+ zsetConvert(o,OBJ_ENCODING_ZIPLIST);
+ } else if (rdbtype == RDB_TYPE_HASH) {
+ uint64_t len;
int ret;
+ sds field, value;
len = rdbLoadLen(rdb, NULL);
- if (len == REDIS_RDB_LENERR) return NULL;
+ if (len == RDB_LENERR) return NULL;
o = createHashObject();
/* Too many entries? Use a hash table. */
if (len > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
/* Load every field and value into the ziplist */
- while (o->encoding == REDIS_ENCODING_ZIPLIST && len > 0) {
- robj *field, *value;
-
+ while (o->encoding == OBJ_ENCODING_ZIPLIST && len > 0) {
len--;
/* Load raw strings */
- field = rdbLoadStringObject(rdb);
- if (field == NULL) return NULL;
- redisAssert(sdsEncodedObject(field));
- value = rdbLoadStringObject(rdb);
- if (value == NULL) return NULL;
- redisAssert(sdsEncodedObject(value));
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
/* Add pair to ziplist */
- o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL);
- o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL);
+ o->ptr = ziplistPush(o->ptr, (unsigned char*)field,
+ sdslen(field), ZIPLIST_TAIL);
+ o->ptr = ziplistPush(o->ptr, (unsigned char*)value,
+ sdslen(value), ZIPLIST_TAIL);
+
/* Convert to hash table if size threshold is exceeded */
- if (sdslen(field->ptr) > server.hash_max_ziplist_value ||
- sdslen(value->ptr) > server.hash_max_ziplist_value)
+ if (sdslen(field) > server.hash_max_ziplist_value ||
+ sdslen(value) > server.hash_max_ziplist_value)
{
- decrRefCount(field);
- decrRefCount(value);
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ sdsfree(field);
+ sdsfree(value);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
}
- decrRefCount(field);
- decrRefCount(value);
+ sdsfree(field);
+ sdsfree(value);
}
/* Load remaining fields and values into the hash table */
- while (o->encoding == REDIS_ENCODING_HT && len > 0) {
- robj *field, *value;
-
+ while (o->encoding == OBJ_ENCODING_HT && len > 0) {
len--;
/* Load encoded strings */
- field = rdbLoadEncodedStringObject(rdb);
- if (field == NULL) return NULL;
- value = rdbLoadEncodedStringObject(rdb);
- if (value == NULL) return NULL;
-
- field = tryObjectEncoding(field);
- value = tryObjectEncoding(value);
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
/* Add pair to hash table */
ret = dictAdd((dict*)o->ptr, field, value);
- redisAssert(ret == REDIS_OK);
+ if (ret == DICT_ERR) {
+ rdbExitReportCorruptRDB("Duplicate keys detected");
+ }
}
/* All pairs should be read by now */
- redisAssert(len == 0);
-
- } else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP ||
- rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST ||
- rdbtype == REDIS_RDB_TYPE_SET_INTSET ||
- rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST ||
- rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST)
+ serverAssert(len == 0);
+ } else if (rdbtype == RDB_TYPE_LIST_QUICKLIST) {
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ o = createQuicklistObject();
+ quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+
+ while (len--) {
+ unsigned char *zl =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL);
+ if (zl == NULL) return NULL;
+ quicklistAppendZiplist(o->ptr, zl);
+ }
+ } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
+ rdbtype == RDB_TYPE_LIST_ZIPLIST ||
+ rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
+ rdbtype == RDB_TYPE_HASH_ZIPLIST)
{
- robj *aux = rdbLoadStringObject(rdb);
-
- if (aux == NULL) return NULL;
- o = createObject(REDIS_STRING,NULL); /* string is just placeholder */
- o->ptr = zmalloc(sdslen(aux->ptr));
- memcpy(o->ptr,aux->ptr,sdslen(aux->ptr));
- decrRefCount(aux);
+ unsigned char *encoded =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL);
+ if (encoded == NULL) return NULL;
+ o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */
/* Fix the object encoding, and make sure to convert the encoded
* data type into the base type if accordingly to the current
@@ -970,7 +1340,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
* size as this is an O(N) scan. Eventually everything will get
* converted. */
switch(rdbtype) {
- case REDIS_RDB_TYPE_HASH_ZIPMAP:
+ case RDB_TYPE_HASH_ZIPMAP:
/* Convert to ziplist encoded hash. This must be deprecated
* when loading dumps created by Redis 2.4 gets deprecated. */
{
@@ -989,46 +1359,84 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
zfree(o->ptr);
o->ptr = zl;
- o->type = REDIS_HASH;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (hashTypeLength(o) > server.hash_max_ziplist_entries ||
maxlen > server.hash_max_ziplist_value)
{
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
}
}
break;
- case REDIS_RDB_TYPE_LIST_ZIPLIST:
- o->type = REDIS_LIST;
- o->encoding = REDIS_ENCODING_ZIPLIST;
- if (ziplistLen(o->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
+ case RDB_TYPE_LIST_ZIPLIST:
+ o->type = OBJ_LIST;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
+ listTypeConvert(o,OBJ_ENCODING_QUICKLIST);
break;
- case REDIS_RDB_TYPE_SET_INTSET:
- o->type = REDIS_SET;
- o->encoding = REDIS_ENCODING_INTSET;
+ case RDB_TYPE_SET_INTSET:
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_INTSET;
if (intsetLen(o->ptr) > server.set_max_intset_entries)
- setTypeConvert(o,REDIS_ENCODING_HT);
+ setTypeConvert(o,OBJ_ENCODING_HT);
break;
- case REDIS_RDB_TYPE_ZSET_ZIPLIST:
- o->type = REDIS_ZSET;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ case RDB_TYPE_ZSET_ZIPLIST:
+ o->type = OBJ_ZSET;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (zsetLength(o) > server.zset_max_ziplist_entries)
- zsetConvert(o,REDIS_ENCODING_SKIPLIST);
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
break;
- case REDIS_RDB_TYPE_HASH_ZIPLIST:
- o->type = REDIS_HASH;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ case RDB_TYPE_HASH_ZIPLIST:
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (hashTypeLength(o) > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
default:
- redisPanic("Unknown encoding");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
break;
}
+ } else if (rdbtype == RDB_TYPE_MODULE || rdbtype == RDB_TYPE_MODULE_2) {
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+ char name[10];
+
+ if (rdbCheckMode && rdbtype == RDB_TYPE_MODULE_2)
+ return rdbLoadCheckModuleValue(rdb,name);
+
+ if (mt == NULL) {
+ moduleTypeNameByID(name,moduleid);
+ serverLog(LL_WARNING,"The RDB file contains module data I can't load: no matching module '%s'", name);
+ exit(1);
+ }
+ RedisModuleIO io;
+ moduleInitIOContext(io,mt,rdb);
+ io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2;
+ /* Call the rdb_load method of the module providing the 10 bit
+ * encoding version in the lower 10 bits of the module ID. */
+ void *ptr = mt->rdb_load(&io,moduleid&1023);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+
+ /* Module v2 serialization has an EOF mark at the end. */
+ if (io.ver == 2) {
+ uint64_t eof = rdbLoadLen(rdb,NULL);
+ if (eof != RDB_MODULE_OPCODE_EOF) {
+ serverLog(LL_WARNING,"The RDB file contains module data for the module '%s' that is not terminated by the proper module value EOF marker", name);
+ exit(1);
+ }
+ }
+
+ if (ptr == NULL) {
+ moduleTypeNameByID(name,moduleid);
+ serverLog(LL_WARNING,"The RDB file contains module data for the module type '%s', that the responsible module is not able to load. Check for modules log above for additional clues.", name);
+ exit(1);
+ }
+ o = createModuleObject(mt,ptr);
} else {
- redisPanic("Unknown object type");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
}
return o;
}
@@ -1041,8 +1449,9 @@ void startLoading(FILE *fp) {
/* Load the DB */
server.loading = 1;
server.loading_start_time = time(NULL);
+ server.loading_loaded_bytes = 0;
if (fstat(fileno(fp), &sb) == -1) {
- server.loading_total_bytes = 1; /* just to avoid division by zero */
+ server.loading_total_bytes = 0;
} else {
server.loading_total_bytes = sb.st_size;
}
@@ -1072,83 +1481,131 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
* our cached time since it is used to create and update the last
* interaction time with clients and for other important things. */
updateCachedTime();
- if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER)
+ if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER)
replicationSendNewlineToMaster();
loadingProgress(r->processed_bytes);
processEventsWhileBlocked();
}
}
-int rdbLoad(char *filename) {
- uint32_t dbid;
+/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
+ * otherwise C_ERR is returned and 'errno' is set accordingly. */
+int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi) {
+ uint64_t dbid;
int type, rdbver;
redisDb *db = server.db+0;
char buf[1024];
long long expiretime, now = mstime();
- FILE *fp;
- rio rdb;
-
- if ((fp = fopen(filename,"r")) == NULL) return REDIS_ERR;
- rioInitWithFile(&rdb,fp);
- rdb.update_cksum = rdbLoadProgressCallback;
- rdb.max_processing_chunk = server.loading_process_events_interval_bytes;
- if (rioRead(&rdb,buf,9) == 0) goto eoferr;
+ rdb->update_cksum = rdbLoadProgressCallback;
+ rdb->max_processing_chunk = server.loading_process_events_interval_bytes;
+ if (rioRead(rdb,buf,9) == 0) goto eoferr;
buf[9] = '\0';
if (memcmp(buf,"REDIS",5) != 0) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
+ serverLog(LL_WARNING,"Wrong signature trying to load DB from file");
errno = EINVAL;
- return REDIS_ERR;
+ return C_ERR;
}
rdbver = atoi(buf+5);
- if (rdbver < 1 || rdbver > REDIS_RDB_VERSION) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
+ if (rdbver < 1 || rdbver > RDB_VERSION) {
+ serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver);
errno = EINVAL;
- return REDIS_ERR;
+ return C_ERR;
}
- startLoading(fp);
while(1) {
robj *key, *val;
expiretime = -1;
/* Read type. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- if (type == REDIS_RDB_OPCODE_EXPIRETIME) {
- if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
+ if ((type = rdbLoadType(rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
+ if (type == RDB_OPCODE_EXPIRETIME) {
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
+ if ((expiretime = rdbLoadTime(rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+ if ((type = rdbLoadType(rdb)) == -1) goto eoferr;
/* the EXPIRETIME opcode specifies time in seconds, so convert
* into milliseconds. */
expiretime *= 1000;
- } else if (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
- /* Milliseconds precision expire times introduced with RDB
- * version 3. */
- if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
+ } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+ * with RDB v3. Like EXPIRETIME but with more precision. */
+ if ((expiretime = rdbLoadMillisecondTime(rdb)) == -1) goto eoferr;
/* We read the time so we need to read the object type again. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- }
-
- if (type == REDIS_RDB_OPCODE_EOF)
+ if ((type = rdbLoadType(rdb)) == -1) goto eoferr;
+ } else if (type == RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
break;
-
- /* Handle SELECT DB opcode as a special case */
- if (type == REDIS_RDB_OPCODE_SELECTDB) {
- if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ } else if (type == RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
+ if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
goto eoferr;
if (dbid >= (unsigned)server.dbnum) {
- redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
+ serverLog(LL_WARNING,
+ "FATAL: Data file was created with a Redis "
+ "server configured to handle more than %d "
+ "databases. Exiting\n", server.dbnum);
exit(1);
}
db = server.db+dbid;
- continue;
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_RESIZEDB) {
+ /* RESIZEDB: Hint about the size of the keys in the currently
+ * selected data base, in order to avoid useless rehashing. */
+ uint64_t db_size, expires_size;
+ if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ dictExpand(db->dict,db_size);
+ dictExpand(db->expires,expires_size);
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_AUX) {
+ /* AUX: generic string-string fields. Use to add state to RDB
+ * which is backward compatible. Implementations of RDB loading
+ * are required to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
+
+ if (((char*)auxkey->ptr)[0] == '%') {
+ /* All the fields with a name starting with '%' are considered
+ * information fields and are logged at startup with a log
+ * level of NOTICE. */
+ serverLog(LL_NOTICE,"RDB '%s': %s",
+ (char*)auxkey->ptr,
+ (char*)auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) {
+ if (rsi) rsi->repl_stream_db = atoi(auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-id")) {
+ if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) {
+ memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1);
+ rsi->repl_id_is_set = 1;
+ }
+ } else if (!strcasecmp(auxkey->ptr,"repl-offset")) {
+ if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10);
+ } else {
+ /* We ignore fields we don't understand, as by AUX field
+ * contract. */
+ serverLog(LL_DEBUG,"Unrecognized RDB AUX field: '%s'",
+ (char*)auxkey->ptr);
+ }
+
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
}
+
/* Read key */
- if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
/* Read value */
- if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
+ if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr;
/* Check if the key already expired. This function is used when loading
* an RDB file from disk, either at startup, or when an RDB was
* received from the master. In the latter case, the master is
@@ -1163,49 +1620,68 @@ int rdbLoad(char *filename) {
dbAdd(db,key,val);
/* Set the expire time if needed */
- if (expiretime != -1) setExpire(db,key,expiretime);
+ if (expiretime != -1) setExpire(NULL,db,key,expiretime);
decrRefCount(key);
}
/* Verify the checksum if RDB version is >= 5 */
if (rdbver >= 5 && server.rdb_checksum) {
- uint64_t cksum, expected = rdb.cksum;
+ uint64_t cksum, expected = rdb->cksum;
- if (rioRead(&rdb,&cksum,8) == 0) goto eoferr;
+ if (rioRead(rdb,&cksum,8) == 0) goto eoferr;
memrev64ifbe(&cksum);
if (cksum == 0) {
- redisLog(REDIS_WARNING,"RDB file was saved with checksum disabled: no check performed.");
+ serverLog(LL_WARNING,"RDB file was saved with checksum disabled: no check performed.");
} else if (cksum != expected) {
- redisLog(REDIS_WARNING,"Wrong RDB checksum. Aborting now.");
- exit(1);
+ serverLog(LL_WARNING,"Wrong RDB checksum. Aborting now.");
+ rdbExitReportCorruptRDB("RDB CRC error");
}
}
+ return C_OK;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+ serverLog(LL_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
+ rdbExitReportCorruptRDB("Unexpected EOF reading RDB file");
+ return C_ERR; /* Just to avoid warning */
+}
+
+/* Like rdbLoadRio() but takes a filename instead of a rio stream. The
+ * filename is open for reading and a rio stream object created in order
+ * to do the actual loading. Moreover the ETA displayed in the INFO
+ * output is initialized and finalized.
+ *
+ * If you pass an 'rsi' structure initialized with RDB_SAVE_OPTION_INIT, the
+ * loading code will fill the information fields in the structure. */
+int rdbLoad(char *filename, rdbSaveInfo *rsi) {
+ FILE *fp;
+ rio rdb;
+ int retval;
+ if ((fp = fopen(filename,"r")) == NULL) return C_ERR;
+ startLoading(fp);
+ rioInitWithFile(&rdb,fp);
+ retval = rdbLoadRio(&rdb,rsi);
fclose(fp);
stopLoading();
- return REDIS_OK;
-
-eoferr: /* unexpected end of file is handled here with a fatal exit */
- redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
- exit(1);
- return REDIS_ERR; /* Just to avoid warning */
+ return retval;
}
-/* A background saving child (BGSAVE) terminated its work. Handle this. */
-void backgroundSaveDoneHandler(int exitcode, int bysignal) {
+/* A background saving child (BGSAVE) terminated its work. Handle this.
+ * This function covers the case of actual BGSAVEs. */
+void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
if (!bysignal && exitcode == 0) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background saving terminated with success");
server.dirty = server.dirty - server.dirty_before_bgsave;
server.lastsave = time(NULL);
- server.lastbgsave_status = REDIS_OK;
+ server.lastbgsave_status = C_OK;
} else if (!bysignal && exitcode != 0) {
- redisLog(REDIS_WARNING, "Background saving error");
- server.lastbgsave_status = REDIS_ERR;
+ serverLog(LL_WARNING, "Background saving error");
+ server.lastbgsave_status = C_ERR;
} else {
mstime_t latency;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Background saving terminated by signal %d", bysignal);
latencyStartMonitor(latency);
rdbRemoveTempFile(server.rdb_child_pid);
@@ -1214,34 +1690,328 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
* tirggering an error conditon. */
if (bysignal != SIGUSR1)
- server.lastbgsave_status = REDIS_ERR;
+ server.lastbgsave_status = C_ERR;
}
server.rdb_child_pid = -1;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
server.rdb_save_time_start = -1;
/* Possibly there are slaves waiting for a BGSAVE in order to be served
* (the first stage of SYNC is a bulk transfer of dump.rdb) */
- updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? REDIS_OK : REDIS_ERR);
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, RDB_CHILD_TYPE_DISK);
+}
+
+/* A background saving child (BGSAVE) terminated its work. Handle this.
+ * This function covers the case of RDB -> Slaves socket transfers for
+ * diskless replication. */
+void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
+ uint64_t *ok_slaves;
+
+ if (!bysignal && exitcode == 0) {
+ serverLog(LL_NOTICE,
+ "Background RDB transfer terminated with success");
+ } else if (!bysignal && exitcode != 0) {
+ serverLog(LL_WARNING, "Background transfer error");
+ } else {
+ serverLog(LL_WARNING,
+ "Background transfer terminated by signal %d", bysignal);
+ }
+ server.rdb_child_pid = -1;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_save_time_start = -1;
+
+ /* If the child returns an OK exit code, read the set of slave client
+ * IDs and the associated status code. We'll terminate all the slaves
+ * in error state.
+ *
+ * If the process returned an error, consider the list of slaves that
+ * can continue to be empty, so that it's just a special case of the
+ * normal code path. */
+ ok_slaves = zmalloc(sizeof(uint64_t)); /* Make space for the count. */
+ ok_slaves[0] = 0;
+ if (!bysignal && exitcode == 0) {
+ int readlen = sizeof(uint64_t);
+
+ if (read(server.rdb_pipe_read_result_from_child, ok_slaves, readlen) ==
+ readlen)
+ {
+ readlen = ok_slaves[0]*sizeof(uint64_t)*2;
+
+ /* Make space for enough elements as specified by the first
+ * uint64_t element in the array. */
+ ok_slaves = zrealloc(ok_slaves,sizeof(uint64_t)+readlen);
+ if (readlen &&
+ read(server.rdb_pipe_read_result_from_child, ok_slaves+1,
+ readlen) != readlen)
+ {
+ ok_slaves[0] = 0;
+ }
+ }
+ }
+
+ close(server.rdb_pipe_read_result_from_child);
+ close(server.rdb_pipe_write_result_to_parent);
+
+ /* We can continue the replication process with all the slaves that
+ * correctly received the full payload. Others are terminated. */
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
+ uint64_t j;
+ int errorcode = 0;
+
+ /* Search for the slave ID in the reply. In order for a slave to
+ * continue the replication process, we need to find it in the list,
+ * and it must have an error code set to 0 (which means success). */
+ for (j = 0; j < ok_slaves[0]; j++) {
+ if (slave->id == ok_slaves[2*j+1]) {
+ errorcode = ok_slaves[2*j+2];
+ break; /* Found in slaves list. */
+ }
+ }
+ if (j == ok_slaves[0] || errorcode != 0) {
+ serverLog(LL_WARNING,
+ "Closing slave %s: child->slave RDB transfer failed: %s",
+ replicationGetSlaveName(slave),
+ (errorcode == 0) ? "RDB transfer child aborted"
+ : strerror(errorcode));
+ freeClient(slave);
+ } else {
+ serverLog(LL_WARNING,
+ "Slave %s correctly received the streamed RDB file.",
+ replicationGetSlaveName(slave));
+ /* Restore the socket as non-blocking. */
+ anetNonBlock(NULL,slave->fd);
+ anetSendTimeout(NULL,slave->fd,0);
+ }
+ }
+ }
+ zfree(ok_slaves);
+
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, RDB_CHILD_TYPE_SOCKET);
}
-void saveCommand(redisClient *c) {
+/* When a background RDB saving/transfer terminates, call the right handler. */
+void backgroundSaveDoneHandler(int exitcode, int bysignal) {
+ switch(server.rdb_child_type) {
+ case RDB_CHILD_TYPE_DISK:
+ backgroundSaveDoneHandlerDisk(exitcode,bysignal);
+ break;
+ case RDB_CHILD_TYPE_SOCKET:
+ backgroundSaveDoneHandlerSocket(exitcode,bysignal);
+ break;
+ default:
+ serverPanic("Unknown RDB child type.");
+ break;
+ }
+}
+
+/* Spawn an RDB child that writes the RDB to the sockets of the slaves
+ * that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
+int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
+ int *fds;
+ uint64_t *clientids;
+ int numfds;
+ listNode *ln;
+ listIter li;
+ pid_t childpid;
+ long long start;
+ int pipefds[2];
+
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
+
+ /* Before to fork, create a pipe that will be used in order to
+ * send back to the parent the IDs of the slaves that successfully
+ * received all the writes. */
+ if (pipe(pipefds) == -1) return C_ERR;
+ server.rdb_pipe_read_result_from_child = pipefds[0];
+ server.rdb_pipe_write_result_to_parent = pipefds[1];
+
+ /* Collect the file descriptors of the slaves we want to transfer
+ * the RDB to, which are in WAIT_BGSAVE_START state.
+ fds = zmalloc(sizeof(int)*listLength(server.slaves));
+ /* We also allocate an array of corresponding client IDs. This will
+ * be useful for the child process in order to build the report
+ * (sent via unix pipe) that will be sent to the parent. */
+ clientids = zmalloc(sizeof(uint64_t)*listLength(server.slaves));
+ numfds = 0;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ clientids[numfds] = slave->id;
+ fds[numfds++] = slave->fd;
+ replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
+ /* Put the socket in blocking mode to simplify RDB transfer.
+ * We'll restore it when the child returns (since duped socket
+ * will share the O_NONBLOCK attribute with the parent). */
+ anetBlock(NULL,slave->fd);
+ anetSendTimeout(NULL,slave->fd,server.repl_timeout*1000);
+ }
+ }
+
+ /* Create the child process. */
+ openChildInfoPipe();
+ start = ustime();
+ if ((childpid = fork()) == 0) {
+ /* Child */
+ int retval;
+ rio slave_sockets;
+
+ rioInitWithFdset(&slave_sockets,fds,numfds);
+ zfree(fds);
+
+ closeListeningSockets(0);
+ redisSetProcTitle("redis-rdb-to-slaves");
+
+ retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL,rsi);
+ if (retval == C_OK && rioFlush(&slave_sockets) == 0)
+ retval = C_ERR;
+
+ if (retval == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
+
+ if (private_dirty) {
+ serverLog(LL_NOTICE,
+ "RDB: %zu MB of memory used by copy-on-write",
+ private_dirty/(1024*1024));
+ }
+
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_RDB);
+
+ /* If we are returning OK, at least one slave was served
+ * with the RDB file as expected, so we need to send a report
+ * to the parent via the pipe. The format of the message is:
+ *
+ * <len> <slave[0].id> <slave[0].error> ...
+ *
+ * len, slave IDs, and slave errors, are all uint64_t integers,
+ * so basically the reply is composed of 64 bits for the len field
+ * plus 2 additional 64 bit integers for each entry, for a total
+ * of 'len' entries.
+ *
+ * The 'id' represents the slave's client ID, so that the master
+ * can match the report with a specific slave, and 'error' is
+ * set to 0 if the replication process terminated with a success
+ * or the error code if an error occurred. */
+ void *msg = zmalloc(sizeof(uint64_t)*(1+2*numfds));
+ uint64_t *len = msg;
+ uint64_t *ids = len+1;
+ int j, msglen;
+
+ *len = numfds;
+ for (j = 0; j < numfds; j++) {
+ *ids++ = clientids[j];
+ *ids++ = slave_sockets.io.fdset.state[j];
+ }
+
+ /* Write the message to the parent. If we have no good slaves or
+ * we are unable to transfer the message to the parent, we exit
+ * with an error so that the parent will abort the replication
+ * process with all the children that were waiting. */
+ msglen = sizeof(uint64_t)*(1+2*numfds);
+ if (*len == 0 ||
+ write(server.rdb_pipe_write_result_to_parent,msg,msglen)
+ != msglen)
+ {
+ retval = C_ERR;
+ }
+ zfree(msg);
+ }
+ zfree(clientids);
+ rioFreeFdset(&slave_sockets);
+ exitFromChild((retval == C_OK) ? 0 : 1);
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+
+ /* Undo the state change. The caller will perform cleanup on
+ * all the slaves in BGSAVE_START state, but an early call to
+ * replicationSetupSlaveForFullResync() turned it into BGSAVE_END */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ int j;
+
+ for (j = 0; j < numfds; j++) {
+ if (slave->id == clientids[j]) {
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ break;
+ }
+ }
+ }
+ close(pipefds[0]);
+ close(pipefds[1]);
+ closeChildInfoPipe();
+ } else {
+ server.stat_fork_time = ustime()-start;
+ server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
+ latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
+
+ serverLog(LL_NOTICE,"Background RDB transfer started by pid %d",
+ childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_pid = childpid;
+ server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
+ updateDictResizePolicy();
+ }
+ zfree(clientids);
+ zfree(fds);
+ return (childpid == -1) ? C_ERR : C_OK;
+ }
+ return C_OK; /* Unreached. */
+}
+
+void saveCommand(client *c) {
if (server.rdb_child_pid != -1) {
addReplyError(c,"Background save already in progress");
return;
}
- if (rdbSave(server.rdb_filename) == REDIS_OK) {
+ if (rdbSave(server.rdb_filename,NULL) == C_OK) {
addReply(c,shared.ok);
} else {
addReply(c,shared.err);
}
}
-void bgsaveCommand(redisClient *c) {
+/* BGSAVE [SCHEDULE] */
+void bgsaveCommand(client *c) {
+ int schedule = 0;
+
+ /* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite
+ * is in progress. Instead of returning an error a BGSAVE gets scheduled. */
+ if (c->argc > 1) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
+ schedule = 1;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+
if (server.rdb_child_pid != -1) {
addReplyError(c,"Background save already in progress");
} else if (server.aof_child_pid != -1) {
- addReplyError(c,"Can't BGSAVE while AOF log rewriting is in progress");
- } else if (rdbSaveBackground(server.rdb_filename) == REDIS_OK) {
+ if (schedule) {
+ server.rdb_bgsave_scheduled = 1;
+ addReplyStatus(c,"Background saving scheduled");
+ } else {
+ addReplyError(c,
+ "An AOF log rewriting in progress: can't BGSAVE right now. "
+ "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
+ "possible.");
+ }
+ } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) {
addReplyStatus(c,"Background saving started");
} else {
addReply(c,shared.err);
diff --git a/src/rdb.h b/src/rdb.h
index 54ee4e514..a22cb33ce 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -27,88 +27,125 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __REDIS_RDB_H
-#define __REDIS_RDB_H
+#ifndef __RDB_H
+#define __RDB_H
#include <stdio.h>
#include "rio.h"
/* TBD: include only necessary headers. */
-#include "redis.h"
+#include "server.h"
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define REDIS_RDB_VERSION 6
+#define RDB_VERSION 8
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
* the first byte to interpreter the length:
*
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
+ * 00|XXXXXX => if the two MSB are 00 the len is the 6 bits of this byte
+ * 01|XXXXXX XXXXXXXX => 01, the len is 14 bits, 6 bits + 8 bits of next byte
+ * 10|000000 [32 bit integer] => A full 32 bit len in net byte order will follow
+ * 10|000001 [64 bit integer] => A full 64 bit len in net byte order will follow
+ * 11|OBKIND this means: specially encoded object will follow. The six bits
* number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
+ * See the RDB_ENC_* defines.
*
* Lengths up to 63 are stored using a single byte, most DB keys, and may
* values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
+#define RDB_6BITLEN 0
+#define RDB_14BITLEN 1
+#define RDB_32BITLEN 0x80
+#define RDB_64BITLEN 0x81
+#define RDB_ENCVAL 3
+#define RDB_LENERR UINT64_MAX
/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
+ * set, the remaining six bits specify a special encoding for the object
* accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+#define RDB_ENC_INT8 0 /* 8 bit signed integer */
+#define RDB_ENC_INT16 1 /* 16 bit signed integer */
+#define RDB_ENC_INT32 2 /* 32 bit signed integer */
+#define RDB_ENC_LZF 3 /* string compressed with FASTLZ */
/* Dup object types to RDB object types. Only reason is readability (are we
* dealing with RDB types or with in-memory object types?). */
-#define REDIS_RDB_TYPE_STRING 0
-#define REDIS_RDB_TYPE_LIST 1
-#define REDIS_RDB_TYPE_SET 2
-#define REDIS_RDB_TYPE_ZSET 3
-#define REDIS_RDB_TYPE_HASH 4
+#define RDB_TYPE_STRING 0
+#define RDB_TYPE_LIST 1
+#define RDB_TYPE_SET 2
+#define RDB_TYPE_ZSET 3
+#define RDB_TYPE_HASH 4
+#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */
+#define RDB_TYPE_MODULE 6
+#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without
+ the generating module being loaded. */
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Object types for encoded objects. */
-#define REDIS_RDB_TYPE_HASH_ZIPMAP 9
-#define REDIS_RDB_TYPE_LIST_ZIPLIST 10
-#define REDIS_RDB_TYPE_SET_INTSET 11
-#define REDIS_RDB_TYPE_ZSET_ZIPLIST 12
-#define REDIS_RDB_TYPE_HASH_ZIPLIST 13
+#define RDB_TYPE_HASH_ZIPMAP 9
+#define RDB_TYPE_LIST_ZIPLIST 10
+#define RDB_TYPE_SET_INTSET 11
+#define RDB_TYPE_ZSET_ZIPLIST 12
+#define RDB_TYPE_HASH_ZIPLIST 13
+#define RDB_TYPE_LIST_QUICKLIST 14
+/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 13))
+#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 14))
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
-#define REDIS_RDB_OPCODE_EXPIRETIME_MS 252
-#define REDIS_RDB_OPCODE_EXPIRETIME 253
-#define REDIS_RDB_OPCODE_SELECTDB 254
-#define REDIS_RDB_OPCODE_EOF 255
+#define RDB_OPCODE_AUX 250
+#define RDB_OPCODE_RESIZEDB 251
+#define RDB_OPCODE_EXPIRETIME_MS 252
+#define RDB_OPCODE_EXPIRETIME 253
+#define RDB_OPCODE_SELECTDB 254
+#define RDB_OPCODE_EOF 255
+
+/* Module serialized values sub opcodes */
+#define RDB_MODULE_OPCODE_EOF 0 /* End of module value. */
+#define RDB_MODULE_OPCODE_SINT 1 /* Signed integer. */
+#define RDB_MODULE_OPCODE_UINT 2 /* Unsigned integer. */
+#define RDB_MODULE_OPCODE_FLOAT 3 /* Float. */
+#define RDB_MODULE_OPCODE_DOUBLE 4 /* Double. */
+#define RDB_MODULE_OPCODE_STRING 5 /* String. */
+
+/* rdbLoad...() functions flags. */
+#define RDB_LOAD_NONE 0
+#define RDB_LOAD_ENC (1<<0)
+#define RDB_LOAD_PLAIN (1<<1)
+#define RDB_LOAD_SDS (1<<2)
+
+#define RDB_SAVE_NONE 0
+#define RDB_SAVE_AOF_PREAMBLE (1<<0)
int rdbSaveType(rio *rdb, unsigned char type);
int rdbLoadType(rio *rdb);
int rdbSaveTime(rio *rdb, time_t t);
time_t rdbLoadTime(rio *rdb);
-int rdbSaveLen(rio *rdb, uint32_t len);
-uint32_t rdbLoadLen(rio *rdb, int *isencoded);
+int rdbSaveLen(rio *rdb, uint64_t len);
+uint64_t rdbLoadLen(rio *rdb, int *isencoded);
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr);
int rdbSaveObjectType(rio *rdb, robj *o);
int rdbLoadObjectType(rio *rdb);
-int rdbLoad(char *filename);
-int rdbSaveBackground(char *filename);
+int rdbLoad(char *filename, rdbSaveInfo *rsi);
+int rdbSaveBackground(char *filename, rdbSaveInfo *rsi);
+int rdbSaveToSlavesSockets(rdbSaveInfo *rsi);
void rdbRemoveTempFile(pid_t childpid);
-int rdbSave(char *filename);
-int rdbSaveObject(rio *rdb, robj *o);
-off_t rdbSavedObjectLen(robj *o);
-off_t rdbSavedObjectPages(robj *o);
+int rdbSave(char *filename, rdbSaveInfo *rsi);
+ssize_t rdbSaveObject(rio *rdb, robj *o);
+size_t rdbSavedObjectLen(robj *o);
robj *rdbLoadObject(int type, rio *rdb);
void backgroundSaveDoneHandler(int exitcode, int bysignal);
int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, long long now);
robj *rdbLoadStringObject(rio *rdb);
+int rdbSaveStringObject(rio *rdb, robj *obj);
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len);
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr);
+int rdbSaveBinaryDoubleValue(rio *rdb, double val);
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val);
+int rdbSaveBinaryFloatValue(rio *rdb, float val);
+int rdbLoadBinaryFloatValue(rio *rdb, float *val);
+int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi);
#endif
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
index 5ab2625d2..dec8ecb52 100644
--- a/src/redis-benchmark.c
+++ b/src/redis-benchmark.c
@@ -40,13 +40,13 @@
#include <signal.h>
#include <assert.h>
+#include <sds.h> /* Use hiredis sds. */
#include "ae.h"
#include "hiredis.h"
-#include "sds.h"
#include "adlist.h"
#include "zmalloc.h"
-#define REDIS_NOTUSED(V) ((void) V)
+#define UNUSED(V) ((void) V)
#define RANDPTR_INITIAL_SIZE 8
static struct config {
@@ -65,6 +65,7 @@ static struct config {
int randomkeys_keyspacelen;
int keepalive;
int pipeline;
+ int showerrors;
long long start;
long long totlatency;
long long *latency;
@@ -77,6 +78,7 @@ static struct config {
int dbnum;
sds dbnumstr;
char *tests;
+ char *auth;
} config;
typedef struct _client {
@@ -85,13 +87,14 @@ typedef struct _client {
char **randptr; /* Pointers to :rand: strings inside the command buf */
size_t randlen; /* Number of pointers in client->randptr */
size_t randfree; /* Number of unused pointers in client->randptr */
- unsigned int written; /* Bytes of 'obuf' already written */
+ size_t written; /* Bytes of 'obuf' already written */
long long start; /* Start time of a request */
long long latency; /* Request latency */
int pending; /* Number of pending requests (replies to consume) */
- int selectlen; /* If non-zero, a SELECT of 'selectlen' bytes is currently
- used as a prefix of the pipline of commands. This gets
- discarded the first time it's sent. */
+ int prefix_pending; /* If non-zero, number of pending prefix commands. Commands
+ such as auth and select are prefixed to the pipeline of
+ benchmark commands and discarded after the first send. */
+ int prefixlen; /* Size in bytes of the pending prefix commands */
} *client;
/* Prototypes */
@@ -186,9 +189,9 @@ static void clientDone(client c) {
static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
client c = privdata;
void *reply = NULL;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(fd);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
/* Calculate latency only for the first read event. This means that the
* server already sent the reply and we need to parse it. Parsing overhead
@@ -210,20 +213,31 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
exit(1);
}
- freeReplyObject(reply);
-
- if (c->selectlen) {
- int j;
+ if (config.showerrors) {
+ static time_t lasterr_time = 0;
+ time_t now = time(NULL);
+ redisReply *r = reply;
+ if (r->type == REDIS_REPLY_ERROR && lasterr_time != now) {
+ lasterr_time = now;
+ printf("Error from server: %s\n", r->str);
+ }
+ }
- /* This is the OK from SELECT. Just discard the SELECT
- * from the buffer. */
+ freeReplyObject(reply);
+ /* This is an OK for prefix commands such as auth and select.*/
+ if (c->prefix_pending > 0) {
+ c->prefix_pending--;
c->pending--;
- sdsrange(c->obuf,c->selectlen,-1);
- /* We also need to fix the pointers to the strings
- * we need to randomize. */
- for (j = 0; j < c->randlen; j++)
- c->randptr[j] -= c->selectlen;
- c->selectlen = 0;
+ /* Discard prefix commands on first response.*/
+ if (c->prefixlen > 0) {
+ size_t j;
+ sdsrange(c->obuf, c->prefixlen, -1);
+ /* We also need to fix the pointers to the strings
+ * we need to randomize. */
+ for (j = 0; j < c->randlen; j++)
+ c->randptr[j] -= c->prefixlen;
+ c->prefixlen = 0;
+ }
continue;
}
@@ -243,9 +257,9 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
client c = privdata;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(fd);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
/* Initialize request when nothing was written. */
if (c->written == 0) {
@@ -263,7 +277,7 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (sdslen(c->obuf) > c->written) {
void *ptr = c->obuf+c->written;
- int nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
+ ssize_t nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
if (nwritten == -1) {
if (errno != EPIPE)
fprintf(stderr, "Writing to socket: %s\n", strerror(errno));
@@ -298,8 +312,7 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* 2) The offsets of the __rand_int__ elements inside the command line, used
* for arguments randomization.
*
- * Even when cloning another client, the SELECT command is automatically prefixed
- * if needed. */
+ * Even when cloning another client, prefix commands are applied if needed.*/
static client createClient(char *cmd, size_t len, client from) {
int j;
client c = zmalloc(sizeof(struct _client));
@@ -324,6 +337,17 @@ static client createClient(char *cmd, size_t len, client from) {
* Queue N requests accordingly to the pipeline size, or simply clone
* the example client buffer. */
c->obuf = sdsempty();
+ /* Prefix the request buffer with AUTH and/or SELECT commands, if applicable.
+ * These commands are discarded after the first response, so if the client is
+ * reused the commands will not be used again. */
+ c->prefix_pending = 0;
+ if (config.auth) {
+ char *buf = NULL;
+ int len = redisFormatCommand(&buf, "AUTH %s", config.auth);
+ c->obuf = sdscatlen(c->obuf, buf, len);
+ free(buf);
+ c->prefix_pending++;
+ }
/* If a DB number different than zero is selected, prefix our request
* buffer with the SELECT command, that will be discarded the first
@@ -332,25 +356,23 @@ static client createClient(char *cmd, size_t len, client from) {
if (config.dbnum != 0) {
c->obuf = sdscatprintf(c->obuf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
(int)sdslen(config.dbnumstr),config.dbnumstr);
- c->selectlen = sdslen(c->obuf);
- } else {
- c->selectlen = 0;
+ c->prefix_pending++;
}
-
+ c->prefixlen = sdslen(c->obuf);
/* Append the request itself. */
if (from) {
c->obuf = sdscatlen(c->obuf,
- from->obuf+from->selectlen,
- sdslen(from->obuf)-from->selectlen);
+ from->obuf+from->prefixlen,
+ sdslen(from->obuf)-from->prefixlen);
} else {
for (j = 0; j < config.pipeline; j++)
c->obuf = sdscatlen(c->obuf,cmd,len);
}
+
c->written = 0;
- c->pending = config.pipeline;
+ c->pending = config.pipeline+c->prefix_pending;
c->randptr = NULL;
c->randlen = 0;
- if (c->selectlen) c->pending++;
/* Find substrings in the output buffer that need to be randomized. */
if (config.randomkeys) {
@@ -359,10 +381,10 @@ static client createClient(char *cmd, size_t len, client from) {
c->randfree = 0;
c->randptr = zmalloc(sizeof(char*)*c->randlen);
/* copy the offsets. */
- for (j = 0; j < c->randlen; j++) {
+ for (j = 0; j < (int)c->randlen; j++) {
c->randptr[j] = c->obuf + (from->randptr[j]-from->obuf);
/* Adjust for the different select prefix length. */
- c->randptr[j] += c->selectlen - from->selectlen;
+ c->randptr[j] += c->prefixlen - from->prefixlen;
}
} else {
char *p = c->obuf;
@@ -381,7 +403,8 @@ static client createClient(char *cmd, size_t len, client from) {
}
}
}
- aeCreateFileEvent(config.el,c->context->fd,AE_WRITABLE,writeHandler,c);
+ if (config.idlemode == 0)
+ aeCreateFileEvent(config.el,c->context->fd,AE_WRITABLE,writeHandler,c);
listAddNodeTail(config.clients,c);
config.liveclients++;
return c;
@@ -389,15 +412,6 @@ static client createClient(char *cmd, size_t len, client from) {
static void createMissingClients(client c) {
int n = 0;
- char *buf = c->obuf;
- size_t buflen = sdslen(c->obuf);
-
- /* If we are cloning from a client with a SELECT prefix, skip it since the
- * client will be created with the prefixed SELECT if needed. */
- if (c->selectlen) {
- buf += c->selectlen;
- buflen -= c->selectlen;
- }
while(config.liveclients < config.numclients) {
createClient(NULL,0,c);
@@ -489,6 +503,9 @@ int parseOptions(int argc, const char **argv) {
} else if (!strcmp(argv[i],"-s")) {
if (lastarg) goto invalid;
config.hostsocket = strdup(argv[++i]);
+ } else if (!strcmp(argv[i],"-a") ) {
+ if (lastarg) goto invalid;
+ config.auth = strdup(argv[++i]);
} else if (!strcmp(argv[i],"-d")) {
if (lastarg) goto invalid;
config.datasize = atoi(argv[++i]);
@@ -512,6 +529,8 @@ int parseOptions(int argc, const char **argv) {
config.loop = 1;
} else if (!strcmp(argv[i],"-I")) {
config.idlemode = 1;
+ } else if (!strcmp(argv[i],"-e")) {
+ config.showerrors = 1;
} else if (!strcmp(argv[i],"-t")) {
if (lastarg) goto invalid;
/* We get the list of tests to run as a string in the form
@@ -546,14 +565,15 @@ invalid:
usage:
printf(
-"Usage: redis-benchmark [-h <host>] [-p <port>] [-c <clients>] [-n <requests]> [-k <boolean>]\n\n"
+"Usage: redis-benchmark [-h <host>] [-p <port>] [-c <clients>] [-n <requests>] [-k <boolean>]\n\n"
" -h <hostname> Server hostname (default 127.0.0.1)\n"
" -p <port> Server port (default 6379)\n"
" -s <socket> Server socket (overrides host and port)\n"
+" -a <password> Password for Redis Auth\n"
" -c <clients> Number of parallel connections (default 50)\n"
-" -n <requests> Total number of requests (default 10000)\n"
+" -n <requests> Total number of requests (default 100000)\n"
" -d <size> Data size of SET/GET value in bytes (default 2)\n"
-" -dbnum <db> SELECT the specified db number (default 0)\n"
+" --dbnum <db> SELECT the specified db number (default 0)\n"
" -k <boolean> 1=keep alive 0=reconnect (default 1)\n"
" -r <keyspacelen> Use random keys for SET/GET/INCR, random values for SADD\n"
" Using this option the benchmark will expand the string __rand_int__\n"
@@ -562,6 +582,8 @@ usage:
" is executed. Default tests use this to hit random keys in the\n"
" specified range.\n"
" -P <numreq> Pipeline <numreq> requests. Default 1 (no pipeline).\n"
+" -e If server replies with errors, show them on stdout.\n"
+" (no more than 1 error per second is displayed)\n"
" -q Quiet. Just show query/sec values\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
@@ -588,16 +610,20 @@ usage:
}
int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData) {
- REDIS_NOTUSED(eventLoop);
- REDIS_NOTUSED(id);
- REDIS_NOTUSED(clientData);
+ UNUSED(eventLoop);
+ UNUSED(id);
+ UNUSED(clientData);
if (config.liveclients == 0) {
- fprintf(stderr,"All clients disconnected... aborting.");
+ fprintf(stderr,"All clients disconnected... aborting.\n");
exit(1);
}
-
if (config.csv) return 250;
+ if (config.idlemode == 1) {
+ printf("clients: %d\r", config.liveclients);
+ fflush(stdout);
+ return 250;
+ }
float dt = (float)(mstime()-config.start)/1000.0;
float rps = (float)config.requests_finished/dt;
printf("%s: %.2f\r", config.title, rps);
@@ -631,13 +657,14 @@ int main(int argc, const char **argv) {
signal(SIGPIPE, SIG_IGN);
config.numclients = 50;
- config.requests = 10000;
+ config.requests = 100000;
config.liveclients = 0;
config.el = aeCreateEventLoop(1024*10);
aeCreateTimeEvent(config.el,1,showThroughput,NULL,NULL);
config.keepalive = 1;
config.datasize = 3;
config.pipeline = 1;
+ config.showerrors = 0;
config.randomkeys = 0;
config.randomkeys_keyspacelen = 0;
config.quiet = 0;
@@ -651,6 +678,7 @@ int main(int argc, const char **argv) {
config.hostsocket = NULL;
config.tests = NULL;
config.dbnum = 0;
+ config.auth = NULL;
i = parseOptions(argc,argv);
argc -= i;
@@ -688,8 +716,8 @@ int main(int argc, const char **argv) {
}
/* Run default benchmark suite. */
+ data = zmalloc(config.datasize+1);
do {
- data = zmalloc(config.datasize+1);
memset(data,'x',config.datasize);
data[config.datasize] = '\0';
@@ -726,12 +754,24 @@ int main(int argc, const char **argv) {
free(cmd);
}
+ if (test_is_selected("rpush")) {
+ len = redisFormatCommand(&cmd,"RPUSH mylist %s",data);
+ benchmark("RPUSH",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("lpop")) {
len = redisFormatCommand(&cmd,"LPOP mylist");
benchmark("LPOP",cmd,len);
free(cmd);
}
+ if (test_is_selected("rpop")) {
+ len = redisFormatCommand(&cmd,"RPOP mylist");
+ benchmark("RPOP",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("sadd")) {
len = redisFormatCommand(&cmd,
"SADD myset element:__rand_int__");
@@ -739,6 +779,13 @@ int main(int argc, const char **argv) {
free(cmd);
}
+ if (test_is_selected("hset")) {
+ len = redisFormatCommand(&cmd,
+ "HSET myset:__rand_int__ element:__rand_int__ %s",data);
+ benchmark("HSET",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("spop")) {
len = redisFormatCommand(&cmd,"SPOP myset");
benchmark("SPOP",cmd,len);
diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c
index 6c8f55279..c4d5a225e 100644
--- a/src/redis-check-aof.c
+++ b/src/redis-check-aof.c
@@ -28,13 +28,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "fmacros.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
+#include "server.h"
#include <sys/stat.h>
-#include "config.h"
#define ERROR(...) { \
char __buf[1024]; \
@@ -60,7 +55,7 @@ int readLong(FILE *fp, char prefix, long *target) {
return 0;
}
if (buf[0] != prefix) {
- ERROR("Expected prefix '%c', got: '%c'",buf[0],prefix);
+ ERROR("Expected prefix '%c', got: '%c'",prefix,buf[0]);
return 0;
}
*target = strtol(buf+1,&eptr,10);
@@ -87,7 +82,7 @@ int readString(FILE *fp, char** target) {
/* Increase length to also consume \r\n */
len += 2;
- *target = (char*)malloc(len);
+ *target = (char*)zmalloc(len);
if (!readBytes(fp,*target,len)) {
return 0;
}
@@ -127,12 +122,12 @@ off_t process(FILE *fp) {
}
}
}
- free(str);
+ zfree(str);
}
/* Stop if the loop did not finish */
if (i < argc) {
- if (str) free(str);
+ if (str) zfree(str);
break;
}
}
@@ -146,7 +141,7 @@ off_t process(FILE *fp) {
return pos;
}
-int main(int argc, char **argv) {
+int redis_check_aof_main(int argc, char **argv) {
char *filename;
int fix = 0;
@@ -185,6 +180,25 @@ int main(int argc, char **argv) {
exit(1);
}
+ /* This AOF file may have an RDB preamble. Check this to start, and if this
+ * is the case, start processing the RDB part. */
+ if (size >= 8) { /* There must be at least room for the RDB header. */
+ char sig[5];
+ int has_preamble = fread(sig,sizeof(sig),1,fp) == 1 &&
+ memcmp(sig,"REDIS",sizeof(sig)) == 0;
+ rewind(fp);
+ if (has_preamble) {
+ printf("The AOF appears to start with an RDB preamble.\n"
+ "Checking the RDB preamble to start:\n");
+ if (redis_check_rdb_main(argc,argv,fp) == C_ERR) {
+ printf("RDB preamble of AOF file is not sane, aborting.\n");
+ exit(1);
+ } else {
+ printf("RDB preamble is OK, proceeding with AOF tail...\n");
+ }
+ }
+ }
+
off_t pos = process(fp);
off_t diff = size-pos;
printf("AOF analyzed: size=%lld, ok_up_to=%lld, diff=%lld\n",
@@ -206,7 +220,8 @@ int main(int argc, char **argv) {
printf("Successfully truncated AOF\n");
}
} else {
- printf("AOF is not valid\n");
+ printf("AOF is not valid. "
+ "Use the --fix option to try fixing it.\n");
exit(1);
}
} else {
@@ -214,5 +229,5 @@ int main(int argc, char **argv) {
}
fclose(fp);
- return 0;
+ exit(0);
}
diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c
deleted file mode 100644
index a4f103fdd..000000000
--- a/src/redis-check-dump.c
+++ /dev/null
@@ -1,768 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <stdint.h>
-#include <limits.h>
-#include "lzf.h"
-#include "crc64.h"
-
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-#define REDIS_HASH_ZIPMAP 9
-#define REDIS_LIST_ZIPLIST 10
-#define REDIS_SET_INTSET 11
-#define REDIS_ZSET_ZIPLIST 12
-#define REDIS_HASH_ZIPLIST 13
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
-#define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME_MS 252
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
-#define ERROR(...) { \
- printf(__VA_ARGS__); \
- exit(1); \
-}
-
-/* data type to hold offset in file and size */
-typedef struct {
- void *data;
- size_t size;
- size_t offset;
-} pos;
-
-static unsigned char level = 0;
-static pos positions[16];
-
-#define CURR_OFFSET (positions[level].offset)
-
-/* Hold a stack of errors */
-typedef struct {
- char error[16][1024];
- size_t offset[16];
- size_t level;
-} errors_t;
-static errors_t errors;
-
-#define SHIFT_ERROR(provided_offset, ...) { \
- sprintf(errors.error[errors.level], __VA_ARGS__); \
- errors.offset[errors.level] = provided_offset; \
- errors.level++; \
-}
-
-/* Data type to hold opcode with optional key name an success status */
-typedef struct {
- char* key;
- int type;
- char success;
-} entry;
-
-/* Global vars that are actually used as constants. The following double
- * values are used for double on-disk serialization, and are initialized
- * at runtime to avoid strange compiler optimizations. */
-static double R_Zero, R_PosInf, R_NegInf, R_Nan;
-
-/* store string types for output */
-static char types[256][16];
-
-/* Return true if 't' is a valid object type. */
-int checkType(unsigned char t) {
- /* In case a new object type is added, update the following
- * condition as necessary. */
- return
- (t >= REDIS_HASH_ZIPMAP && t <= REDIS_HASH_ZIPLIST) ||
- t <= REDIS_HASH ||
- t >= REDIS_EXPIRETIME_MS;
-}
-
-/* when number of bytes to read is negative, do a peek */
-int readBytes(void *target, long num) {
- char peek = (num < 0) ? 1 : 0;
- num = (num < 0) ? -num : num;
-
- pos p = positions[level];
- if (p.offset + num > p.size) {
- return 0;
- } else {
- memcpy(target, (void*)((size_t)p.data + p.offset), num);
- if (!peek) positions[level].offset += num;
- }
- return 1;
-}
-
-int processHeader() {
- char buf[10] = "_________";
- int dump_version;
-
- if (!readBytes(buf, 9)) {
- ERROR("Cannot read header\n");
- }
-
- /* expect the first 5 bytes to equal REDIS */
- if (memcmp(buf,"REDIS",5) != 0) {
- ERROR("Wrong signature in header\n");
- }
-
- dump_version = (int)strtol(buf + 5, NULL, 10);
- if (dump_version < 1 || dump_version > 6) {
- ERROR("Unknown RDB format version: %d\n", dump_version);
- }
- return dump_version;
-}
-
-int loadType(entry *e) {
- uint32_t offset = CURR_OFFSET;
-
- /* this byte needs to qualify as type */
- unsigned char t;
- if (readBytes(&t, 1)) {
- if (checkType(t)) {
- e->type = t;
- return 1;
- } else {
- SHIFT_ERROR(offset, "Unknown type (0x%02x)", t);
- }
- } else {
- SHIFT_ERROR(offset, "Could not read type");
- }
-
- /* failure */
- return 0;
-}
-
-int peekType() {
- unsigned char t;
- if (readBytes(&t, -1) && (checkType(t)))
- return t;
- return -1;
-}
-
-/* discard time, just consume the bytes */
-int processTime(int type) {
- uint32_t offset = CURR_OFFSET;
- unsigned char t[8];
- int timelen = (type == REDIS_EXPIRETIME_MS) ? 8 : 4;
-
- if (readBytes(t,timelen)) {
- return 1;
- } else {
- SHIFT_ERROR(offset, "Could not read time");
- }
-
- /* failure */
- return 0;
-}
-
-uint32_t loadLength(int *isencoded) {
- unsigned char buf[2];
- uint32_t len;
- int type;
-
- if (isencoded) *isencoded = 0;
- if (!readBytes(buf, 1)) return REDIS_RDB_LENERR;
- type = (buf[0] & 0xC0) >> 6;
- if (type == REDIS_RDB_6BITLEN) {
- /* Read a 6 bit len */
- return buf[0] & 0x3F;
- } else if (type == REDIS_RDB_ENCVAL) {
- /* Read a 6 bit len encoding type */
- if (isencoded) *isencoded = 1;
- return buf[0] & 0x3F;
- } else if (type == REDIS_RDB_14BITLEN) {
- /* Read a 14 bit len */
- if (!readBytes(buf+1,1)) return REDIS_RDB_LENERR;
- return ((buf[0] & 0x3F) << 8) | buf[1];
- } else {
- /* Read a 32 bit len */
- if (!readBytes(&len, 4)) return REDIS_RDB_LENERR;
- return (unsigned int)ntohl(len);
- }
-}
-
-char *loadIntegerObject(int enctype) {
- uint32_t offset = CURR_OFFSET;
- unsigned char enc[4];
- long long val;
-
- if (enctype == REDIS_RDB_ENC_INT8) {
- uint8_t v;
- if (!readBytes(enc, 1)) return NULL;
- v = enc[0];
- val = (int8_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT16) {
- uint16_t v;
- if (!readBytes(enc, 2)) return NULL;
- v = enc[0]|(enc[1]<<8);
- val = (int16_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT32) {
- uint32_t v;
- if (!readBytes(enc, 4)) return NULL;
- v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
- val = (int32_t)v;
- } else {
- SHIFT_ERROR(offset, "Unknown integer encoding (0x%02x)", enctype);
- return NULL;
- }
-
- /* convert val into string */
- char *buf;
- buf = malloc(sizeof(char) * 128);
- sprintf(buf, "%lld", val);
- return buf;
-}
-
-char* loadLzfStringObject() {
- unsigned int slen, clen;
- char *c, *s;
-
- if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
-
- c = malloc(clen);
- if (!readBytes(c, clen)) {
- free(c);
- return NULL;
- }
-
- s = malloc(slen+1);
- if (lzf_decompress(c,clen,s,slen) == 0) {
- free(c); free(s);
- return NULL;
- }
-
- free(c);
- return s;
-}
-
-/* returns NULL when not processable, char* when valid */
-char* loadStringObject() {
- uint32_t offset = CURR_OFFSET;
- int isencoded;
- uint32_t len;
-
- len = loadLength(&isencoded);
- if (isencoded) {
- switch(len) {
- case REDIS_RDB_ENC_INT8:
- case REDIS_RDB_ENC_INT16:
- case REDIS_RDB_ENC_INT32:
- return loadIntegerObject(len);
- case REDIS_RDB_ENC_LZF:
- return loadLzfStringObject();
- default:
- /* unknown encoding */
- SHIFT_ERROR(offset, "Unknown string encoding (0x%02x)", len);
- return NULL;
- }
- }
-
- if (len == REDIS_RDB_LENERR) return NULL;
-
- char *buf = malloc(sizeof(char) * (len+1));
- buf[len] = '\0';
- if (!readBytes(buf, len)) {
- free(buf);
- return NULL;
- }
- return buf;
-}
-
-int processStringObject(char** store) {
- unsigned long offset = CURR_OFFSET;
- char *key = loadStringObject();
- if (key == NULL) {
- SHIFT_ERROR(offset, "Error reading string object");
- free(key);
- return 0;
- }
-
- if (store != NULL) {
- *store = key;
- } else {
- free(key);
- }
- return 1;
-}
-
-double* loadDoubleValue() {
- char buf[256];
- unsigned char len;
- double* val;
-
- if (!readBytes(&len,1)) return NULL;
-
- val = malloc(sizeof(double));
- switch(len) {
- case 255: *val = R_NegInf; return val;
- case 254: *val = R_PosInf; return val;
- case 253: *val = R_Nan; return val;
- default:
- if (!readBytes(buf, len)) {
- free(val);
- return NULL;
- }
- buf[len] = '\0';
- sscanf(buf, "%lg", val);
- return val;
- }
-}
-
-int processDoubleValue(double** store) {
- unsigned long offset = CURR_OFFSET;
- double *val = loadDoubleValue();
- if (val == NULL) {
- SHIFT_ERROR(offset, "Error reading double value");
- free(val);
- return 0;
- }
-
- if (store != NULL) {
- *store = val;
- } else {
- free(val);
- }
- return 1;
-}
-
-int loadPair(entry *e) {
- uint32_t offset = CURR_OFFSET;
- uint32_t i;
-
- /* read key first */
- char *key;
- if (processStringObject(&key)) {
- e->key = key;
- } else {
- SHIFT_ERROR(offset, "Error reading entry key");
- return 0;
- }
-
- uint32_t length = 0;
- if (e->type == REDIS_LIST ||
- e->type == REDIS_SET ||
- e->type == REDIS_ZSET ||
- e->type == REDIS_HASH) {
- if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
- SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
- return 0;
- }
- }
-
- switch(e->type) {
- case REDIS_STRING:
- case REDIS_HASH_ZIPMAP:
- case REDIS_LIST_ZIPLIST:
- case REDIS_SET_INTSET:
- case REDIS_ZSET_ZIPLIST:
- case REDIS_HASH_ZIPLIST:
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading entry value");
- return 0;
- }
- break;
- case REDIS_LIST:
- case REDIS_SET:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- case REDIS_ZSET:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
- return 0;
- }
- offset = CURR_OFFSET;
- if (!processDoubleValue(NULL)) {
- SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- case REDIS_HASH:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
- return 0;
- }
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- default:
- SHIFT_ERROR(offset, "Type not implemented");
- return 0;
- }
- /* because we're done, we assume success */
- e->success = 1;
- return 1;
-}
-
-entry loadEntry() {
- entry e = { NULL, -1, 0 };
- uint32_t length, offset[4];
-
- /* reset error container */
- errors.level = 0;
-
- offset[0] = CURR_OFFSET;
- if (!loadType(&e)) {
- return e;
- }
-
- offset[1] = CURR_OFFSET;
- if (e.type == REDIS_SELECTDB) {
- if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
- SHIFT_ERROR(offset[1], "Error reading database number");
- return e;
- }
- if (length > 63) {
- SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
- return e;
- }
- } else if (e.type == REDIS_EOF) {
- if (positions[level].offset < positions[level].size) {
- SHIFT_ERROR(offset[0], "Unexpected EOF");
- } else {
- e.success = 1;
- }
- return e;
- } else {
- /* optionally consume expire */
- if (e.type == REDIS_EXPIRETIME ||
- e.type == REDIS_EXPIRETIME_MS) {
- if (!processTime(e.type)) return e;
- if (!loadType(&e)) return e;
- }
-
- offset[1] = CURR_OFFSET;
- if (!loadPair(&e)) {
- SHIFT_ERROR(offset[1], "Error for type %s", types[e.type]);
- return e;
- }
- }
-
- /* all entries are followed by a valid type:
- * e.g. a new entry, SELECTDB, EXPIRE, EOF */
- offset[2] = CURR_OFFSET;
- if (peekType() == -1) {
- SHIFT_ERROR(offset[2], "Followed by invalid type");
- SHIFT_ERROR(offset[0], "Error for type %s", types[e.type]);
- e.success = 0;
- } else {
- e.success = 1;
- }
-
- return e;
-}
-
-void printCentered(int indent, int width, char* body) {
- char head[256], tail[256];
- memset(head, '\0', 256);
- memset(tail, '\0', 256);
-
- memset(head, '=', indent);
- memset(tail, '=', width - 2 - indent - strlen(body));
- printf("%s %s %s\n", head, body, tail);
-}
-
-void printValid(uint64_t ops, uint64_t bytes) {
- char body[80];
- sprintf(body, "Processed %llu valid opcodes (in %llu bytes)",
- (unsigned long long) ops, (unsigned long long) bytes);
- printCentered(4, 80, body);
-}
-
-void printSkipped(uint64_t bytes, uint64_t offset) {
- char body[80];
- sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)",
- (unsigned long long) bytes, (unsigned long long) offset);
- printCentered(4, 80, body);
-}
-
-void printErrorStack(entry *e) {
- unsigned int i;
- char body[64];
-
- if (e->type == -1) {
- sprintf(body, "Error trace");
- } else if (e->type >= 253) {
- sprintf(body, "Error trace (%s)", types[e->type]);
- } else if (!e->key) {
- sprintf(body, "Error trace (%s: (unknown))", types[e->type]);
- } else {
- char tmp[41];
- strncpy(tmp, e->key, 40);
-
- /* display truncation at the last 3 chars */
- if (strlen(e->key) > 40) {
- memset(&tmp[37], '.', 3);
- }
-
- /* display unprintable characters as ? */
- for (i = 0; i < strlen(tmp); i++) {
- if (tmp[i] <= 32) tmp[i] = '?';
- }
- sprintf(body, "Error trace (%s: %s)", types[e->type], tmp);
- }
-
- printCentered(4, 80, body);
-
- /* display error stack */
- for (i = 0; i < errors.level; i++) {
- printf("0x%08lx - %s\n",
- (unsigned long) errors.offset[i], errors.error[i]);
- }
-}
-
-void process() {
- uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
- entry entry;
- int dump_version = processHeader();
-
- /* Exclude the final checksum for RDB >= 5. Will be checked at the end. */
- if (dump_version >= 5) {
- if (positions[0].size < 8) {
- printf("RDB version >= 5 but no room for checksum.\n");
- exit(1);
- }
- positions[0].size -= 8;;
- }
-
- level = 1;
- while(positions[0].offset < positions[0].size) {
- positions[1] = positions[0];
-
- entry = loadEntry();
- if (!entry.success) {
- printValid(num_valid_ops, num_valid_bytes);
- printErrorStack(&entry);
- num_errors++;
- num_valid_ops = 0;
- num_valid_bytes = 0;
-
- /* search for next valid entry */
- uint64_t offset = positions[0].offset + 1;
- int i = 0;
-
- while (!entry.success && offset < positions[0].size) {
- positions[1].offset = offset;
-
- /* find 3 consecutive valid entries */
- for (i = 0; i < 3; i++) {
- entry = loadEntry();
- if (!entry.success) break;
- }
- /* check if we found 3 consecutive valid entries */
- if (i < 3) {
- offset++;
- }
- }
-
- /* print how many bytes we have skipped to find a new valid opcode */
- if (offset < positions[0].size) {
- printSkipped(offset - positions[0].offset, offset);
- }
-
- positions[0].offset = offset;
- } else {
- num_valid_ops++;
- num_valid_bytes += positions[1].offset - positions[0].offset;
-
- /* advance position */
- positions[0] = positions[1];
- }
- free(entry.key);
- }
-
- /* because there is another potential error,
- * print how many valid ops we have processed */
- printValid(num_valid_ops, num_valid_bytes);
-
- /* expect an eof */
- if (entry.type != REDIS_EOF) {
- /* last byte should be EOF, add error */
- errors.level = 0;
- SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
-
- /* this is an EOF error so reset type */
- entry.type = -1;
- printErrorStack(&entry);
-
- num_errors++;
- }
-
- /* Verify checksum */
- if (dump_version >= 5) {
- uint64_t crc = crc64(0,positions[0].data,positions[0].size);
- uint64_t crc2;
- unsigned char *p = (unsigned char*)positions[0].data+positions[0].size;
- crc2 = ((uint64_t)p[0] << 0) |
- ((uint64_t)p[1] << 8) |
- ((uint64_t)p[2] << 16) |
- ((uint64_t)p[3] << 24) |
- ((uint64_t)p[4] << 32) |
- ((uint64_t)p[5] << 40) |
- ((uint64_t)p[6] << 48) |
- ((uint64_t)p[7] << 56);
- if (crc != crc2) {
- SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match.");
- } else {
- printf("CRC64 checksum is OK\n");
- }
- }
-
- /* print summary on errors */
- if (num_errors) {
- printf("\n");
- printf("Total unprocessable opcodes: %llu\n",
- (unsigned long long) num_errors);
- }
-}
-
-int main(int argc, char **argv) {
- /* expect the first argument to be the dump file */
- if (argc <= 1) {
- printf("Usage: %s <dump.rdb>\n", argv[0]);
- exit(0);
- }
-
- int fd;
- off_t size;
- struct stat stat;
- void *data;
-
- fd = open(argv[1], O_RDONLY);
- if (fd < 1) {
- ERROR("Cannot open file: %s\n", argv[1]);
- }
- if (fstat(fd, &stat) == -1) {
- ERROR("Cannot stat: %s\n", argv[1]);
- } else {
- size = stat.st_size;
- }
-
- if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) {
- ERROR("Cannot check dump files >2GB on a 32-bit platform\n");
- }
-
- data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
- if (data == MAP_FAILED) {
- ERROR("Cannot mmap: %s\n", argv[1]);
- }
-
- /* Initialize static vars */
- positions[0].data = data;
- positions[0].size = size;
- positions[0].offset = 0;
- errors.level = 0;
-
- /* Object types */
- sprintf(types[REDIS_STRING], "STRING");
- sprintf(types[REDIS_LIST], "LIST");
- sprintf(types[REDIS_SET], "SET");
- sprintf(types[REDIS_ZSET], "ZSET");
- sprintf(types[REDIS_HASH], "HASH");
-
- /* Object types only used for dumping to disk */
- sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
- sprintf(types[REDIS_SELECTDB], "SELECTDB");
- sprintf(types[REDIS_EOF], "EOF");
-
- /* Double constants initialization */
- R_Zero = 0.0;
- R_PosInf = 1.0/R_Zero;
- R_NegInf = -1.0/R_Zero;
- R_Nan = R_Zero/R_Zero;
-
- process();
-
- munmap(data, size);
- close(fd);
- return 0;
-}
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
new file mode 100644
index 000000000..4027536e5
--- /dev/null
+++ b/src/redis-check-rdb.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "rdb.h"
+
+#include <stdarg.h>
+
+void createSharedObjects(void);
+void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
+long long rdbLoadMillisecondTime(rio *rdb);
+int rdbCheckMode = 0;
+
+struct {
+ rio *rio;
+ robj *key; /* Current key we are reading. */
+ int key_type; /* Current key type if != -1. */
+ unsigned long keys; /* Number of keys processed. */
+ unsigned long expires; /* Number of keys with an expire. */
+ unsigned long already_expired; /* Number of keys already expired. */
+ int doing; /* The state while reading the RDB. */
+ int error_set; /* True if error is populated. */
+ char error[1024];
+} rdbstate;
+
+/* At every loading step try to remember what we were about to do, so that
+ * we can log this information when an error is encountered. */
+#define RDB_CHECK_DOING_START 0
+#define RDB_CHECK_DOING_READ_TYPE 1
+#define RDB_CHECK_DOING_READ_EXPIRE 2
+#define RDB_CHECK_DOING_READ_KEY 3
+#define RDB_CHECK_DOING_READ_OBJECT_VALUE 4
+#define RDB_CHECK_DOING_CHECK_SUM 5
+#define RDB_CHECK_DOING_READ_LEN 6
+#define RDB_CHECK_DOING_READ_AUX 7
+
+char *rdb_check_doing_string[] = {
+ "start",
+ "read-type",
+ "read-expire",
+ "read-key",
+ "read-object-value",
+ "check-sum",
+ "read-len",
+ "read-aux"
+};
+
+char *rdb_type_string[] = {
+ "string",
+ "list-linked",
+ "set-hashtable",
+ "zset-v1",
+ "hash-hashtable",
+ "zset-v2",
+ "module-value",
+ "","",
+ "hash-zipmap",
+ "list-ziplist",
+ "set-intset",
+ "zset-ziplist",
+ "hash-ziplist",
+ "quicklist"
+};
+
+/* Show a few stats collected into 'rdbstate' */
+void rdbShowGenericInfo(void) {
+ printf("[info] %lu keys read\n", rdbstate.keys);
+ printf("[info] %lu expires\n", rdbstate.expires);
+ printf("[info] %lu already expired\n", rdbstate.already_expired);
+}
+
+/* Called on RDB errors. Provides details about the RDB and the offset
+ * we were when the error was detected. */
+void rdbCheckError(const char *fmt, ...) {
+ char msg[1024];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ printf("--- RDB ERROR DETECTED ---\n");
+ printf("[offset %llu] %s\n",
+ (unsigned long long) (rdbstate.rio ?
+ rdbstate.rio->processed_bytes : 0), msg);
+ printf("[additional info] While doing: %s\n",
+ rdb_check_doing_string[rdbstate.doing]);
+ if (rdbstate.key)
+ printf("[additional info] Reading key '%s'\n",
+ (char*)rdbstate.key->ptr);
+ if (rdbstate.key_type != -1)
+ printf("[additional info] Reading type %d (%s)\n",
+ rdbstate.key_type,
+ ((unsigned)rdbstate.key_type <
+ sizeof(rdb_type_string)/sizeof(char*)) ?
+ rdb_type_string[rdbstate.key_type] : "unknown");
+ rdbShowGenericInfo();
+}
+
+/* Print information during RDB checking. */
+void rdbCheckInfo(const char *fmt, ...) {
+ char msg[1024];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ printf("[offset %llu] %s\n",
+ (unsigned long long) (rdbstate.rio ?
+ rdbstate.rio->processed_bytes : 0), msg);
+}
+
+/* Used inside rdb.c in order to log specific errors happening inside
+ * the RDB loading internals. */
+void rdbCheckSetError(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(rdbstate.error, sizeof(rdbstate.error), fmt, ap);
+ va_end(ap);
+ rdbstate.error_set = 1;
+}
+
+/* During RDB check we setup a special signal handler for memory violations
+ * and similar conditions, so that we can log the offending part of the RDB
+ * if the crash is due to broken content. */
+void rdbCheckHandleCrash(int sig, siginfo_t *info, void *secret) {
+ UNUSED(sig);
+ UNUSED(info);
+ UNUSED(secret);
+
+ rdbCheckError("Server crash checking the specified RDB file!");
+ exit(1);
+}
+
+void rdbCheckSetupSignals(void) {
+ struct sigaction act;
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
+ act.sa_sigaction = rdbCheckHandleCrash;
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ sigaction(SIGFPE, &act, NULL);
+ sigaction(SIGILL, &act, NULL);
+}
+
+/* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise
+ * 1 is returned.
+ * The file is specified as a filename in 'rdbfilename' if 'fp' is not NULL,
+ * otherwise the already open file 'fp' is checked. */
+int redis_check_rdb(char *rdbfilename, FILE *fp) {
+ uint64_t dbid;
+ int type, rdbver;
+ char buf[1024];
+ long long expiretime, now = mstime();
+ static rio rdb; /* Pointed by global struct riostate. */
+
+ int closefile = (fp == NULL);
+ if (fp == NULL && (fp = fopen(rdbfilename,"r")) == NULL) return 1;
+
+ rioInitWithFile(&rdb,fp);
+ rdbstate.rio = &rdb;
+ rdb.update_cksum = rdbLoadProgressCallback;
+ if (rioRead(&rdb,buf,9) == 0) goto eoferr;
+ buf[9] = '\0';
+ if (memcmp(buf,"REDIS",5) != 0) {
+ rdbCheckError("Wrong signature trying to load DB from file");
+ return 1;
+ }
+ rdbver = atoi(buf+5);
+ if (rdbver < 1 || rdbver > RDB_VERSION) {
+ rdbCheckError("Can't handle RDB format version %d",rdbver);
+ return 1;
+ }
+
+ startLoading(fp);
+ while(1) {
+ robj *key, *val;
+ expiretime = -1;
+
+ /* Read type. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE;
+ if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
+ if (type == RDB_OPCODE_EXPIRETIME) {
+ rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
+ if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
+ /* We read the time so we need to read the object type again. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE;
+ if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+ /* the EXPIRETIME opcode specifies time in seconds, so convert
+ * into milliseconds. */
+ expiretime *= 1000;
+ } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+             * with RDB v3. Like EXPIRETIME but with more precision. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+ if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
+ /* We read the time so we need to read the object type again. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE;
+ if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+ } else if (type == RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
+ break;
+ } else if (type == RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+ if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ rdbCheckInfo("Selecting DB ID %d", dbid);
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_RESIZEDB) {
+ /* RESIZEDB: Hint about the size of the keys in the currently
+ * selected data base, in order to avoid useless rehashing. */
+ uint64_t db_size, expires_size;
+ rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+ if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_AUX) {
+ /* AUX: generic string-string fields. Use to add state to RDB
+ * which is backward compatible. Implementations of RDB loading
+             * are required to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ rdbstate.doing = RDB_CHECK_DOING_READ_AUX;
+ if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+
+ rdbCheckInfo("AUX FIELD %s = '%s'",
+ (char*)auxkey->ptr, (char*)auxval->ptr);
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
+ } else {
+ if (!rdbIsObjectType(type)) {
+ rdbCheckError("Invalid object type: %d", type);
+ return 1;
+ }
+ rdbstate.key_type = type;
+ }
+
+ /* Read key */
+ rdbstate.doing = RDB_CHECK_DOING_READ_KEY;
+ if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ rdbstate.key = key;
+ rdbstate.keys++;
+ /* Read value */
+ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE;
+ if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
+ /* Check if the key already expired. This function is used when loading
+ * an RDB file from disk, either at startup, or when an RDB was
+ * received from the master. In the latter case, the master is
+ * responsible for key expiry. If we would expire keys here, the
+ * snapshot taken by the master may not be reflected on the slave. */
+ if (server.masterhost == NULL && expiretime != -1 && expiretime < now)
+ rdbstate.already_expired++;
+ if (expiretime != -1) rdbstate.expires++;
+ rdbstate.key = NULL;
+ decrRefCount(key);
+ decrRefCount(val);
+ rdbstate.key_type = -1;
+ }
+ /* Verify the checksum if RDB version is >= 5 */
+ if (rdbver >= 5 && server.rdb_checksum) {
+ uint64_t cksum, expected = rdb.cksum;
+
+ rdbstate.doing = RDB_CHECK_DOING_CHECK_SUM;
+ if (rioRead(&rdb,&cksum,8) == 0) goto eoferr;
+ memrev64ifbe(&cksum);
+ if (cksum == 0) {
+ rdbCheckInfo("RDB file was saved with checksum disabled: no check performed.");
+ } else if (cksum != expected) {
+ rdbCheckError("RDB CRC error");
+ } else {
+ rdbCheckInfo("Checksum OK");
+ }
+ }
+
+ if (closefile) fclose(fp);
+ return 0;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+ if (rdbstate.error_set) {
+ rdbCheckError(rdbstate.error);
+ } else {
+ rdbCheckError("Unexpected EOF reading RDB file");
+ }
+ return 1;
+}
+
+/* RDB check main: called from redis.c when Redis is executed with the
+ * redis-check-rdb alias, or during RDB loading errors.
+ *
+ * The function works in two ways: can be called with argc/argv as a
+ * standalone executable, or called with a non NULL 'fp' argument if we
+ * already have an open file to check. This happens when the function
+ * is used to check an RDB preamble inside an AOF file.
+ *
+ * When called with fp = NULL, the function never returns, but exits with the
+ * status code according to success (RDB is sane) or error (RDB is corrupted).
+ * Otherwise if called with a non NULL fp, the function returns C_OK or
+ * C_ERR depending on the success or failure. */
+int redis_check_rdb_main(int argc, char **argv, FILE *fp) {
+ if (argc != 2 && fp == NULL) {
+ fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
+ exit(1);
+ }
+ /* In order to call the loading functions we need to create the shared
+ * integer objects, however since this function may be called from
+ * an already initialized Redis instance, check if we really need to. */
+ if (shared.integers[0] == NULL)
+ createSharedObjects();
+ server.loading_process_events_interval_bytes = 0;
+ rdbCheckMode = 1;
+ rdbCheckInfo("Checking RDB file %s", argv[1]);
+ rdbCheckSetupSignals();
+ int retval = redis_check_rdb(argv[1],fp);
+ if (retval == 0) {
+ rdbCheckInfo("\\o/ RDB looks OK! \\o/");
+ rdbShowGenericInfo();
+ }
+ if (fp) return (retval == 0) ? C_OK : C_ERR;
+ exit(retval);
+}
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 907e62b74..61068483f 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -44,22 +44,38 @@
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
+#include <math.h>
-#include "hiredis.h"
-#include "sds.h"
+#include <hiredis.h>
+#include <sds.h> /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */
#include "zmalloc.h"
#include "linenoise.h"
#include "help.h"
#include "anet.h"
#include "ae.h"
-#define REDIS_NOTUSED(V) ((void) V)
+#define UNUSED(V) ((void) V)
#define OUTPUT_STANDARD 0
#define OUTPUT_RAW 1
#define OUTPUT_CSV 2
#define REDIS_CLI_KEEPALIVE_INTERVAL 15 /* seconds */
#define REDIS_CLI_DEFAULT_PIPE_TIMEOUT 30 /* seconds */
+#define REDIS_CLI_HISTFILE_ENV "REDISCLI_HISTFILE"
+#define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history"
+#define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE"
+#define REDIS_CLI_RCFILE_DEFAULT ".redisclirc"
+
+/* --latency-dist palettes. */
+int spectrum_palette_color_size = 19;
+int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196};
+
+int spectrum_palette_mono_size = 13;
+int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253};
+
+/* The actual palette in use. */
+int *spectrum_palette;
+int spectrum_palette_size;
static redisContext *context;
static struct config {
@@ -74,7 +90,10 @@ static struct config {
int monitor_mode;
int pubsub_mode;
int latency_mode;
+ int latency_dist_mode;
int latency_history;
+ int lru_test_mode;
+ long long lru_test_sample_size;
int cluster_mode;
int cluster_reissue_command;
int slave_mode;
@@ -94,13 +113,24 @@ static struct config {
sds mb_delim;
char prompt[128];
char *eval;
+ int eval_ldb;
+ int eval_ldb_sync; /* Ask for synchronous mode of the Lua debugger. */
+ int eval_ldb_end; /* Lua debugging session ended. */
+ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */
+ int last_cmd_type;
} config;
+/* User preferences. */
+static struct pref {
+ int hints;
+} pref;
+
static volatile sig_atomic_t force_cancel_loop = 0;
-static void usage();
+static void usage(void);
static void slaveMode(void);
char *redisGitSHA1(void);
char *redisGitDirty(void);
+static int cliConnect(int force);
/*------------------------------------------------------------------------------
* Utility functions
@@ -123,13 +153,13 @@ static long long mstime(void) {
static void cliRefreshPrompt(void) {
int len;
+ if (config.eval_ldb) return;
if (config.hostsocket != NULL)
len = snprintf(config.prompt,sizeof(config.prompt),"redis %s",
config.hostsocket);
else
- len = snprintf(config.prompt,sizeof(config.prompt),
- strchr(config.hostip,':') ? "[%s]:%d" : "%s:%d",
- config.hostip, config.hostport);
+ len = anetFormatAddr(config.prompt, sizeof(config.prompt),
+ config.hostip, config.hostport);
/* Add [dbnum] if needed */
if (config.dbnum != 0)
len += snprintf(config.prompt+len,sizeof(config.prompt)-len,"[%d]",
@@ -137,6 +167,37 @@ static void cliRefreshPrompt(void) {
snprintf(config.prompt+len,sizeof(config.prompt)-len,"> ");
}
+/* Return the name of the dotfile for the specified 'dotfilename'.
+ * Normally it just concatenates user $HOME to the file specified
+ * in 'dotfilename'. However if the environment variable 'envoverride'
+ * is set, its value is taken as the path.
+ *
+ * The function returns NULL (if the file is /dev/null or cannot be
+ * obtained for some error), or an SDS string that must be freed by
+ * the user. */
+static sds getDotfilePath(char *envoverride, char *dotfilename) {
+ char *path = NULL;
+ sds dotPath = NULL;
+
+ /* Check the env for a dotfile override. */
+ path = getenv(envoverride);
+ if (path != NULL && *path != '\0') {
+ if (!strcmp("/dev/null", path)) {
+ return NULL;
+ }
+
+ /* If the env is set, return it. */
+ dotPath = sdsnew(path);
+ } else {
+ char *home = getenv("HOME");
+ if (home != NULL && *home != '\0') {
+ /* If no override is set use $HOME/<dotfilename>. */
+ dotPath = sdscatprintf(sdsempty(), "%s/%s", home, dotfilename);
+ }
+ }
+ return dotPath;
+}
+
/*------------------------------------------------------------------------------
* Help functions
*--------------------------------------------------------------------------- */
@@ -157,7 +218,7 @@ typedef struct {
static helpEntry *helpEntries;
static int helpEntriesLen;
-static sds cliVersion() {
+static sds cliVersion(void) {
sds version;
version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION);
@@ -171,18 +232,18 @@ static sds cliVersion() {
return version;
}
-static void cliInitHelp() {
+static void cliInitHelp(void) {
int commandslen = sizeof(commandHelp)/sizeof(struct commandHelp);
int groupslen = sizeof(commandGroups)/sizeof(char*);
int i, len, pos = 0;
helpEntry tmp;
helpEntriesLen = len = commandslen+groupslen;
- helpEntries = malloc(sizeof(helpEntry)*len);
+ helpEntries = zmalloc(sizeof(helpEntry)*len);
for (i = 0; i < groupslen; i++) {
tmp.argc = 1;
- tmp.argv = malloc(sizeof(sds));
+ tmp.argv = zmalloc(sizeof(sds));
tmp.argv[0] = sdscatprintf(sdsempty(),"@%s",commandGroups[i]);
tmp.full = tmp.argv[0];
tmp.type = CLI_HELP_GROUP;
@@ -199,6 +260,65 @@ static void cliInitHelp() {
}
}
+/* cliInitHelp() setups the helpEntries array with the command and group
+ * names from the help.h file. However the Redis instance we are connecting
+ * to may support more commands, so this function integrates the previous
+ * entries with additional entries obtained using the COMMAND command
+ * available in recent versions of Redis. */
+static void cliIntegrateHelp(void) {
+ if (cliConnect(0) == REDIS_ERR) return;
+
+ redisReply *reply = redisCommand(context, "COMMAND");
+ if(reply == NULL || reply->type != REDIS_REPLY_ARRAY) return;
+
+ /* Scan the array reported by COMMAND and fill only the entries that
+ * don't already match what we have. */
+ for (size_t j = 0; j < reply->elements; j++) {
+ redisReply *entry = reply->element[j];
+ if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 ||
+ entry->element[0]->type != REDIS_REPLY_STRING ||
+ entry->element[1]->type != REDIS_REPLY_INTEGER ||
+ entry->element[3]->type != REDIS_REPLY_INTEGER) return;
+ char *cmdname = entry->element[0]->str;
+ int i;
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ helpEntry *he = helpEntries+i;
+ if (!strcasecmp(he->argv[0],cmdname))
+ break;
+ }
+ if (i != helpEntriesLen) continue;
+
+ helpEntriesLen++;
+ helpEntries = zrealloc(helpEntries,sizeof(helpEntry)*helpEntriesLen);
+ helpEntry *new = helpEntries+(helpEntriesLen-1);
+
+ new->argc = 1;
+ new->argv = zmalloc(sizeof(sds));
+ new->argv[0] = sdsnew(cmdname);
+ new->full = new->argv[0];
+ new->type = CLI_HELP_COMMAND;
+ sdstoupper(new->argv[0]);
+
+ struct commandHelp *ch = zmalloc(sizeof(*ch));
+ ch->name = new->argv[0];
+ ch->params = sdsempty();
+ int args = llabs(entry->element[1]->integer);
+ if (entry->element[3]->integer == 1) {
+ ch->params = sdscat(ch->params,"key ");
+ args--;
+ }
+ while(args--) ch->params = sdscat(ch->params,"arg ");
+ if (entry->element[1]->integer < 0)
+ ch->params = sdscat(ch->params,"...options...");
+ ch->summary = "Help not available";
+ ch->group = 0;
+ ch->since = "not known";
+ new->org = ch;
+ }
+ freeReplyObject(reply);
+}
+
/* Output command help to stdout. */
static void cliOutputCommandHelp(struct commandHelp *help, int group) {
printf("\r\n \x1b[1m%s\x1b[0m \x1b[90m%s\x1b[0m\r\n", help->name, help->params);
@@ -210,14 +330,20 @@ static void cliOutputCommandHelp(struct commandHelp *help, int group) {
}
/* Print generic help. */
-static void cliOutputGenericHelp() {
+static void cliOutputGenericHelp(void) {
sds version = cliVersion();
printf(
- "redis-cli %s\r\n"
- "Type: \"help @<group>\" to get a list of commands in <group>\r\n"
- " \"help <command>\" for help on <command>\r\n"
- " \"help <tab>\" to get a list of possible help topics\r\n"
- " \"quit\" to exit\r\n",
+ "redis-cli %s\n"
+ "To get help about Redis commands type:\n"
+ " \"help @<group>\" to get a list of commands in <group>\n"
+ " \"help <command>\" for help on <command>\n"
+ " \"help <tab>\" to get a list of possible help topics\n"
+ " \"quit\" to exit\n"
+ "\n"
+ "To set redis-cli preferences:\n"
+ " \":set hints\" enable online hints\n"
+ " \":set nohints\" disable online hints\n"
+ "Set your preferences in ~/.redisclirc\n",
version
);
sdsfree(version);
@@ -268,6 +394,7 @@ static void cliOutputHelp(int argc, char **argv) {
printf("\r\n");
}
+/* Linenoise completion callback. */
static void completionCallback(const char *buf, linenoiseCompletions *lc) {
size_t startpos = 0;
int mask;
@@ -296,12 +423,64 @@ static void completionCallback(const char *buf, linenoiseCompletions *lc) {
}
}
+/* Linenoise hints callback. */
+static char *hintsCallback(const char *buf, int *color, int *bold) {
+ if (!pref.hints) return NULL;
+
+ int i, argc, buflen = strlen(buf);
+ sds *argv = sdssplitargs(buf,&argc);
+ int endspace = buflen && isspace(buf[buflen-1]);
+
+ /* Check if the argument list is empty and return ASAP. */
+ if (argc == 0) {
+ sdsfreesplitres(argv,argc);
+ return NULL;
+ }
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ if (!(helpEntries[i].type & CLI_HELP_COMMAND)) continue;
+
+ if (strcasecmp(argv[0],helpEntries[i].full) == 0)
+ {
+ *color = 90;
+ *bold = 0;
+ sds hint = sdsnew(helpEntries[i].org->params);
+
+ /* Remove arguments from the returned hint to show only the
+ * ones the user has not yet typed. */
+ int toremove = argc-1;
+ while(toremove > 0 && sdslen(hint)) {
+ if (hint[0] == '[') break;
+ if (hint[0] == ' ') toremove--;
+ sdsrange(hint,1,-1);
+ }
+
+ /* Add an initial space if needed. */
+ if (!endspace) {
+ sds newhint = sdsnewlen(" ",1);
+ newhint = sdscatsds(newhint,hint);
+ sdsfree(hint);
+ hint = newhint;
+ }
+
+ sdsfreesplitres(argv,argc);
+ return hint;
+ }
+ }
+ sdsfreesplitres(argv,argc);
+ return NULL;
+}
+
+static void freeHintsCallback(void *ptr) {
+ sdsfree(ptr);
+}
+
/*------------------------------------------------------------------------------
* Networking / parsing
*--------------------------------------------------------------------------- */
/* Send AUTH command to the server */
-static int cliAuth() {
+static int cliAuth(void) {
redisReply *reply;
if (config.auth == NULL) return REDIS_OK;
@@ -314,14 +493,16 @@ static int cliAuth() {
}
/* Send SELECT dbnum to the server */
-static int cliSelect() {
+static int cliSelect(void) {
redisReply *reply;
if (config.dbnum == 0) return REDIS_OK;
reply = redisCommand(context,"SELECT %d",config.dbnum);
if (reply != NULL) {
+ int result = REDIS_OK;
+ if (reply->type == REDIS_REPLY_ERROR) result = REDIS_ERR;
freeReplyObject(reply);
- return REDIS_OK;
+ return result;
}
return REDIS_ERR;
}
@@ -330,8 +511,9 @@ static int cliSelect() {
* even if there is already a connected socket. */
static int cliConnect(int force) {
if (context == NULL || force) {
- if (context != NULL)
+ if (context != NULL) {
redisFree(context);
+ }
if (config.hostsocket == NULL) {
context = redisConnect(config.hostip,config.hostport);
@@ -365,7 +547,7 @@ static int cliConnect(int force) {
return REDIS_OK;
}
-static void cliPrintContextError() {
+static void cliPrintContextError(void) {
if (context == NULL) return;
fprintf(stderr,"Error: %s\n",context->errstr);
}
@@ -415,7 +597,7 @@ static sds cliFormatReplyTTY(redisReply *r, char *prefix) {
_prefix = sdscat(sdsnew(prefix),_prefixlen);
/* Setup prefix format for every entry */
- snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%dd) ",idxlen);
+ snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%ud) ",idxlen);
for (i = 0; i < r->elements; i++) {
/* Don't use the prefix for the first element, as the parent
@@ -437,6 +619,51 @@ static sds cliFormatReplyTTY(redisReply *r, char *prefix) {
return out;
}
+int isColorTerm(void) {
+ char *t = getenv("TERM");
+ return t != NULL && strstr(t,"xterm") != NULL;
+}
+
+/* Helper function for sdsCatColorizedLdbReply() appending colorized strings
+ * to an SDS string. */
+sds sdscatcolor(sds o, char *s, size_t len, char *color) {
+ if (!isColorTerm()) return sdscatlen(o,s,len);
+
+ int bold = strstr(color,"bold") != NULL;
+ int ccode = 37; /* Defaults to white. */
+ if (strstr(color,"red")) ccode = 31;
+ else if (strstr(color,"red")) ccode = 31;
+ else if (strstr(color,"green")) ccode = 32;
+ else if (strstr(color,"yellow")) ccode = 33;
+ else if (strstr(color,"blue")) ccode = 34;
+ else if (strstr(color,"magenta")) ccode = 35;
+ else if (strstr(color,"cyan")) ccode = 36;
+ else if (strstr(color,"white")) ccode = 37;
+
+ o = sdscatfmt(o,"\033[%i;%i;49m",bold,ccode);
+ o = sdscatlen(o,s,len);
+ o = sdscat(o,"\033[0m");
+ return o;
+}
+
+/* Colorize Lua debugger status replies according to the prefix they
+ * have. */
+sds sdsCatColorizedLdbReply(sds o, char *s, size_t len) {
+ char *color = "white";
+
+ if (strstr(s,"<debug>")) color = "bold";
+ if (strstr(s,"<redis>")) color = "green";
+ if (strstr(s,"<reply>")) color = "cyan";
+ if (strstr(s,"<error>")) color = "red";
+ if (strstr(s,"<hint>")) color = "bold";
+ if (strstr(s,"<value>") || strstr(s,"<retval>")) color = "magenta";
+ if (len > 4 && isdigit(s[3])) {
+ if (s[1] == '>') color = "yellow"; /* Current line. */
+ else if (s[2] == '#') color = "bold"; /* Break point. */
+ }
+ return sdscatcolor(o,s,len,color);
+}
+
static sds cliFormatReplyRaw(redisReply *r) {
sds out = sdsempty(), tmp;
size_t i;
@@ -451,7 +678,24 @@ static sds cliFormatReplyRaw(redisReply *r) {
break;
case REDIS_REPLY_STATUS:
case REDIS_REPLY_STRING:
- out = sdscatlen(out,r->str,r->len);
+ if (r->type == REDIS_REPLY_STATUS && config.eval_ldb) {
+ /* The Lua debugger replies with arrays of simple (status)
+ * strings. We colorize the output for more fun if this
+ * is a debugging session. */
+
+ /* Detect the end of a debugging session. */
+ if (strstr(r->str,"<endsession>") == r->str) {
+ config.enable_ldb_on_eval = 0;
+ config.eval_ldb = 0;
+ config.eval_ldb_end = 1; /* Signal the caller session ended. */
+ config.output = OUTPUT_STANDARD;
+ cliRefreshPrompt();
+ } else {
+ out = sdsCatColorizedLdbReply(out,r->str,r->len);
+ }
+ } else {
+ out = sdscatlen(out,r->str,r->len);
+ }
break;
case REDIS_REPLY_INTEGER:
out = sdscatprintf(out,"%lld",r->integer);
@@ -490,7 +734,7 @@ static sds cliFormatReplyCSV(redisReply *r) {
out = sdscatrepr(out,r->str,r->len);
break;
case REDIS_REPLY_NIL:
- out = sdscat(out,"NIL\n");
+ out = sdscat(out,"NIL");
break;
case REDIS_REPLY_ARRAY:
for (i = 0; i < r->elements; i++) {
@@ -514,11 +758,15 @@ static int cliReadReply(int output_raw_strings) {
int output = 1;
if (redisGetReply(context,&_reply) != REDIS_OK) {
- if (config.shutdown)
+ if (config.shutdown) {
+ redisFree(context);
+ context = NULL;
return REDIS_OK;
+ }
if (config.interactive) {
/* Filter cases where we should reconnect */
- if (context->err == REDIS_ERR_IO && errno == ECONNRESET)
+ if (context->err == REDIS_ERR_IO &&
+ (errno == ECONNRESET || errno == EPIPE))
return REDIS_ERR;
if (context->err == REDIS_ERR_EOF)
return REDIS_ERR;
@@ -530,6 +778,8 @@ static int cliReadReply(int output_raw_strings) {
reply = (redisReply*)_reply;
+ config.last_cmd_type = reply->type;
+
/* Check if we need to connect to a different node and reissue the
* request. */
if (config.cluster_mode && reply->type == REDIS_REPLY_ERROR &&
@@ -548,7 +798,7 @@ static int cliReadReply(int output_raw_strings) {
p = strchr(s+1,' '); /* MOVED[S]3999[P]127.0.0.1:6381 */
*p = '\0';
slot = atoi(s+1);
- s = strchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */
+ s = strrchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */
*s = '\0';
sdsfree(config.hostip);
config.hostip = sdsnew(p+1);
@@ -586,7 +836,8 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
size_t *argvlen;
int j, output_raw;
- if (!strcasecmp(command,"help") || !strcasecmp(command,"?")) {
+ if (!config.eval_ldb && /* In debugging mode, let's pass "help" to Redis. */
+ (!strcasecmp(command,"help") || !strcasecmp(command,"?"))) {
cliOutputHelp(--argc, ++argv);
return REDIS_OK;
}
@@ -595,6 +846,11 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
output_raw = 0;
if (!strcasecmp(command,"info") ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"htstats")) ||
+ (argc >= 2 && !strcasecmp(command,"memory") &&
+ (!strcasecmp(argv[1],"malloc-stats") ||
+ !strcasecmp(argv[1],"doctor"))) ||
(argc == 2 && !strcasecmp(command,"cluster") &&
(!strcasecmp(argv[1],"nodes") ||
!strcasecmp(argv[1],"info"))) ||
@@ -615,8 +871,26 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
if (!strcasecmp(command,"sync") ||
!strcasecmp(command,"psync")) config.slave_mode = 1;
+ /* When the user manually calls SCRIPT DEBUG, setup the activation of
+ * debugging mode on the next eval if needed. */
+ if (argc == 3 && !strcasecmp(argv[0],"script") &&
+ !strcasecmp(argv[1],"debug"))
+ {
+ if (!strcasecmp(argv[2],"yes") || !strcasecmp(argv[2],"sync")) {
+ config.enable_ldb_on_eval = 1;
+ } else {
+ config.enable_ldb_on_eval = 0;
+ }
+ }
+
+ /* Actually activate LDB on EVAL if needed. */
+ if (!strcasecmp(command,"eval") && config.enable_ldb_on_eval) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ }
+
/* Setup argument length */
- argvlen = malloc(argc*sizeof(size_t));
+ argvlen = zmalloc(argc*sizeof(size_t));
for (j = 0; j < argc; j++)
argvlen[j] = sdslen(argv[j]);
@@ -639,37 +913,41 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
printf("Entering slave output mode... (press Ctrl-C to quit)\n");
slaveMode();
config.slave_mode = 0;
+ zfree(argvlen);
return REDIS_ERR; /* Error = slaveMode lost connection to master */
}
if (cliReadReply(output_raw) != REDIS_OK) {
- free(argvlen);
+ zfree(argvlen);
return REDIS_ERR;
} else {
/* Store database number when SELECT was successfully executed. */
- if (!strcasecmp(command,"select") && argc == 2) {
+ if (!strcasecmp(command,"select") && argc == 2 && config.last_cmd_type != REDIS_REPLY_ERROR) {
config.dbnum = atoi(argv[1]);
cliRefreshPrompt();
+ } else if (!strcasecmp(command,"auth") && argc == 2) {
+ cliSelect();
}
}
if (config.interval) usleep(config.interval);
fflush(stdout); /* Make it grep friendly */
}
- free(argvlen);
+ zfree(argvlen);
return REDIS_OK;
}
-/* Send the INFO command, reconnecting the link if needed. */
-static redisReply *reconnectingInfo(void) {
- redisContext *c = context;
+/* Send a command reconnecting the link if needed. */
+static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ...) {
redisReply *reply = NULL;
int tries = 0;
+ va_list ap;
assert(!c->err);
while(reply == NULL) {
while (c->err & (REDIS_ERR_IO | REDIS_ERR_EOF)) {
- printf("Reconnecting (%d)...\r", ++tries);
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
+ printf("Reconnecting... %d\r", ++tries);
fflush(stdout);
redisFree(c);
@@ -677,12 +955,15 @@ static redisReply *reconnectingInfo(void) {
usleep(1000000);
}
- reply = redisCommand(c,"INFO");
+ va_start(ap,fmt);
+ reply = redisvCommand(c,fmt,ap);
+ va_end(ap);
+
if (c->err && !(c->err & (REDIS_ERR_IO | REDIS_ERR_EOF))) {
fprintf(stderr, "Error: %s\n", c->errstr);
exit(1);
} else if (tries > 0) {
- printf("\n");
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
}
}
@@ -724,13 +1005,23 @@ static int parseOptions(int argc, char **argv) {
config.auth = argv[++i];
} else if (!strcmp(argv[i],"--raw")) {
config.output = OUTPUT_RAW;
+ } else if (!strcmp(argv[i],"--no-raw")) {
+ config.output = OUTPUT_STANDARD;
} else if (!strcmp(argv[i],"--csv")) {
config.output = OUTPUT_CSV;
} else if (!strcmp(argv[i],"--latency")) {
config.latency_mode = 1;
+ } else if (!strcmp(argv[i],"--latency-dist")) {
+ config.latency_dist_mode = 1;
+ } else if (!strcmp(argv[i],"--mono")) {
+ spectrum_palette = spectrum_palette_mono;
+ spectrum_palette_size = spectrum_palette_mono_size;
} else if (!strcmp(argv[i],"--latency-history")) {
config.latency_mode = 1;
config.latency_history = 1;
+ } else if (!strcmp(argv[i],"--lru-test") && !lastarg) {
+ config.lru_test_mode = 1;
+ config.lru_test_sample_size = strtoll(argv[++i],NULL,10);
} else if (!strcmp(argv[i],"--slave")) {
config.slave_mode = 1;
} else if (!strcmp(argv[i],"--stat")) {
@@ -753,6 +1044,13 @@ static int parseOptions(int argc, char **argv) {
config.bigkeys = 1;
} else if (!strcmp(argv[i],"--eval") && !lastarg) {
config.eval = argv[++i];
+ } else if (!strcmp(argv[i],"--ldb")) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ } else if (!strcmp(argv[i],"--ldb-sync-mode")) {
+ config.eval_ldb = 1;
+ config.eval_ldb_sync = 1;
+ config.output = OUTPUT_RAW;
} else if (!strcmp(argv[i],"-c")) {
config.cluster_mode = 1;
} else if (!strcmp(argv[i],"-d") && !lastarg) {
@@ -775,6 +1073,13 @@ static int parseOptions(int argc, char **argv) {
}
}
}
+
+ /* --ldb requires --eval. */
+ if (config.eval_ldb && config.eval == NULL) {
+ fprintf(stderr,"Options --ldb and --ldb-sync-mode require --eval.\n");
+ fprintf(stderr,"Try %s --help for more information.\n", argv[0]);
+ exit(1);
+ }
return i;
}
@@ -795,7 +1100,7 @@ static sds readArgFromStdin(void) {
return arg;
}
-static void usage() {
+static void usage(void) {
sds version = cliVersion();
fprintf(stderr,
"redis-cli %s\n"
@@ -814,10 +1119,21 @@ static void usage() {
" -c Enable cluster mode (follow -ASK and -MOVED redirections).\n"
" --raw Use raw formatting for replies (default when STDOUT is\n"
" not a tty).\n"
+" --no-raw Force formatted output even when STDOUT is not a tty.\n"
" --csv Output in CSV format.\n"
+" --stat Print rolling stats about server: mem, clients, ...\n"
" --latency Enter a special mode continuously sampling latency.\n"
+" If you use this mode in an interactive session it runs\n"
+" forever displaying real-time stats. Otherwise if --raw or\n"
+" --csv is specified, or if you redirect the output to a non\n"
+" TTY, it samples the latency for 1 second (you can use\n"
+" -i to change the interval), then produces a single output\n"
+" and exits.\n"
" --latency-history Like --latency but tracking latency changes over time.\n"
" Default time interval is 15 sec. Change it using -i.\n"
+" --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n"
+" Default time interval is 1 sec. Change it using -i.\n"
+" --lru-test <keys> Simulate a cache workload with an 80-20 distribution.\n"
" --slave Simulate a slave showing commands received from the master.\n"
" --rdb <filename> Transfer an RDB dump from remote server to local file.\n"
" --pipe Transfer raw Redis protocol from stdin to server.\n"
@@ -830,6 +1146,10 @@ static void usage() {
" --intrinsic-latency <sec> Run a test to measure intrinsic system latency.\n"
" The test will run for the specified amount of seconds.\n"
" --eval <file> Send an EVAL command using the Lua script at <file>.\n"
+" --ldb Used with --eval enable the Redis Lua debugger.\n"
+" --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n"
+" this mode the server is blocked and script changes are\n"
+" are not rolled back from the server memory.\n"
" --help Output this help and exit.\n"
" --version Output version and exit.\n"
"\n"
@@ -844,7 +1164,8 @@ static void usage() {
" (Note: when using --eval the comma separates KEYS[] from ARGV[] items)\n"
"\n"
"When no command is given, redis-cli starts in interactive mode.\n"
-"Type \"help\" in interactive mode for information on available commands.\n"
+"Type \"help\" in interactive mode for information on available commands\n"
+"and settings.\n"
"\n",
version, REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
sdsfree(version);
@@ -862,44 +1183,150 @@ static char **convertToSds(int count, char** args) {
return sds;
}
-#define LINE_BUFLEN 4096
-static void repl() {
+static int issueCommandRepeat(int argc, char **argv, long repeat) {
+ while (1) {
+ config.cluster_reissue_command = 0;
+ if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
+ cliConnect(1);
+
+ /* If we still cannot send the command print error.
+ * We'll try to reconnect the next time. */
+ if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
+ cliPrintContextError();
+ return REDIS_ERR;
+ }
+ }
+ /* Issue the command again if we got redirected in cluster mode */
+ if (config.cluster_mode && config.cluster_reissue_command) {
+ cliConnect(1);
+ } else {
+ break;
+ }
+ }
+ return REDIS_OK;
+}
+
+static int issueCommand(int argc, char **argv) {
+ return issueCommandRepeat(argc, argv, config.repeat);
+}
+
+/* Split the user provided command into multiple SDS arguments.
+ * This function normally uses sdssplitargs() from sds.c which is able
+ * to understand "quoted strings", escapes and so forth. However when
+ * we are in Lua debugging mode and the "eval" command is used, we want
+ * the remaining Lua script (after "e " or "eval ") to be passed verbatim
+ * as a single big argument. */
+static sds *cliSplitArgs(char *line, int *argc) {
+ if (config.eval_ldb && (strstr(line,"eval ") == line ||
+ strstr(line,"e ") == line))
+ {
+ sds *argv = sds_malloc(sizeof(sds)*2);
+ *argc = 2;
+ int len = strlen(line);
+ int elen = line[1] == ' ' ? 2 : 5; /* "e " or "eval "? */
+ argv[0] = sdsnewlen(line,elen-1);
+ argv[1] = sdsnewlen(line+elen,len-elen);
+ return argv;
+ } else {
+ return sdssplitargs(line,argc);
+ }
+}
+
+/* Set the CLI preferences. This function is invoked when an interactive
+ * ":command" is called, or when reading ~/.redisclirc file, in order to
+ * set user preferences. */
+void cliSetPreferences(char **argv, int argc, int interactive) {
+ if (!strcasecmp(argv[0],":set") && argc >= 2) {
+ if (!strcasecmp(argv[1],"hints")) pref.hints = 1;
+ else if (!strcasecmp(argv[1],"nohints")) pref.hints = 0;
+ else {
+ printf("%sunknown redis-cli preference '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[1]);
+ }
+ } else {
+ printf("%sunknown redis-cli internal command '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[0]);
+ }
+}
+
+/* Load the ~/.redisclirc file if any. */
+void cliLoadPreferences(void) {
+ sds rcfile = getDotfilePath(REDIS_CLI_RCFILE_ENV,REDIS_CLI_RCFILE_DEFAULT);
+ if (rcfile == NULL) return;
+ FILE *fp = fopen(rcfile,"r");
+ char buf[1024];
+
+ if (fp) {
+ while(fgets(buf,sizeof(buf),fp) != NULL) {
+ sds *argv;
+ int argc;
+
+ argv = sdssplitargs(buf,&argc);
+ if (argc > 0) cliSetPreferences(argv,argc,0);
+ sdsfreesplitres(argv,argc);
+ }
+ fclose(fp);
+ }
+ sdsfree(rcfile);
+}
+
+static void repl(void) {
sds historyfile = NULL;
int history = 0;
char *line;
int argc;
sds *argv;
+ /* Initialize the help and, if possible, use the COMMAND command in order
+ * to retrieve missing entries. */
+ cliInitHelp();
+ cliIntegrateHelp();
+
config.interactive = 1;
linenoiseSetMultiLine(1);
linenoiseSetCompletionCallback(completionCallback);
+ linenoiseSetHintsCallback(hintsCallback);
+ linenoiseSetFreeHintsCallback(freeHintsCallback);
- /* Only use history when stdin is a tty. */
+ /* Only use history and load the rc file when stdin is a tty. */
if (isatty(fileno(stdin))) {
- history = 1;
-
- if (getenv("HOME") != NULL) {
- historyfile = sdscatprintf(sdsempty(),"%s/.rediscli_history",getenv("HOME"));
+ historyfile = getDotfilePath(REDIS_CLI_HISTFILE_ENV,REDIS_CLI_HISTFILE_DEFAULT);
+ if (historyfile != NULL) {
+ history = 1;
linenoiseHistoryLoad(historyfile);
}
+ cliLoadPreferences();
}
cliRefreshPrompt();
while((line = linenoise(context ? config.prompt : "not connected> ")) != NULL) {
if (line[0] != '\0') {
- argv = sdssplitargs(line,&argc);
+ argv = cliSplitArgs(line,&argc);
if (history) linenoiseHistoryAdd(line);
if (historyfile) linenoiseHistorySave(historyfile);
if (argv == NULL) {
printf("Invalid argument(s)\n");
- free(line);
+ linenoiseFree(line);
continue;
} else if (argc > 0) {
if (strcasecmp(argv[0],"quit") == 0 ||
strcasecmp(argv[0],"exit") == 0)
{
exit(0);
+ } else if (argv[0][0] == ':') {
+ cliSetPreferences(argv,argc,1);
+ continue;
+ } else if (strcasecmp(argv[0],"restart") == 0) {
+ if (config.eval) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ return; /* Return to evalMode to restart the session. */
+ } else {
+ printf("Use 'restart' only in Lua debugging mode.");
+ }
} else if (argc == 3 && !strcasecmp(argv[0],"connect")) {
sdsfree(config.hostip);
config.hostip = sdsnew(argv[1]);
@@ -911,36 +1338,31 @@ static void repl() {
} else {
long long start_time = mstime(), elapsed;
int repeat, skipargs = 0;
+ char *endptr;
- repeat = atoi(argv[0]);
- if (argc > 1 && repeat) {
+ repeat = strtol(argv[0], &endptr, 10);
+ if (argc > 1 && *endptr == '\0' && repeat) {
skipargs = 1;
} else {
repeat = 1;
}
- while (1) {
- config.cluster_reissue_command = 0;
- if (cliSendCommand(argc-skipargs,argv+skipargs,repeat)
- != REDIS_OK)
- {
- cliConnect(1);
-
- /* If we still cannot send the command print error.
- * We'll try to reconnect the next time. */
- if (cliSendCommand(argc-skipargs,argv+skipargs,repeat)
- != REDIS_OK)
- cliPrintContextError();
- }
- /* Issue the command again if we got redirected in cluster mode */
- if (config.cluster_mode && config.cluster_reissue_command) {
- cliConnect(1);
- } else {
- break;
- }
+ issueCommandRepeat(argc-skipargs, argv+skipargs, repeat);
+
+ /* If our debugging session ended, show the EVAL final
+ * reply. */
+ if (config.eval_ldb_end) {
+ config.eval_ldb_end = 0;
+ cliReadReply(0);
+ printf("\n(Lua debugging session ended%s)\n\n",
+ config.eval_ldb_sync ? "" :
+ " -- dataset changes rolled back");
}
+
elapsed = mstime()-start_time;
- if (elapsed >= 500) {
+ if (elapsed >= 500 &&
+ config.output == OUTPUT_STANDARD)
+ {
printf("(%.2fs)\n",(double)elapsed/1000);
}
}
@@ -949,7 +1371,7 @@ static void repl() {
sdsfreesplitres(argv,argc);
}
/* linenoise() returns malloc-ed lines like readline() */
- free(line);
+ linenoiseFree(line);
}
exit(0);
}
@@ -959,10 +1381,9 @@ static int noninteractive(int argc, char **argv) {
if (config.stdinarg) {
argv = zrealloc(argv, (argc+1)*sizeof(char*));
argv[argc] = readArgFromStdin();
- retval = cliSendCommand(argc+1, argv, config.repeat);
+ retval = issueCommand(argc+1, argv);
} else {
- /* stdin is probably a tty, can be tested with S_ISCHR(s.st_mode) */
- retval = cliSendCommand(argc, argv, config.repeat);
+ retval = issueCommand(argc, argv);
}
return retval;
}
@@ -972,47 +1393,104 @@ static int noninteractive(int argc, char **argv) {
*--------------------------------------------------------------------------- */
static int evalMode(int argc, char **argv) {
- sds script = sdsempty();
+ sds script = NULL;
FILE *fp;
char buf[1024];
size_t nread;
char **argv2;
- int j, got_comma = 0, keys = 0;
+ int j, got_comma, keys;
+ int retval = REDIS_OK;
- /* Load the script from the file, as an sds string. */
- fp = fopen(config.eval,"r");
- if (!fp) {
- fprintf(stderr,
- "Can't open file '%s': %s\n", config.eval, strerror(errno));
- exit(1);
- }
- while((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
- script = sdscatlen(script,buf,nread);
- }
- fclose(fp);
+ while(1) {
+ if (config.eval_ldb) {
+ printf(
+ "Lua debugging session started, please use:\n"
+ "quit -- End the session.\n"
+ "restart -- Restart the script in debug mode again.\n"
+ "help -- Show Lua script debugging commands.\n\n"
+ );
+ }
- /* Create our argument vector */
- argv2 = zmalloc(sizeof(sds)*(argc+3));
- argv2[0] = sdsnew("EVAL");
- argv2[1] = script;
- for (j = 0; j < argc; j++) {
- if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) {
- got_comma = 1;
- continue;
+ sdsfree(script);
+ script = sdsempty();
+ got_comma = 0;
+ keys = 0;
+
+ /* Load the script from the file, as an sds string. */
+ fp = fopen(config.eval,"r");
+ if (!fp) {
+ fprintf(stderr,
+ "Can't open file '%s': %s\n", config.eval, strerror(errno));
+ exit(1);
+ }
+ while((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
+ script = sdscatlen(script,buf,nread);
+ }
+ fclose(fp);
+
+ /* If we are debugging a script, enable the Lua debugger. */
+ if (config.eval_ldb) {
+ redisReply *reply = redisCommand(context,
+ config.eval_ldb_sync ?
+ "SCRIPT DEBUG sync": "SCRIPT DEBUG yes");
+ if (reply) freeReplyObject(reply);
}
- argv2[j+3-got_comma] = sdsnew(argv[j]);
- if (!got_comma) keys++;
- }
- argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
- /* Call it */
- return cliSendCommand(argc+3-got_comma, argv2, config.repeat);
+ /* Create our argument vector */
+ argv2 = zmalloc(sizeof(sds)*(argc+3));
+ argv2[0] = sdsnew("EVAL");
+ argv2[1] = script;
+ for (j = 0; j < argc; j++) {
+ if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) {
+ got_comma = 1;
+ continue;
+ }
+ argv2[j+3-got_comma] = sdsnew(argv[j]);
+ if (!got_comma) keys++;
+ }
+ argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
+
+ /* Call it */
+ int eval_ldb = config.eval_ldb; /* Save it, may be reverted. */
+ retval = issueCommand(argc+3-got_comma, argv2);
+ if (eval_ldb) {
+ if (!config.eval_ldb) {
+ /* If the debugging session ended immediately, there was an
+ * error compiling the script. Show it and don't enter
+ * the REPL at all. */
+ printf("Eval debugging session can't start:\n");
+ cliReadReply(0);
+ break; /* Return to the caller. */
+ } else {
+ strncpy(config.prompt,"lua debugger> ",sizeof(config.prompt));
+ repl();
+ /* Restart the session if repl() returned. */
+ cliConnect(1);
+ printf("\n");
+ }
+ } else {
+ break; /* Return to the caller. */
+ }
+ }
+ return retval;
}
/*------------------------------------------------------------------------------
* Latency and latency history modes
*--------------------------------------------------------------------------- */
+static void latencyModePrint(long long min, long long max, double avg, long long count) {
+ if (config.output == OUTPUT_STANDARD) {
+ printf("min: %lld, max: %lld, avg: %.2f (%lld samples)",
+ min, max, avg, count);
+ fflush(stdout);
+ } else if (config.output == OUTPUT_CSV) {
+ printf("%lld,%lld,%.2f,%lld\n", min, max, avg, count);
+ } else if (config.output == OUTPUT_RAW) {
+ printf("%lld %lld %.2f %lld\n", min, max, avg, count);
+ }
+}
+
#define LATENCY_SAMPLE_RATE 10 /* milliseconds. */
#define LATENCY_HISTORY_DEFAULT_INTERVAL 15000 /* milliseconds. */
static void latencyMode(void) {
@@ -1024,10 +1502,18 @@ static void latencyMode(void) {
double avg;
long long history_start = mstime();
+ /* Set a default for the interval in case of --latency option
+ * with --raw, --csv or when it is redirected to non tty. */
+ if (config.interval == 0) {
+ config.interval = 1000;
+ } else {
+ config.interval /= 1000; /* We need to convert to milliseconds. */
+ }
+
if (!context) exit(1);
while(1) {
start = mstime();
- reply = redisCommand(context,"PING");
+ reply = reconnectingRedisCommand(context,"PING");
if (reply == NULL) {
fprintf(stderr,"\nI/O error\n");
exit(1);
@@ -1044,9 +1530,19 @@ static void latencyMode(void) {
tot += latency;
avg = (double) tot/count;
}
- printf("\x1b[0G\x1b[2Kmin: %lld, max: %lld, avg: %.2f (%lld samples)",
- min, max, avg, count);
- fflush(stdout);
+
+ if (config.output == OUTPUT_STANDARD) {
+ printf("\x1b[0G\x1b[2K"); /* Clear the line. */
+ latencyModePrint(min,max,avg,count);
+ } else {
+ if (config.latency_history) {
+ latencyModePrint(min,max,avg,count);
+ } else if (mstime()-history_start > config.interval) {
+ latencyModePrint(min,max,avg,count);
+ exit(0);
+ }
+ }
+
if (config.latency_history && mstime()-history_start > history_interval)
{
printf(" -- %.2f seconds range\n", (float)(mstime()-history_start)/1000);
@@ -1058,6 +1554,148 @@ static void latencyMode(void) {
}
/*------------------------------------------------------------------------------
+ * Latency distribution mode -- requires 256 colors xterm
+ *--------------------------------------------------------------------------- */
+
+#define LATENCY_DIST_DEFAULT_INTERVAL 1000 /* milliseconds. */
+
+/* Structure to store samples distribution. */
+struct distsamples {
+ long long max; /* Max latency to fit into this interval (usec). */
+ long long count; /* Number of samples in this interval. */
+ int character; /* Associated character in visualization. */
+};
+
+/* Helper function for latencyDistMode(). Performs the spectrum visualization
+ * of the collected samples targeting an xterm 256 terminal.
+ *
+ * Takes an array of distsamples structures, ordered from smaller to bigger
+ * 'max' value. Last sample max must be 0, to mean that it olds all the
+ * samples greater than the previous one, and is also the stop sentinel.
+ *
+ * "tot' is the total number of samples in the different buckets, so it
+ * is the SUM(samples[i].conut) for i to 0 up to the max sample.
+ *
+ * As a side effect the function sets all the buckets count to 0. */
+void showLatencyDistSamples(struct distsamples *samples, long long tot) {
+ int j;
+
+ /* We convert samples into a index inside the palette
+ * proportional to the percentage a given bucket represents.
+ * This way intensity of the different parts of the spectrum
+ * don't change relative to the number of requests, which avoids to
+ * pollute the visualization with non-latency related info. */
+ printf("\033[38;5;0m"); /* Set foreground color to black. */
+ for (j = 0; ; j++) {
+ int coloridx =
+ ceil((float) samples[j].count / tot * (spectrum_palette_size-1));
+ int color = spectrum_palette[coloridx];
+ printf("\033[48;5;%dm%c", (int)color, samples[j].character);
+ samples[j].count = 0;
+ if (samples[j].max == 0) break; /* Last sample. */
+ }
+ printf("\033[0m\n");
+ fflush(stdout);
+}
+
+/* Show the legend: different buckets values and colors meaning, so
+ * that the spectrum is more easily readable. */
+void showLatencyDistLegend(void) {
+ int j;
+
+ printf("---------------------------------------------\n");
+ printf(". - * # .01 .125 .25 .5 milliseconds\n");
+ printf("1,2,3,...,9 from 1 to 9 milliseconds\n");
+ printf("A,B,C,D,E 10,20,30,40,50 milliseconds\n");
+ printf("F,G,H,I,J .1,.2,.3,.4,.5 seconds\n");
+ printf("K,L,M,N,O,P,Q,? 1,2,4,8,16,30,60,>60 seconds\n");
+ printf("From 0 to 100%%: ");
+ for (j = 0; j < spectrum_palette_size; j++) {
+ printf("\033[48;5;%dm ", spectrum_palette[j]);
+ }
+ printf("\033[0m\n");
+ printf("---------------------------------------------\n");
+}
+
+static void latencyDistMode(void) {
+ redisReply *reply;
+ long long start, latency, count = 0;
+ long long history_interval =
+ config.interval ? config.interval/1000 :
+ LATENCY_DIST_DEFAULT_INTERVAL;
+ long long history_start = ustime();
+ int j, outputs = 0;
+
+ struct distsamples samples[] = {
+ /* We use a mostly logarithmic scale, with certain linear intervals
+ * which are more interesting than others, like 1-10 milliseconds
+ * range. */
+ {10,0,'.'}, /* 0.01 ms */
+ {125,0,'-'}, /* 0.125 ms */
+ {250,0,'*'}, /* 0.25 ms */
+ {500,0,'#'}, /* 0.5 ms */
+ {1000,0,'1'}, /* 1 ms */
+ {2000,0,'2'}, /* 2 ms */
+ {3000,0,'3'}, /* 3 ms */
+ {4000,0,'4'}, /* 4 ms */
+ {5000,0,'5'}, /* 5 ms */
+ {6000,0,'6'}, /* 6 ms */
+ {7000,0,'7'}, /* 7 ms */
+ {8000,0,'8'}, /* 8 ms */
+ {9000,0,'9'}, /* 9 ms */
+ {10000,0,'A'}, /* 10 ms */
+ {20000,0,'B'}, /* 20 ms */
+ {30000,0,'C'}, /* 30 ms */
+ {40000,0,'D'}, /* 40 ms */
+ {50000,0,'E'}, /* 50 ms */
+ {100000,0,'F'}, /* 0.1 s */
+ {200000,0,'G'}, /* 0.2 s */
+ {300000,0,'H'}, /* 0.3 s */
+ {400000,0,'I'}, /* 0.4 s */
+ {500000,0,'J'}, /* 0.5 s */
+ {1000000,0,'K'}, /* 1 s */
+ {2000000,0,'L'}, /* 2 s */
+ {4000000,0,'M'}, /* 4 s */
+ {8000000,0,'N'}, /* 8 s */
+ {16000000,0,'O'}, /* 16 s */
+ {30000000,0,'P'}, /* 30 s */
+ {60000000,0,'Q'}, /* 1 minute */
+ {0,0,'?'}, /* > 1 minute */
+ };
+
+ if (!context) exit(1);
+ while(1) {
+ start = ustime();
+ reply = reconnectingRedisCommand(context,"PING");
+ if (reply == NULL) {
+ fprintf(stderr,"\nI/O error\n");
+ exit(1);
+ }
+ latency = ustime()-start;
+ freeReplyObject(reply);
+ count++;
+
+ /* Populate the relevant bucket. */
+ for (j = 0; ; j++) {
+ if (samples[j].max == 0 || latency <= samples[j].max) {
+ samples[j].count++;
+ break;
+ }
+ }
+
+ /* From time to time show the spectrum. */
+ if (count && (ustime()-history_start)/1000 > history_interval) {
+ if ((outputs++ % 20) == 0)
+ showLatencyDistLegend();
+ showLatencyDistSamples(samples,count);
+ history_start = ustime();
+ count = 0;
+ }
+ usleep(LATENCY_SAMPLE_RATE * 1000);
+ }
+}
+
+/*------------------------------------------------------------------------------
* Slave mode
*--------------------------------------------------------------------------- */
@@ -1177,6 +1815,7 @@ static void getRDB(void) {
* Bulk import (pipe) mode
*--------------------------------------------------------------------------- */
+#define PIPEMODE_WRITE_LOOP_MAX_BYTES (128*1024)
static void pipeMode(void) {
int fd = context->fd;
long long errors = 0, replies = 0, obuf_len = 0, obuf_pos = 0;
@@ -1253,6 +1892,8 @@ static void pipeMode(void) {
/* Handle the writable state: we can send protocol to the server. */
if (mask & AE_WRITABLE) {
+ ssize_t loop_nwritten = 0;
+
while(1) {
/* Transfer current buffer to server. */
if (obuf_len != 0) {
@@ -1269,6 +1910,7 @@ static void pipeMode(void) {
}
obuf_len -= nwritten;
obuf_pos += nwritten;
+ loop_nwritten += nwritten;
if (obuf_len != 0) break; /* Can't accept more data. */
}
/* If buffer is empty, load from stdin. */
@@ -1304,7 +1946,8 @@ static void pipeMode(void) {
obuf_pos = 0;
}
}
- if (obuf_len == 0 && eof) break;
+ if ((obuf_len == 0 && eof) ||
+ loop_nwritten > PIPEMODE_WRITE_LOOP_MAX_BYTES) break;
}
}
@@ -1362,7 +2005,7 @@ static redisReply *sendScan(unsigned long long *it) {
assert(reply->element[1]->type == REDIS_REPLY_ARRAY);
/* Update iterator */
- *it = atoi(reply->element[0]->str);
+ *it = strtoull(reply->element[0]->str, NULL, 10);
return reply;
}
@@ -1406,7 +2049,7 @@ static int toIntType(char *key, char *type) {
static void getKeyTypes(redisReply *keys, int *types) {
redisReply *reply;
- int i;
+ unsigned int i;
/* Pipeline TYPE commands */
for(i=0;i<keys->elements;i++) {
@@ -1420,8 +2063,13 @@ static void getKeyTypes(redisReply *keys, int *types) {
keys->element[i]->str, context->err, context->errstr);
exit(1);
} else if(reply->type != REDIS_REPLY_STATUS) {
- fprintf(stderr, "Invalid reply type (%d) for TYPE on key '%s'!\n",
- reply->type, keys->element[i]->str);
+ if(reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr, "TYPE returned an error: %s\n", reply->str);
+ } else {
+ fprintf(stderr,
+ "Invalid reply type (%d) for TYPE on key '%s'!\n",
+ reply->type, keys->element[i]->str);
+ }
exit(1);
}
@@ -1435,7 +2083,7 @@ static void getKeySizes(redisReply *keys, int *types,
{
redisReply *reply;
char *sizecmds[] = {"STRLEN","LLEN","SCARD","HLEN","ZCARD"};
- int i;
+ unsigned int i;
/* Pipeline size commands */
for(i=0;i<keys->elements;i++) {
@@ -1482,7 +2130,8 @@ static void findBigKeys(void) {
char *typename[] = {"string","list","set","hash","zset"};
char *typeunit[] = {"bytes","items","members","fields","members"};
redisReply *reply, *keys;
- int type, *types=NULL, arrsize=0, i;
+ unsigned int arrsize=0, i;
+ int type, *types=NULL;
double pct;
/* Total keys pre scanning */
@@ -1497,7 +2146,7 @@ static void findBigKeys(void) {
for(i=0;i<TYPE_NONE; i++) {
maxkeys[i] = sdsempty();
if(!maxkeys[i]) {
- fprintf(stderr, "Failed to allocate memory for largest key names!");
+ fprintf(stderr, "Failed to allocate memory for largest key names!\n");
exit(1);
}
}
@@ -1622,7 +2271,7 @@ static char *getInfoField(char *info, char *field) {
n1 = strchr(p,'\r');
n2 = strchr(p,',');
if (n2 && n2 < n1) n1 = n2;
- result = malloc(sizeof(char)*(n1-p)+1);
+ result = zmalloc(sizeof(char)*(n1-p)+1);
memcpy(result,p,(n1-p));
result[n1-p] = '\0';
return result;
@@ -1636,7 +2285,7 @@ static long getLongInfoField(char *info, char *field) {
if (!value) return LONG_MIN;
l = strtol(value,NULL,10);
- free(value);
+ zfree(value);
return l;
}
@@ -1652,7 +2301,7 @@ void bytesToHuman(char *s, long long n) {
}
if (n < 1024) {
/* Bytes */
- sprintf(s,"%lluB",n);
+ sprintf(s,"%lldB",n);
return;
} else if (n < (1024*1024)) {
d = (double)n/(1024);
@@ -1666,7 +2315,7 @@ void bytesToHuman(char *s, long long n) {
}
}
-static void statMode() {
+static void statMode(void) {
redisReply *reply;
long aux, requests = 0;
int i = 0;
@@ -1675,7 +2324,7 @@ static void statMode() {
char buf[64];
int j;
- reply = reconnectingInfo();
+ reply = reconnectingRedisCommand(context,"INFO");
if (reply->type == REDIS_REPLY_ERROR) {
printf("ERROR: %s\n", reply->str);
exit(1);
@@ -1729,6 +2378,7 @@ static void statMode() {
/* Children */
aux = getLongInfoField(reply->str,"bgsave_in_progress");
aux |= getLongInfoField(reply->str,"aof_rewrite_in_progress") << 1;
+ aux |= getLongInfoField(reply->str,"loading") << 2;
switch(aux) {
case 0: break;
case 1:
@@ -1740,6 +2390,9 @@ static void statMode() {
case 3:
printf("SAVE+AOF");
break;
+ case 4:
+ printf("LOAD");
+ break;
}
printf("\n");
@@ -1752,7 +2405,7 @@ static void statMode() {
* Scan mode
*--------------------------------------------------------------------------- */
-static void scanMode() {
+static void scanMode(void) {
redisReply *reply;
unsigned long long cur = 0;
@@ -1769,7 +2422,7 @@ static void scanMode() {
printf("ERROR: %s\n", reply->str);
exit(1);
} else {
- int j;
+ unsigned int j;
cur = strtoull(reply->element[0]->str,NULL,10);
for (j = 0; j < reply->element[1]->elements; j++)
@@ -1782,6 +2435,97 @@ static void scanMode() {
}
/*------------------------------------------------------------------------------
+ * LRU test mode
+ *--------------------------------------------------------------------------- */
+
+/* Return an integer from min to max (both inclusive) using a power-law
+ * distribution, depending on the value of alpha: the greater the alpha
+ * the more bias towards lower values.
+ *
+ * With alpha = 6.2 the output follows the 80-20 rule where 20% of
+ * the returned numbers will account for 80% of the frequency. */
+long long powerLawRand(long long min, long long max, double alpha) {
+ double pl, r;
+
+ max += 1;
+ r = ((double)rand()) / RAND_MAX;
+ pl = pow(
+ ((pow(max,alpha+1) - pow(min,alpha+1))*r + pow(min,alpha+1)),
+ (1.0/(alpha+1)));
+ return (max-1-(long long)pl)+min;
+}
+
+/* Generates a key name among a set of lru_test_sample_size keys, using
+ * an 80-20 distribution. */
+void LRUTestGenKey(char *buf, size_t buflen) {
+ snprintf(buf, buflen, "lru:%lld",
+ powerLawRand(1, config.lru_test_sample_size, 6.2));
+}
+
+#define LRU_CYCLE_PERIOD 1000 /* 1000 milliseconds. */
+#define LRU_CYCLE_PIPELINE_SIZE 250
+static void LRUTestMode(void) {
+ redisReply *reply;
+ char key[128];
+ long long start_cycle;
+ int j;
+
+ srand(time(NULL)^getpid());
+ while(1) {
+ /* Perform cycles of 1 second with 50% writes and 50% reads.
+ * We use pipelining batching writes / reads N times per cycle in order
+ * to fill the target instance easily. */
+ start_cycle = mstime();
+ long long hits = 0, misses = 0;
+ while(mstime() - start_cycle < 1000) {
+ /* Write cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ char val[6];
+ val[5] = '\0';
+ for (int i = 0; i < 5; i++) val[i] = 'A'+rand()%('z'-'A');
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "SET %s %s",key,val);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++)
+ redisGetReply(context, (void**)&reply);
+
+ /* Read cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "GET %s",key);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ if (redisGetReply(context, (void**)&reply) == REDIS_OK) {
+ switch(reply->type) {
+ case REDIS_REPLY_ERROR:
+ printf("%s\n", reply->str);
+ break;
+ case REDIS_REPLY_NIL:
+ misses++;
+ break;
+ default:
+ hits++;
+ break;
+ }
+ }
+ }
+
+ if (context->err) {
+ fprintf(stderr,"I/O error during LRU test\n");
+ exit(1);
+ }
+ }
+ /* Print stats. */
+ printf(
+ "%lld Gets/sec | Hits: %lld (%.2f%%) | Misses: %lld (%.2f%%)\n",
+ hits+misses,
+ hits, (double)hits/(hits+misses)*100,
+ misses, (double)misses/(hits+misses)*100);
+ }
+ exit(0);
+}
+
+/*------------------------------------------------------------------------------
* Intrisic latency mode.
*
* Measure max latency of a running process that does not result from
@@ -1813,7 +2557,7 @@ unsigned long compute_something_fast(void) {
}
static void intrinsicLatencyModeStop(int s) {
- REDIS_NOTUSED(s);
+ UNUSED(s);
force_cancel_loop = 1;
}
@@ -1840,11 +2584,15 @@ static void intrinsicLatencyMode(void) {
printf("Max latency so far: %lld microseconds.\n", max_latency);
}
+ double avg_us = (double)run_time/runs;
+ double avg_ns = avg_us * 1e3;
if (force_cancel_loop || end > test_end) {
- printf("\n%lld total runs (avg %lld microseconds per run).\n",
- runs, run_time/runs);
- printf("Worst run took %.02fx times the average.\n",
- (double) max_latency / (run_time/runs));
+ printf("\n%lld total runs "
+ "(avg latency: "
+ "%.4f microseconds / %.2f nanoseconds per run).\n",
+ runs, avg_us, avg_ns);
+ printf("Worst run took %.0fx longer than the average latency.\n",
+ max_latency / avg_us);
exit(0);
}
}
@@ -1868,7 +2616,10 @@ int main(int argc, char **argv) {
config.monitor_mode = 0;
config.pubsub_mode = 0;
config.latency_mode = 0;
+ config.latency_dist_mode = 0;
config.latency_history = 0;
+ config.lru_test_mode = 0;
+ config.lru_test_sample_size = 0;
config.cluster_mode = 0;
config.slave_mode = 0;
config.getrdb_mode = 0;
@@ -1883,12 +2634,22 @@ int main(int argc, char **argv) {
config.stdinarg = 0;
config.auth = NULL;
config.eval = NULL;
+ config.eval_ldb = 0;
+ config.eval_ldb_end = 0;
+ config.eval_ldb_sync = 0;
+ config.enable_ldb_on_eval = 0;
+ config.last_cmd_type = -1;
+
+ pref.hints = 1;
+
+ spectrum_palette = spectrum_palette_color;
+ spectrum_palette_size = spectrum_palette_color_size;
+
if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL))
config.output = OUTPUT_RAW;
else
config.output = OUTPUT_STANDARD;
config.mb_delim = sdsnew("\n");
- cliInitHelp();
firstarg = parseOptions(argc,argv);
argc -= firstarg;
@@ -1900,6 +2661,12 @@ int main(int argc, char **argv) {
latencyMode();
}
+ /* Latency distribution mode */
+ if (config.latency_dist_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ latencyDistMode();
+ }
+
/* Slave mode */
if (config.slave_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
@@ -1937,11 +2704,20 @@ int main(int argc, char **argv) {
scanMode();
}
+ /* LRU test mode */
+ if (config.lru_test_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ LRUTestMode();
+ }
+
/* Intrinsic latency mode */
if (config.intrinsic_latency_mode) intrinsicLatencyMode();
/* Start interactive mode when no command is provided */
if (argc == 0 && !config.eval) {
+ /* Ignore SIGPIPE in interactive mode to force a reconnect */
+ signal(SIGPIPE, SIG_IGN);
+
/* Note that in repl mode we don't abort on connection error.
* A new attempt will be performed for every command send. */
cliConnect(0);
diff --git a/src/redis-trib.rb b/src/redis-trib.rb
index ee11bb904..39db97947 100755
--- a/src/redis-trib.rb
+++ b/src/redis-trib.rb
@@ -25,6 +25,11 @@ require 'rubygems'
require 'redis'
ClusterHashSlots = 16384
+MigrateDefaultTimeout = 60000
+MigrateDefaultPipeline = 10
+RebalanceDefaultThreshold = 2
+
+$verbose = false
def xputs(s)
case s[0..2]
@@ -32,6 +37,8 @@ def xputs(s)
color="29;1"
when "[ER"
color="31;1"
+ when "[WA"
+ color="31;1"
when "[OK"
color="32"
when "[FA","***"
@@ -49,15 +56,17 @@ end
class ClusterNode
def initialize(addr)
- s = addr.split(":")
- if s.length != 2
- puts "Invalid node name #{addr}"
- exit 1
+ s = addr.split("@")[0].split(":")
+ if s.length < 2
+ puts "Invalid IP or Port (given as #{addr}) - use IP:Port format"
+ exit 1
end
+ port = s.pop # removes port from split array
+ ip = s.join(":") # if s.length > 1 here, it's IPv6, so restore address
@r = nil
@info = {}
- @info[:host] = s[0]
- @info[:port] = s[1]
+ @info[:host] = ip
+ @info[:port] = port
@info[:slots] = {}
@info[:migrating] = {}
@info[:importing] = {}
@@ -70,7 +79,7 @@ class ClusterNode
@friends
end
- def slots
+ def slots
@info[:slots]
end
@@ -84,7 +93,7 @@ class ClusterNode
def connect(o={})
return if @r
- print "Connecting to node #{self}: "
+ print "Connecting to node #{self}: " if $verbose
STDOUT.flush
begin
@r = Redis.new(:host => @info[:host], :port => @info[:port], :timeout => 60)
@@ -94,7 +103,7 @@ class ClusterNode
exit 1 if o[:abort]
@r = nil
end
- xputs "OK"
+ xputs "OK" if $verbose
end
def assert_cluster
@@ -152,7 +161,7 @@ class ClusterNode
end
} if slots
@dirty = false
- @r.cluster("info").split("\n").each{|e|
+ @r.cluster("info").split("\n").each{|e|
k,v=e.split(":")
k = k.to_sym
v.chop!
@@ -211,7 +220,7 @@ class ClusterNode
#
# Note: this could be easily written without side effects,
# we use 'slots' just to split the computation into steps.
-
+
# First step: we want an increasing array of integers
# for instance: [1,2,3,4,5,8,9,20,21,22,23,24,25,30]
slots = @info[:slots].keys.sort
@@ -271,7 +280,7 @@ class ClusterNode
def info
@info
end
-
+
def is_dirty?
@dirty
end
@@ -286,6 +295,7 @@ class RedisTrib
@nodes = []
@fix = false
@errors = []
+ @timeout = MigrateDefaultTimeout
end
def check_arity(req_args, num_args)
@@ -300,11 +310,16 @@ class RedisTrib
@nodes << node
end
+ def reset_nodes
+ @nodes = []
+ end
+
def cluster_error(msg)
@errors << msg
xputs msg
end
+ # Return the node with the specified ID or Nil.
def get_node_by_name(name)
@nodes.each{|n|
return n if n.info[:name] == name.downcase
@@ -312,6 +327,21 @@ class RedisTrib
return nil
end
+ # Like get_node_by_name but the specified name can be just the first
+ # part of the node ID as long as the prefix in unique across the
+ # cluster.
+ def get_node_by_abbreviated_name(name)
+ l = name.length
+ candidates = []
+ @nodes.each{|n|
+ if n.info[:name][0...l] == name.downcase
+ candidates << n
+ end
+ }
+ return nil if candidates.length != 1
+ candidates[0]
+ end
+
# This function returns the master that has the least number of replicas
# in the cluster. If there are multiple masters with the same smaller
# number of replicas, one at random is returned.
@@ -323,14 +353,30 @@ class RedisTrib
sorted[0]
end
- def check_cluster
+ def check_cluster(opt={})
xputs ">>> Performing Cluster Check (using node #{@nodes[0]})"
- show_nodes
+ show_nodes if !opt[:quiet]
check_config_consistency
check_open_slots
check_slots_coverage
end
+ def show_cluster_info
+ masters = 0
+ keys = 0
+ @nodes.each{|n|
+ if n.has_flag?("master")
+ puts "#{n} (#{n.info[:name][0...8]}...) -> #{n.r.dbsize} keys | #{n.slots.length} slots | "+
+ "#{n.info[:replicas].length} slaves."
+ masters += 1
+ keys += n.r.dbsize
+ end
+ }
+ xputs "[OK] #{keys} keys in #{masters} masters."
+ keys_per_slot = sprintf("%.2f",keys/16384.0)
+ puts "#{keys_per_slot} keys per slot on average."
+ end
+
# Merge slots of every known node. If the resulting slots are equal
# to ClusterHashSlots, then all slots are served.
def covered_slots
@@ -361,7 +407,8 @@ class RedisTrib
cluster_error \
"[WARNING] Node #{n} has slots in migrating state (#{n.info[:migrating].keys.join(",")})."
open_slots += n.info[:migrating].keys
- elsif n.info[:importing].size > 0
+ end
+ if n.info[:importing].size > 0
cluster_error \
"[WARNING] Node #{n} has slots in importing state (#{n.info[:importing].keys.join(",")})."
open_slots += n.info[:importing].keys
@@ -379,6 +426,7 @@ class RedisTrib
def nodes_with_keys_in_slot(slot)
nodes = []
@nodes.each{|n|
+ next if n.has_flag?("slave")
nodes << n if n.r.cluster("getkeysinslot",slot,1).length > 0
}
nodes
@@ -397,7 +445,7 @@ class RedisTrib
not_covered.each{|slot|
nodes = nodes_with_keys_in_slot(slot)
slots[slot] = nodes
- xputs "Slot #{slot} has keys in #{nodes.length} nodes: #{nodes.join}"
+ xputs "Slot #{slot} has keys in #{nodes.length} nodes: #{nodes.join(", ")}"
}
none = slots.select {|k,v| v.length == 0}
@@ -433,26 +481,50 @@ class RedisTrib
xputs multi.keys.join(",")
yes_or_die "Fix these slots by moving keys into a single node?"
multi.each{|slot,nodes|
- xputs ">>> Covering slot #{slot} moving keys to #{nodes[0]}"
- # TODO
- # 1) Set all nodes as "MIGRATING" for this slot, so that we
- # can access keys in the hash slot using ASKING.
- # 2) Move everything to node[0]
- # 3) Clear MIGRATING from nodes, and ADDSLOTS the slot to
- # node[0].
- raise "TODO: Work in progress"
+ target = get_node_with_most_keys_in_slot(nodes,slot)
+ xputs ">>> Covering slot #{slot} moving keys to #{target}"
+
+ target.r.cluster('addslots',slot)
+ target.r.cluster('setslot',slot,'stable')
+ nodes.each{|src|
+ next if src == target
+ # Set the source node in 'importing' state (even if we will
+ # actually migrate keys away) in order to avoid receiving
+ # redirections for MIGRATE.
+ src.r.cluster('setslot',slot,'importing',target.info[:name])
+ move_slot(src,target,slot,:dots=>true,:fix=>true,:cold=>true)
+ src.r.cluster('setslot',slot,'stable')
+ }
}
end
end
# Return the owner of the specified slot
- def get_slot_owner(slot)
+ def get_slot_owners(slot)
+ owners = []
@nodes.each{|n|
+ next if n.has_flag?("slave")
n.slots.each{|s,_|
- return n if s == slot
+ owners << n if s == slot
}
}
- nil
+ owners
+ end
+
+ # Return the node, among 'nodes' with the greatest number of keys
+ # in the specified slot.
+ def get_node_with_most_keys_in_slot(nodes,slot)
+ best = nil
+ best_numkeys = 0
+ @nodes.each{|n|
+ next if n.has_flag?("slave")
+ numkeys = n.r.cluster("countkeysinslot",slot)
+ if numkeys > best_numkeys || best == nil
+ best = n
+ best_numkeys = numkeys
+ end
+ }
+ return best
end
# Slot 'slot' was found to be in importing or migrating state in one or
@@ -463,16 +535,8 @@ class RedisTrib
# Try to obtain the current slot owner, according to the current
# nodes configuration.
- owner = get_slot_owner(slot)
-
- # If there is no slot owner, set as owner the slot with the biggest
- # number of keys, among the set of migrating / importing nodes.
- if !owner
- xputs "*** Fix me, some work to do here."
- # Select owner...
- # Use ADDSLOTS to assign the slot.
- exit 1
- end
+ owners = get_slot_owners(slot)
+ owner = owners[0] if owners.length == 1
migrating = []
importing = []
@@ -490,20 +554,77 @@ class RedisTrib
puts "Set as migrating in: #{migrating.join(",")}"
puts "Set as importing in: #{importing.join(",")}"
+ # If there is no slot owner, set as owner the slot with the biggest
+ # number of keys, among the set of migrating / importing nodes.
+ if !owner
+ xputs ">>> Nobody claims ownership, selecting an owner..."
+ owner = get_node_with_most_keys_in_slot(@nodes,slot)
+
+ # If we still don't have an owner, we can't fix it.
+ if !owner
+ xputs "[ERR] Can't select a slot owner. Impossible to fix."
+ exit 1
+ end
+
+ # Use ADDSLOTS to assign the slot.
+ puts "*** Configuring #{owner} as the slot owner"
+ owner.r.cluster("setslot",slot,"stable")
+ owner.r.cluster("addslots",slot)
+ # Make sure this information will propagate. Not strictly needed
+ # since there is no past owner, so all the other nodes will accept
+ # whatever epoch this node will claim the slot with.
+ owner.r.cluster("bumpepoch")
+
+ # Remove the owner from the list of migrating/importing
+ # nodes.
+ migrating.delete(owner)
+ importing.delete(owner)
+ end
+
+ # If there are multiple owners of the slot, we need to fix it
+ # so that a single node is the owner and all the other nodes
+ # are in importing state. Later the fix can be handled by one
+ # of the base cases above.
+ #
+ # Note that this case also covers multiple nodes having the slot
+ # in migrating state, since migrating is a valid state only for
+ # slot owners.
+ if owners.length > 1
+ owner = get_node_with_most_keys_in_slot(owners,slot)
+ owners.each{|n|
+ next if n == owner
+ n.r.cluster('delslots',slot)
+ n.r.cluster('setslot',slot,'importing',owner.info[:name])
+ importing.delete(n) # Avoid duplciates
+ importing << n
+ }
+ owner.r.cluster('bumpepoch')
+ end
+
# Case 1: The slot is in migrating state in one slot, and in
# importing state in 1 slot. That's trivial to address.
if migrating.length == 1 && importing.length == 1
- move_slot(migrating[0],importing[0],slot,:verbose=>true,:fix=>true)
+ move_slot(migrating[0],importing[0],slot,:dots=>true,:fix=>true)
+ # Case 2: There are multiple nodes that claim the slot as importing,
+ # they probably got keys about the slot after a restart so opened
+ # the slot. In this case we just move all the keys to the owner
+ # according to the configuration.
elsif migrating.length == 0 && importing.length > 0
xputs ">>> Moving all the #{slot} slot keys to its owner #{owner}"
importing.each {|node|
next if node == owner
- move_slot(node,owner,slot,:verbose=>true,:fix=>true,:cold=>true)
+ move_slot(node,owner,slot,:dots=>true,:fix=>true,:cold=>true)
xputs ">>> Setting #{slot} as STABLE in #{node}"
node.r.cluster("setslot",slot,"stable")
}
+ # Case 3: There are no slots claiming to be in importing state, but
+ # there is a migrating node that actually don't have any key. We
+ # can just close the slot, probably a reshard interrupted in the middle.
+ elsif importing.length == 0 && migrating.length == 1 &&
+ migrating[0].r.cluster("getkeysinslot",slot,10).length == 0
+ migrating[0].r.cluster("setslot",slot,"stable")
else
- xputs "[ERR] Sorry, Redis-trib can't fix this slot yet (work in progress)"
+ xputs "[ERR] Sorry, Redis-trib can't fix this slot yet (work in progress). Slot is set as migrating in #{migrating.join(",")}, as importing in #{importing.join(",")}, owner is #{owner}"
end
end
@@ -538,7 +659,6 @@ class RedisTrib
nodes_count = @nodes.length
masters_count = @nodes.length / (@replicas+1)
masters = []
- slaves = []
# The first step is to split instances by IP. This is useful as
# we'll try to allocate master nodes in different physical machines
@@ -556,16 +676,31 @@ class RedisTrib
# Select master instances
puts "Using #{masters_count} masters:"
- while masters.length < masters_count
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- masters << nodes_list.shift
- puts masters[-1]
- nodes_count -= 1
- break if masters.length == masters_count
- }
+ interleaved = []
+ stop = false
+ while not stop do
+ # Take one node from each IP until we run out of nodes
+ # across every IP.
+ ips.each do |ip,nodes|
+ if nodes.empty?
+ # if this IP has no remaining nodes, check for termination
+ if interleaved.length == nodes_count
+ # stop when 'interleaved' has accumulated all nodes
+ stop = true
+ next
+ end
+ else
+ # else, move one node from this IP to 'interleaved'
+ interleaved.push nodes.shift
+ end
+ end
end
+ masters = interleaved.slice!(0, masters_count)
+ nodes_count -= masters.length
+
+ masters.each{|m| puts m}
+
# Alloc slots on masters
slots_per_node = ClusterHashSlots.to_f / masters_count
first = 0
@@ -592,8 +727,8 @@ class RedisTrib
# all nodes will be used.
assignment_verbose = false
- [:requested,:unused].each{|assign|
- masters.each{|m|
+ [:requested,:unused].each do |assign|
+ masters.each do |m|
assigned_replicas = 0
while assigned_replicas < @replicas
break if nodes_count == 0
@@ -607,21 +742,33 @@ class RedisTrib
"role too (#{nodes_count} remaining)."
end
end
- ips.each{|ip,nodes_list|
- next if nodes_list.length == 0
- # Skip instances with the same IP as the master if we
- # have some more IPs available.
- next if ip == m.info[:host] && nodes_count > nodes_list.length
- slave = nodes_list.shift
- slave.set_as_replica(m.info[:name])
- nodes_count -= 1
- assigned_replicas += 1
- puts "Adding replica #{slave} to #{m}"
- break
- }
+
+ # Return the first node not matching our current master
+ node = interleaved.find{|n| n.info[:host] != m.info[:host]}
+
+ # If we found a node, use it as a best-first match.
+ # Otherwise, we didn't find a node on a different IP, so we
+ # go ahead and use a same-IP replica.
+ if node
+ slave = node
+ interleaved.delete node
+ else
+ slave = interleaved.shift
+ end
+ slave.set_as_replica(m.info[:name])
+ nodes_count -= 1
+ assigned_replicas += 1
+ puts "Adding replica #{slave} to #{m}"
+
+ # If we are in the "assign extra nodes" loop,
+ # we want to assign one extra replica to each
+ # master before repeating masters.
+ # This break lets us assign extra replicas to masters
+ # in a round-robin way.
+ break if assign == :unused
end
- }
- }
+ end
+ end
end
def flush_nodes_config
@@ -686,8 +833,13 @@ class RedisTrib
f[:flags].index("fail")
fnode = ClusterNode.new(f[:addr])
fnode.connect()
- fnode.load_info()
- add_node(fnode)
+ next if !fnode.r
+ begin
+ fnode.load_info()
+ add_node(fnode)
+ rescue => e
+ xputs "[ERR] Unable to load info for node #{fnode}"
+ end
}
populate_nodes_replicas_info
end
@@ -756,65 +908,241 @@ class RedisTrib
# Move slots between source and target nodes using MIGRATE.
#
- # Options:
+ # Options:
# :verbose -- Print a dot for every moved key.
# :fix -- We are moving in the context of a fix. Use REPLACE.
- # :cold -- Move keys without opening / reconfiguring the nodes.
+ # :cold -- Move keys without opening slots / reconfiguring the nodes.
+ # :update -- Update nodes.info[:slots] for source/target nodes.
+ # :quiet -- Don't print info messages.
def move_slot(source,target,slot,o={})
+ o = {:pipeline => MigrateDefaultPipeline}.merge(o)
+
# We start marking the slot as importing in the destination node,
# and the slot as migrating in the target host. Note that the order of
# the operations is important, as otherwise a client may be redirected
# to the target node that does not yet know it is importing this slot.
- print "Moving slot #{slot} from #{source} to #{target}: "; STDOUT.flush
+ if !o[:quiet]
+ print "Moving slot #{slot} from #{source} to #{target}: "
+ STDOUT.flush
+ end
+
if !o[:cold]
target.r.cluster("setslot",slot,"importing",source.info[:name])
source.r.cluster("setslot",slot,"migrating",target.info[:name])
end
# Migrate all the keys from source to target using the MIGRATE command
while true
- keys = source.r.cluster("getkeysinslot",slot,10)
+ keys = source.r.cluster("getkeysinslot",slot,o[:pipeline])
break if keys.length == 0
- keys.each{|key|
- begin
- source.r.client.call(["migrate",target.info[:host],target.info[:port],key,0,15000])
- rescue => e
- if o[:fix] && e.to_s =~ /BUSYKEY/
- xputs "*** Target key #{key} exists. Replace it for FIX."
- source.r.client.call(["migrate",target.info[:host],target.info[:port],key,0,15000,:replace])
- else
- puts ""
- xputs "[ERR] #{e}"
- exit 1
- end
+ begin
+ source.r.client.call(["migrate",target.info[:host],target.info[:port],"",0,@timeout,:keys,*keys])
+ rescue => e
+ if o[:fix] && e.to_s =~ /BUSYKEY/
+ xputs "*** Target key exists. Replacing it for FIX."
+ source.r.client.call(["migrate",target.info[:host],target.info[:port],"",0,@timeout,:replace,:keys,*keys])
+ else
+ puts ""
+ xputs "[ERR] Calling MIGRATE: #{e}"
+ exit 1
end
- print "." if o[:verbose]
- STDOUT.flush
- }
+ end
+ print "."*keys.length if o[:dots]
+ STDOUT.flush
end
- puts
+ puts if !o[:quiet]
# Set the new node as the owner of the slot in all the known nodes.
if !o[:cold]
@nodes.each{|n|
+ next if n.has_flag?("slave")
n.r.cluster("setslot",slot,"node",target.info[:name])
}
end
+
+ # Update the node logical config
+ if o[:update] then
+ source.info[:slots].delete(slot)
+ target.info[:slots][slot] = true
+ end
end
- # redis-trib subcommands implementations
+ # redis-trib subcommands implementations.
def check_cluster_cmd(argv,opt)
load_cluster_info_from_node(argv[0])
check_cluster
end
+ def info_cluster_cmd(argv,opt)
+ load_cluster_info_from_node(argv[0])
+ show_cluster_info
+ end
+
+ def rebalance_cluster_cmd(argv,opt)
+ opt = {
+ 'pipeline' => MigrateDefaultPipeline,
+ 'threshold' => RebalanceDefaultThreshold
+ }.merge(opt)
+
+ # Load nodes info before parsing options, otherwise we can't
+ # handle --weight.
+ load_cluster_info_from_node(argv[0])
+
+ # Options parsing
+ threshold = opt['threshold'].to_i
+ autoweights = opt['auto-weights']
+ weights = {}
+ opt['weight'].each{|w|
+ fields = w.split("=")
+ node = get_node_by_abbreviated_name(fields[0])
+ if !node || !node.has_flag?("master")
+ puts "*** No such master node #{fields[0]}"
+ exit 1
+ end
+ weights[node.info[:name]] = fields[1].to_f
+ } if opt['weight']
+ useempty = opt['use-empty-masters']
+
+ # Assign a weight to each node, and compute the total cluster weight.
+ total_weight = 0
+ nodes_involved = 0
+ @nodes.each{|n|
+ if n.has_flag?("master")
+ next if !useempty && n.slots.length == 0
+ n.info[:w] = weights[n.info[:name]] ? weights[n.info[:name]] : 1
+ total_weight += n.info[:w]
+ nodes_involved += 1
+ end
+ }
+
+ # Check cluster, only proceed if it looks sane.
+ check_cluster(:quiet => true)
+ if @errors.length != 0
+ puts "*** Please fix your cluster problems before rebalancing"
+ exit 1
+ end
+
+ # Calculate the slots balance for each node. It's the number of
+ # slots the node should lose (if positive) or gain (if negative)
+ # in order to be balanced.
+ threshold = opt['threshold'].to_f
+ threshold_reached = false
+ @nodes.each{|n|
+ if n.has_flag?("master")
+ next if !n.info[:w]
+ expected = ((ClusterHashSlots.to_f / total_weight) *
+ n.info[:w]).to_i
+ n.info[:balance] = n.slots.length - expected
+ # Compute the percentage of difference between the
+ # expected number of slots and the real one, to see
+ # if it's over the threshold specified by the user.
+ over_threshold = false
+ if threshold > 0
+ if n.slots.length > 0
+ err_perc = (100-(100.0*expected/n.slots.length)).abs
+ over_threshold = true if err_perc > threshold
+ elsif expected > 0
+ over_threshold = true
+ end
+ end
+ threshold_reached = true if over_threshold
+ end
+ }
+ if !threshold_reached
+ xputs "*** No rebalancing needed! All nodes are within the #{threshold}% threshold."
+ return
+ end
+
+ # Only consider nodes we want to change
+ sn = @nodes.select{|n|
+ n.has_flag?("master") && n.info[:w]
+ }
+
+ # Because of rounding, it is possible that the balance of all nodes
+ # summed does not give 0. Make sure that nodes that have to provide
+ # slots are always matched by nodes receiving slots.
+ total_balance = sn.map{|x| x.info[:balance]}.reduce{|a,b| a+b}
+ while total_balance > 0
+ sn.each{|n|
+ if n.info[:balance] < 0 && total_balance > 0
+ n.info[:balance] -= 1
+ total_balance -= 1
+ end
+ }
+ end
+
+ # Sort nodes by their slots balance.
+ sn = sn.sort{|a,b|
+ a.info[:balance] <=> b.info[:balance]
+ }
+
+ xputs ">>> Rebalancing across #{nodes_involved} nodes. Total weight = #{total_weight}"
+
+ if $verbose
+ sn.each{|n|
+ puts "#{n} balance is #{n.info[:balance]} slots"
+ }
+ end
+
+ # Now we have at the start of the 'sn' array nodes that should get
+ # slots, at the end nodes that must give slots.
+ # We take two indexes, one at the start, and one at the end,
+ # incrementing or decrementing the indexes accordingly til we
+ # find nodes that need to get/provide slots.
+ dst_idx = 0
+ src_idx = sn.length - 1
+
+ while dst_idx < src_idx
+ dst = sn[dst_idx]
+ src = sn[src_idx]
+ numslots = [dst.info[:balance],src.info[:balance]].map{|n|
+ n.abs
+ }.min
+
+ if numslots > 0
+ puts "Moving #{numslots} slots from #{src} to #{dst}"
+
+                # Actually move the slots.
+ reshard_table = compute_reshard_table([src],numslots)
+ if reshard_table.length != numslots
+                    xputs "*** Assertion failed: Reshard table != number of slots"
+ exit 1
+ end
+ if opt['simulate']
+ print "#"*reshard_table.length
+ else
+ reshard_table.each{|e|
+ move_slot(e[:source],dst,e[:slot],
+ :quiet=>true,
+ :dots=>false,
+ :update=>true,
+ :pipeline=>opt['pipeline'])
+ print "#"
+ STDOUT.flush
+ }
+ end
+ puts
+ end
+
+ # Update nodes balance.
+ dst.info[:balance] += numslots
+ src.info[:balance] -= numslots
+ dst_idx += 1 if dst.info[:balance] == 0
+ src_idx -= 1 if src.info[:balance] == 0
+ end
+ end
+
def fix_cluster_cmd(argv,opt)
@fix = true
+ @timeout = opt['timeout'].to_i if opt['timeout']
+
load_cluster_info_from_node(argv[0])
check_cluster
end
def reshard_cluster_cmd(argv,opt)
+ opt = {'pipeline' => MigrateDefaultPipeline}.merge(opt)
+
load_cluster_info_from_node(argv[0])
check_cluster
if @errors.length != 0
@@ -822,6 +1150,8 @@ class RedisTrib
exit 1
end
+ @timeout = opt['timeout'].to_i if opt['timeout'].to_i
+
# Get number of slots
if opt['slots']
numslots = opt['slots'].to_i
@@ -925,7 +1255,9 @@ class RedisTrib
exit(1) if (yesno != "yes")
end
reshard_table.each{|e|
- move_slot(e[:source],target,e[:slot],:verbose=>true)
+ move_slot(e[:source],target,e[:slot],
+ :dots=>true,
+ :pipeline=>opt['pipeline'])
}
end
@@ -973,6 +1305,11 @@ class RedisTrib
sleep 1
wait_cluster_join
flush_nodes_config # Useful for the replicas
+        # Reset the node information so that, when the final summary is
+        # printed by check_cluster for the newly created cluster, all the
+        # nodes are properly listed as slaves or masters.
+ reset_nodes
+ load_cluster_info_from_node(argv[0])
check_cluster
end
@@ -1106,6 +1443,8 @@ class RedisTrib
def import_cluster_cmd(argv,opt)
source_addr = opt['from']
xputs ">>> Importing data from #{source_addr} to cluster #{argv[1]}"
+ use_copy = opt['copy']
+ use_replace = opt['replace']
# Check the existing cluster.
load_cluster_info_from_node(argv[0])
@@ -1132,7 +1471,7 @@ class RedisTrib
# right node as needed.
cursor = nil
while cursor != 0
- cursor,keys = source.scan(cursor,:count,1000)
+ cursor,keys = source.scan(cursor, :count => 1000)
cursor = cursor.to_i
keys.each{|k|
# Migrate keys using the MIGRATE command.
@@ -1141,7 +1480,10 @@ class RedisTrib
print "Migrating #{k} to #{target}: "
STDOUT.flush
begin
- source.client.call(["migrate",target.info[:host],target.info[:port],k,0,15000])
+ cmd = ["migrate",target.info[:host],target.info[:port],k,0,@timeout]
+ cmd << :copy if use_copy
+ cmd << :replace if use_replace
+ source.client.call(cmd)
rescue => e
puts e
else
@@ -1166,17 +1508,32 @@ class RedisTrib
if ARGV[idx][0..1] == "--"
option = ARGV[idx][2..-1]
idx += 1
+
+ # --verbose is a global option
+ if option == "verbose"
+ $verbose = true
+ next
+ end
+
if ALLOWED_OPTIONS[cmd] == nil || ALLOWED_OPTIONS[cmd][option] == nil
puts "Unknown option '#{option}' for command '#{cmd}'"
exit 1
end
- if ALLOWED_OPTIONS[cmd][option]
+ if ALLOWED_OPTIONS[cmd][option] != false
value = ARGV[idx]
idx += 1
else
value = true
end
- options[option] = value
+
+ # If the option is set to [], it's a multiple arguments
+ # option. We just queue every new value into an array.
+ if ALLOWED_OPTIONS[cmd][option] == []
+ options[option] = [] if !options[option]
+ options[option] << value
+ else
+ options[option] = value
+ end
else
# Remaining arguments are not options.
break
@@ -1199,7 +1556,7 @@ end
#################################################################################
# Libraries
-#
+#
# We try to don't depend on external libs since this is a critical part
# of Redis Cluster.
#################################################################################
@@ -1288,8 +1645,10 @@ end
COMMANDS={
"create" => ["create_cluster_cmd", -2, "host1:port1 ... hostN:portN"],
"check" => ["check_cluster_cmd", 2, "host:port"],
+ "info" => ["info_cluster_cmd", 2, "host:port"],
"fix" => ["fix_cluster_cmd", 2, "host:port"],
"reshard" => ["reshard_cluster_cmd", 2, "host:port"],
+ "rebalance" => ["rebalance_cluster_cmd", -2, "host:port"],
"add-node" => ["addnode_cluster_cmd", 3, "new_host:new_port existing_host:existing_port"],
"del-node" => ["delnode_cluster_cmd", 3, "host:port node_id"],
"set-timeout" => ["set_timeout_cluster_cmd", 3, "host:port milliseconds"],
@@ -1301,14 +1660,15 @@ COMMANDS={
ALLOWED_OPTIONS={
"create" => {"replicas" => true},
"add-node" => {"slave" => false, "master-id" => true},
- "import" => {"from" => :required},
- "reshard" => {"from" => true, "to" => true, "slots" => true, "yes" => false}
+ "import" => {"from" => :required, "copy" => false, "replace" => false},
+ "reshard" => {"from" => true, "to" => true, "slots" => true, "yes" => false, "timeout" => true, "pipeline" => true},
+ "rebalance" => {"weight" => [], "auto-weights" => false, "use-empty-masters" => false, "timeout" => true, "simulate" => false, "pipeline" => true, "threshold" => true},
+ "fix" => {"timeout" => MigrateDefaultTimeout},
}
def show_help
puts "Usage: redis-trib <command> <options> <arguments ...>\n\n"
COMMANDS.each{|k,v|
- o = ""
puts " #{k.ljust(15)} #{v[2]}"
if ALLOWED_OPTIONS[k]
ALLOWED_OPTIONS[k].each{|optname,has_arg|
diff --git a/src/redis.h b/src/redis.h
deleted file mode 100644
index d2b00bff5..000000000
--- a/src/redis.h
+++ /dev/null
@@ -1,1530 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __REDIS_H
-#define __REDIS_H
-
-#include "fmacros.h"
-#include "config.h"
-
-#if defined(__sun)
-#include "solarisfixes.h"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <limits.h>
-#include <unistd.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <syslog.h>
-#include <netinet/in.h>
-#include <lua.h>
-#include <signal.h>
-
-typedef long long mstime_t; /* millisecond time type. */
-
-#include "ae.h" /* Event driven programming library */
-#include "sds.h" /* Dynamic safe strings */
-#include "dict.h" /* Hash tables */
-#include "adlist.h" /* Linked lists */
-#include "zmalloc.h" /* total memory usage aware version of malloc/free */
-#include "anet.h" /* Networking the easy way */
-#include "ziplist.h" /* Compact list data structure */
-#include "intset.h" /* Compact integer set structure */
-#include "version.h" /* Version macro */
-#include "util.h" /* Misc functions useful in many places */
-#include "latency.h" /* Latency monitor API */
-#include "sparkline.h" /* ASII graphs API */
-
-/* Error codes */
-#define REDIS_OK 0
-#define REDIS_ERR -1
-
-/* Static server configuration */
-#define REDIS_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
-#define REDIS_MIN_HZ 1
-#define REDIS_MAX_HZ 500
-#define REDIS_SERVERPORT 6379 /* TCP port */
-#define REDIS_TCP_BACKLOG 511 /* TCP listen backlog */
-#define REDIS_MAXIDLETIME 0 /* default client timeout: infinite */
-#define REDIS_DEFAULT_DBNUM 16
-#define REDIS_CONFIGLINE_MAX 1024
-#define REDIS_DBCRON_DBS_PER_CALL 16
-#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
-#define REDIS_SHARED_SELECT_CMDS 10
-#define REDIS_SHARED_INTEGERS 10000
-#define REDIS_SHARED_BULKHDR_LEN 32
-#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
-#define REDIS_AOF_REWRITE_PERC 100
-#define REDIS_AOF_REWRITE_MIN_SIZE (64*1024*1024)
-#define REDIS_AOF_REWRITE_ITEMS_PER_CMD 64
-#define REDIS_SLOWLOG_LOG_SLOWER_THAN 10000
-#define REDIS_SLOWLOG_MAX_LEN 128
-#define REDIS_MAX_CLIENTS 10000
-#define REDIS_AUTHPASS_MAX_LEN 512
-#define REDIS_DEFAULT_SLAVE_PRIORITY 100
-#define REDIS_REPL_TIMEOUT 60
-#define REDIS_REPL_PING_SLAVE_PERIOD 10
-#define REDIS_RUN_ID_SIZE 40
-#define REDIS_OPS_SEC_SAMPLES 16
-#define REDIS_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
-#define REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
-#define REDIS_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
-#define REDIS_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
-#define REDIS_DEFAULT_PID_FILE "/var/run/redis.pid"
-#define REDIS_DEFAULT_SYSLOG_IDENT "redis"
-#define REDIS_DEFAULT_CLUSTER_CONFIG_FILE "nodes.conf"
-#define REDIS_DEFAULT_DAEMONIZE 0
-#define REDIS_DEFAULT_UNIX_SOCKET_PERM 0
-#define REDIS_DEFAULT_TCP_KEEPALIVE 0
-#define REDIS_DEFAULT_LOGFILE ""
-#define REDIS_DEFAULT_SYSLOG_ENABLED 0
-#define REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR 1
-#define REDIS_DEFAULT_RDB_COMPRESSION 1
-#define REDIS_DEFAULT_RDB_CHECKSUM 1
-#define REDIS_DEFAULT_RDB_FILENAME "dump.rdb"
-#define REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA 1
-#define REDIS_DEFAULT_SLAVE_READ_ONLY 1
-#define REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY 0
-#define REDIS_DEFAULT_MAXMEMORY 0
-#define REDIS_DEFAULT_MAXMEMORY_SAMPLES 5
-#define REDIS_DEFAULT_AOF_FILENAME "appendonly.aof"
-#define REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0
-#define REDIS_DEFAULT_ACTIVE_REHASHING 1
-#define REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
-#define REDIS_DEFAULT_MIN_SLAVES_TO_WRITE 0
-#define REDIS_DEFAULT_MIN_SLAVES_MAX_LAG 10
-#define REDIS_IP_STR_LEN INET6_ADDRSTRLEN
-#define REDIS_PEER_ID_LEN (REDIS_IP_STR_LEN+32) /* Must be enough for ip:port */
-#define REDIS_BINDADDR_MAX 16
-#define REDIS_MIN_RESERVED_FDS 32
-#define REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD 0
-
-#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */
-#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
-#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
-#define ACTIVE_EXPIRE_CYCLE_SLOW 0
-#define ACTIVE_EXPIRE_CYCLE_FAST 1
-
-/* Protocol and I/O related defines */
-#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
-#define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
-#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
-#define REDIS_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
-#define REDIS_MBULK_BIG_ARG (1024*32)
-#define REDIS_LONGSTR_SIZE 21 /* Bytes needed for long -> str */
-#define REDIS_AOF_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
-/* When configuring the Redis eventloop, we setup it so that the total number
- * of file descriptors we can handle are server.maxclients + RESERVED_FDS + FDSET_INCR
- * that is our safety margin. */
-#define REDIS_EVENTLOOP_FDSET_INCR (REDIS_MIN_RESERVED_FDS+96)
-
-/* Hash table parameters */
-#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
-
-/* Command flags. Please check the command table defined in the redis.c file
- * for more information about the meaning of every flag. */
-#define REDIS_CMD_WRITE 1 /* "w" flag */
-#define REDIS_CMD_READONLY 2 /* "r" flag */
-#define REDIS_CMD_DENYOOM 4 /* "m" flag */
-#define REDIS_CMD_NOT_USED_1 8 /* no longer used flag */
-#define REDIS_CMD_ADMIN 16 /* "a" flag */
-#define REDIS_CMD_PUBSUB 32 /* "p" flag */
-#define REDIS_CMD_NOSCRIPT 64 /* "s" flag */
-#define REDIS_CMD_RANDOM 128 /* "R" flag */
-#define REDIS_CMD_SORT_FOR_SCRIPT 256 /* "S" flag */
-#define REDIS_CMD_LOADING 512 /* "l" flag */
-#define REDIS_CMD_STALE 1024 /* "t" flag */
-#define REDIS_CMD_SKIP_MONITOR 2048 /* "M" flag */
-#define REDIS_CMD_ASKING 4096 /* "k" flag */
-#define REDIS_CMD_FAST 8192 /* "F" flag */
-
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
-#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
-#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
-#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
-#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
-#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
-#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
-/* AOF states */
-#define REDIS_AOF_OFF 0 /* AOF is off */
-#define REDIS_AOF_ON 1 /* AOF is on */
-#define REDIS_AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */
-
-/* Client flags */
-#define REDIS_SLAVE (1<<0) /* This client is a slave server */
-#define REDIS_MASTER (1<<1) /* This client is a master server */
-#define REDIS_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
-#define REDIS_MULTI (1<<3) /* This client is in a MULTI context */
-#define REDIS_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
-#define REDIS_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
-#define REDIS_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
-#define REDIS_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
- server.unblocked_clients */
-#define REDIS_LUA_CLIENT (1<<8) /* This is a non connected client used by Lua */
-#define REDIS_ASKING (1<<9) /* Client issued the ASKING command */
-#define REDIS_CLOSE_ASAP (1<<10)/* Close this client ASAP */
-#define REDIS_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
-#define REDIS_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
-#define REDIS_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if is master */
-#define REDIS_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
-#define REDIS_FORCE_REPL (1<<15) /* Force replication of current cmd. */
-#define REDIS_PRE_PSYNC (1<<16) /* Instance don't understand PSYNC. */
-#define REDIS_READONLY (1<<17) /* Cluster client is in read-only state. */
-#define REDIS_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
-
-/* Client block type (btype field in client structure)
- * if REDIS_BLOCKED flag is set. */
-#define REDIS_BLOCKED_NONE 0 /* Not blocked, no REDIS_BLOCKED flag set. */
-#define REDIS_BLOCKED_LIST 1 /* BLPOP & co. */
-#define REDIS_BLOCKED_WAIT 2 /* WAIT for synchronous replication. */
-
-/* Client request types */
-#define REDIS_REQ_INLINE 1
-#define REDIS_REQ_MULTIBULK 2
-
-/* Client classes for client limits, currently used only for
- * the max-client-output-buffer limit implementation. */
-#define REDIS_CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
-#define REDIS_CLIENT_TYPE_SLAVE 1 /* Slaves. */
-#define REDIS_CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
-#define REDIS_CLIENT_TYPE_COUNT 3
-
-/* Slave replication state - from the point of view of the slave. */
-#define REDIS_REPL_NONE 0 /* No active replication */
-#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_CONNECTING 2 /* Connecting to master */
-#define REDIS_REPL_RECEIVE_PONG 3 /* Wait for PING reply */
-#define REDIS_REPL_TRANSFER 4 /* Receiving .rdb from master */
-#define REDIS_REPL_CONNECTED 5 /* Connected to master */
-
-/* Slave replication state - from the point of view of the master.
- * In SEND_BULK and ONLINE state the slave receives new updates
- * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
- * to start the next background saving in order to send updates to it. */
-#define REDIS_REPL_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
-#define REDIS_REPL_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
-#define REDIS_REPL_SEND_BULK 8 /* Sending RDB file to slave. */
-#define REDIS_REPL_ONLINE 9 /* RDB file transmitted, sending just updates. */
-
-/* Synchronous read timeout - slave side */
-#define REDIS_REPL_SYNCIO_TIMEOUT 5
-
-/* List related stuff */
-#define REDIS_HEAD 0
-#define REDIS_TAIL 1
-
-/* Sort operations */
-#define REDIS_SORT_GET 0
-#define REDIS_SORT_ASC 1
-#define REDIS_SORT_DESC 2
-#define REDIS_SORTKEY_MAX 1024
-
-/* Log levels */
-#define REDIS_DEBUG 0
-#define REDIS_VERBOSE 1
-#define REDIS_NOTICE 2
-#define REDIS_WARNING 3
-#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */
-#define REDIS_DEFAULT_VERBOSITY REDIS_NOTICE
-
-/* Anti-warning macro... */
-#define REDIS_NOTUSED(V) ((void) V)
-
-#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
-#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
-
-/* Append only defines */
-#define AOF_FSYNC_NO 0
-#define AOF_FSYNC_ALWAYS 1
-#define AOF_FSYNC_EVERYSEC 2
-#define REDIS_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
-
-/* Zip structure related defaults */
-#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512
-#define REDIS_HASH_MAX_ZIPLIST_VALUE 64
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 512
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 64
-#define REDIS_SET_MAX_INTSET_ENTRIES 512
-#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
-#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
-
-/* HyperLogLog defines */
-#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
-
-/* Sets operations codes */
-#define REDIS_OP_UNION 0
-#define REDIS_OP_DIFF 1
-#define REDIS_OP_INTER 2
-
-/* Redis maxmemory strategies */
-#define REDIS_MAXMEMORY_VOLATILE_LRU 0
-#define REDIS_MAXMEMORY_VOLATILE_TTL 1
-#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2
-#define REDIS_MAXMEMORY_ALLKEYS_LRU 3
-#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4
-#define REDIS_MAXMEMORY_NO_EVICTION 5
-#define REDIS_DEFAULT_MAXMEMORY_POLICY REDIS_MAXMEMORY_NO_EVICTION
-
-/* Scripting */
-#define REDIS_LUA_TIME_LIMIT 5000 /* milliseconds */
-
-/* Units */
-#define UNIT_SECONDS 0
-#define UNIT_MILLISECONDS 1
-
-/* SHUTDOWN flags */
-#define REDIS_SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
- points are configured. */
-#define REDIS_SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
-
-/* Command call flags, see call() function */
-#define REDIS_CALL_NONE 0
-#define REDIS_CALL_SLOWLOG 1
-#define REDIS_CALL_STATS 2
-#define REDIS_CALL_PROPAGATE 4
-#define REDIS_CALL_FULL (REDIS_CALL_SLOWLOG | REDIS_CALL_STATS | REDIS_CALL_PROPAGATE)
-
-/* Command propagation flags, see propagate() function */
-#define REDIS_PROPAGATE_NONE 0
-#define REDIS_PROPAGATE_AOF 1
-#define REDIS_PROPAGATE_REPL 2
-
-/* Keyspace changes notification classes. Every class is associated with a
- * character for configuration purposes. */
-#define REDIS_NOTIFY_KEYSPACE (1<<0) /* K */
-#define REDIS_NOTIFY_KEYEVENT (1<<1) /* E */
-#define REDIS_NOTIFY_GENERIC (1<<2) /* g */
-#define REDIS_NOTIFY_STRING (1<<3) /* $ */
-#define REDIS_NOTIFY_LIST (1<<4) /* l */
-#define REDIS_NOTIFY_SET (1<<5) /* s */
-#define REDIS_NOTIFY_HASH (1<<6) /* h */
-#define REDIS_NOTIFY_ZSET (1<<7) /* z */
-#define REDIS_NOTIFY_EXPIRED (1<<8) /* x */
-#define REDIS_NOTIFY_EVICTED (1<<9) /* e */
-#define REDIS_NOTIFY_ALL (REDIS_NOTIFY_GENERIC | REDIS_NOTIFY_STRING | REDIS_NOTIFY_LIST | REDIS_NOTIFY_SET | REDIS_NOTIFY_HASH | REDIS_NOTIFY_ZSET | REDIS_NOTIFY_EXPIRED | REDIS_NOTIFY_EVICTED) /* A */
-
-/* Get the first bind addr or NULL */
-#define REDIS_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL)
-
-/* Using the following macro you can run code inside serverCron() with the
- * specified period, specified in milliseconds.
- * The actual resolution depends on server.hz. */
-#define run_with_period(_ms_) if ((_ms_ <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
-
-/* We can print the stacktrace, so our assert is defined this way: */
-#define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
-#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
-#define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
-
-/*-----------------------------------------------------------------------------
- * Data types
- *----------------------------------------------------------------------------*/
-
-/* A redis object, that is a type able to hold a string / list / set */
-
-/* The actual Redis Object */
-#define REDIS_LRU_BITS 24
-#define REDIS_LRU_CLOCK_MAX ((1<<REDIS_LRU_BITS)-1) /* Max value of obj->lru */
-#define REDIS_LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
-typedef struct redisObject {
- unsigned type:4;
- unsigned encoding:4;
- unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock) */
- int refcount;
- void *ptr;
-} robj;
-
-/* Macro used to obtain the current LRU clock.
- * If the current resolution is lower than the frequency we refresh the
- * LRU clock (as it should be in production servers) we return the
- * precomputed value, otherwise we need to resort to a function call. */
-#define LRU_CLOCK() ((1000/server.hz <= REDIS_LRU_CLOCK_RESOLUTION) ? server.lruclock : getLRUClock())
-
-/* Macro used to initialize a Redis object allocated on the stack.
- * Note that this macro is taken near the structure definition to make sure
- * we'll update it when the structure is changed, to avoid bugs like
- * bug #85 introduced exactly in this way. */
-#define initStaticStringObject(_var,_ptr) do { \
- _var.refcount = 1; \
- _var.type = REDIS_STRING; \
- _var.encoding = REDIS_ENCODING_RAW; \
- _var.ptr = _ptr; \
-} while(0);
-
-/* To improve the quality of the LRU approximation we take a set of keys
- * that are good candidate for eviction across freeMemoryIfNeeded() calls.
- *
- * Entries inside the eviciton pool are taken ordered by idle time, putting
- * greater idle times to the right (ascending order).
- *
- * Empty entries have the key pointer set to NULL. */
-#define REDIS_EVICTION_POOL_SIZE 16
-struct evictionPoolEntry {
- unsigned long long idle; /* Object idle time. */
- sds key; /* Key name. */
-};
-
-/* Redis database representation. There are multiple databases identified
- * by integers from 0 (the default database) up to the max configured
- * database. The database number is the 'id' field in the structure. */
-typedef struct redisDb {
- dict *dict; /* The keyspace for this DB */
- dict *expires; /* Timeout of keys with a timeout set */
- dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
- dict *ready_keys; /* Blocked keys that received a PUSH */
- dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
- struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
- int id; /* Database ID */
- long long avg_ttl; /* Average TTL, just for stats */
-} redisDb;
-
-/* Client MULTI/EXEC state */
-typedef struct multiCmd {
- robj **argv;
- int argc;
- struct redisCommand *cmd;
-} multiCmd;
-
-typedef struct multiState {
- multiCmd *commands; /* Array of MULTI commands */
- int count; /* Total number of MULTI commands */
- int minreplicas; /* MINREPLICAS for synchronous replication */
- time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
-} multiState;
-
-/* This structure holds the blocking operation state for a client.
- * The fields used depend on client->btype. */
-typedef struct blockingState {
- /* Generic fields. */
- mstime_t timeout; /* Blocking operation timeout. If UNIX current time
- * is > timeout then the operation timed out. */
-
- /* REDIS_BLOCK_LIST */
- dict *keys; /* The keys we are waiting to terminate a blocking
- * operation such as BLPOP. Otherwise NULL. */
- robj *target; /* The key that should receive the element,
- * for BRPOPLPUSH. */
-
- /* REDIS_BLOCK_WAIT */
- int numreplicas; /* Number of replicas we are waiting for ACK. */
- long long reploffset; /* Replication offset to reach. */
-} blockingState;
-
-/* The following structure represents a node in the server.ready_keys list,
- * where we accumulate all the keys that had clients blocked with a blocking
- * operation such as B[LR]POP, but received new data in the context of the
- * last executed command.
- *
- * After the execution of every command or script, we run this list to check
- * if as a result we should serve data to clients blocked, unblocking them.
- * Note that server.ready_keys will not have duplicates as there dictionary
- * also called ready_keys in every structure representing a Redis database,
- * where we make sure to remember if a given key was already added in the
- * server.ready_keys list. */
-typedef struct readyList {
- redisDb *db;
- robj *key;
-} readyList;
-
-/* With multiplexing we need to take per-client state.
- * Clients are taken in a liked list. */
-typedef struct redisClient {
- uint64_t id; /* Client incremental unique ID. */
- int fd;
- redisDb *db;
- int dictid;
- robj *name; /* As set by CLIENT SETNAME */
- sds querybuf;
- size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size */
- int argc;
- robj **argv;
- struct redisCommand *cmd, *lastcmd;
- int reqtype;
- int multibulklen; /* number of multi bulk arguments left to read */
- long bulklen; /* length of bulk argument in multi bulk request */
- list *reply;
- unsigned long reply_bytes; /* Tot bytes of objects in reply list */
- int sentlen; /* Amount of bytes already sent in the current
- buffer or object being sent. */
- time_t ctime; /* Client creation time */
- time_t lastinteraction; /* time of the last interaction, used for timeout */
- time_t obuf_soft_limit_reached_time;
- int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
- int authenticated; /* when requirepass is non-NULL */
- int replstate; /* replication state if this is a slave */
- int repldbfd; /* replication DB file descriptor */
- off_t repldboff; /* replication DB file offset */
- off_t repldbsize; /* replication DB file size */
- sds replpreamble; /* replication DB preamble. */
- long long reploff; /* replication offset if this is our master */
- long long repl_ack_off; /* replication ack offset, if this is a slave */
- long long repl_ack_time;/* replication ack time, if this is a slave */
- char replrunid[REDIS_RUN_ID_SIZE+1]; /* master run id if this is a master */
- int slave_listening_port; /* As configured with: SLAVECONF listening-port */
- multiState mstate; /* MULTI/EXEC state */
- int btype; /* Type of blocking op if REDIS_BLOCKED. */
- blockingState bpop; /* blocking state */
- long long woff; /* Last write global replication offset. */
- list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
- dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
- list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
- sds peerid; /* Cached peer ID. */
-
- /* Response buffer */
- int bufpos;
- char buf[REDIS_REPLY_CHUNK_BYTES];
-} redisClient;
-
-struct saveparam {
- time_t seconds;
- int changes;
-};
-
-struct sharedObjectsStruct {
- robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
- *colon, *nullbulk, *nullmultibulk, *queued,
- *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
- *masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
- *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
- *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop,
- *lpush, *emptyscan, *minstring, *maxstring,
- *select[REDIS_SHARED_SELECT_CMDS],
- *integers[REDIS_SHARED_INTEGERS],
- *mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
- *bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
-};
-
-/* ZSETs use a specialized version of Skiplists */
-typedef struct zskiplistNode {
- robj *obj;
- double score;
- struct zskiplistNode *backward;
- struct zskiplistLevel {
- struct zskiplistNode *forward;
- unsigned int span;
- } level[];
-} zskiplistNode;
-
-typedef struct zskiplist {
- struct zskiplistNode *header, *tail;
- unsigned long length;
- int level;
-} zskiplist;
-
-typedef struct zset {
- dict *dict;
- zskiplist *zsl;
-} zset;
-
-typedef struct clientBufferLimitsConfig {
- unsigned long long hard_limit_bytes;
- unsigned long long soft_limit_bytes;
- time_t soft_limit_seconds;
-} clientBufferLimitsConfig;
-
-extern clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT];
-
-/* The redisOp structure defines a Redis Operation, that is an instance of
- * a command with an argument vector, database ID, propagation target
- * (REDIS_PROPAGATE_*), and command pointer.
- *
- * Currently only used to additionally propagate more commands to AOF/Replication
- * after the propagation of the executed command. */
-typedef struct redisOp {
- robj **argv;
- int argc, dbid, target;
- struct redisCommand *cmd;
-} redisOp;
-
-/* Defines an array of Redis operations. There is an API to add to this
- * structure in a easy way.
- *
- * redisOpArrayInit();
- * redisOpArrayAppend();
- * redisOpArrayFree();
- */
-typedef struct redisOpArray {
- redisOp *ops;
- int numops;
-} redisOpArray;
-
-/*-----------------------------------------------------------------------------
- * Global server state
- *----------------------------------------------------------------------------*/
-
-struct clusterState;
-
-struct redisServer {
- /* General */
- pid_t pid; /* Main process pid. */
- char *configfile; /* Absolute config file path, or NULL */
- int hz; /* serverCron() calls frequency in hertz */
- redisDb *db;
- dict *commands; /* Command table */
- dict *orig_commands; /* Command table before command renaming. */
- aeEventLoop *el;
- unsigned lruclock:REDIS_LRU_BITS; /* Clock for LRU eviction */
- int shutdown_asap; /* SHUTDOWN needed ASAP */
- int activerehashing; /* Incremental rehash in serverCron() */
- char *requirepass; /* Pass for AUTH command, or NULL */
- char *pidfile; /* PID file path */
- int arch_bits; /* 32 or 64 depending on sizeof(long) */
- int cronloops; /* Number of times the cron function run */
- char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */
- int sentinel_mode; /* True if this instance is a Sentinel. */
- /* Networking */
- int port; /* TCP listening port */
- int tcp_backlog; /* TCP listen() backlog */
- char *bindaddr[REDIS_BINDADDR_MAX]; /* Addresses we should bind to */
- int bindaddr_count; /* Number of addresses in server.bindaddr[] */
- char *unixsocket; /* UNIX socket path */
- mode_t unixsocketperm; /* UNIX socket permission */
- int ipfd[REDIS_BINDADDR_MAX]; /* TCP socket file descriptors */
- int ipfd_count; /* Used slots in ipfd[] */
- int sofd; /* Unix socket file descriptor */
- int cfd[REDIS_BINDADDR_MAX];/* Cluster bus listening socket */
- int cfd_count; /* Used slots in cfd[] */
- list *clients; /* List of active clients */
- list *clients_to_close; /* Clients to close asynchronously */
- list *slaves, *monitors; /* List of slaves and MONITORs */
- redisClient *current_client; /* Current client, only used on crash report */
- int clients_paused; /* True if clients are currently paused */
- mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
- char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
- dict *migrate_cached_sockets;/* MIGRATE cached sockets */
- uint64_t next_client_id; /* Next client unique ID. Incremental. */
- /* RDB / AOF loading information */
- int loading; /* We are loading data from disk if true */
- off_t loading_total_bytes;
- off_t loading_loaded_bytes;
- time_t loading_start_time;
- off_t loading_process_events_interval_bytes;
- /* Fast pointers to often looked up command */
- struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
- *rpopCommand;
- /* Fields used only for stats */
- time_t stat_starttime; /* Server start time */
- long long stat_numcommands; /* Number of processed commands */
- long long stat_numconnections; /* Number of connections received */
- long long stat_expiredkeys; /* Number of expired keys */
- long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
- long long stat_keyspace_hits; /* Number of successful lookups of keys */
- long long stat_keyspace_misses; /* Number of failed lookups of keys */
- size_t stat_peak_memory; /* Max used memory record */
- long long stat_fork_time; /* Time needed to perform latest fork() */
- double stat_fork_rate; /* Fork rate in GB/sec. */
- long long stat_rejected_conn; /* Clients rejected because of maxclients */
- long long stat_sync_full; /* Number of full resyncs with slaves. */
- long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
- long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
- list *slowlog; /* SLOWLOG list of commands */
- long long slowlog_entry_id; /* SLOWLOG current entry ID */
- long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
- unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
- size_t resident_set_size; /* RSS sampled in serverCron(). */
- /* The following two are used to track instantaneous "load" in terms
- * of operations per second. */
- long long ops_sec_last_sample_time; /* Timestamp of last sample (in ms) */
- long long ops_sec_last_sample_ops; /* numcommands in last sample */
- long long ops_sec_samples[REDIS_OPS_SEC_SAMPLES];
- int ops_sec_idx;
- /* Configuration */
- int verbosity; /* Loglevel in redis.conf */
- int maxidletime; /* Client timeout in seconds */
- int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
- int active_expire_enabled; /* Can be disabled for testing purposes. */
- size_t client_max_querybuf_len; /* Limit for client query buffer length */
- int dbnum; /* Total number of configured DBs */
- int daemonize; /* True if running as a daemon */
- clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_TYPE_COUNT];
- /* AOF persistence */
- int aof_state; /* REDIS_AOF_(ON|OFF|WAIT_REWRITE) */
- int aof_fsync; /* Kind of fsync() policy */
- char *aof_filename; /* Name of the AOF file */
- int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
- int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
- off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
- off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
- off_t aof_current_size; /* AOF current size. */
- int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
- pid_t aof_child_pid; /* PID if rewriting process */
- list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
- sds aof_buf; /* AOF buffer, written before entering the event loop */
- int aof_fd; /* File descriptor of currently selected AOF file */
- int aof_selected_db; /* Currently selected DB in AOF */
- time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
- time_t aof_last_fsync; /* UNIX time of last fsync() */
- time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
- time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
- int aof_lastbgrewrite_status; /* REDIS_OK or REDIS_ERR */
- unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
- int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
- int aof_last_write_status; /* REDIS_OK or REDIS_ERR */
- int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
- /* AOF pipes used to communicate between parent and child during rewrite. */
- int aof_pipe_write_data_to_child;
- int aof_pipe_read_data_from_parent;
- int aof_pipe_write_ack_to_parent;
- int aof_pipe_read_ack_from_child;
- int aof_pipe_write_ack_to_child;
- int aof_pipe_read_ack_from_parent;
- int aof_stop_sending_diff; /* If true stop sending accumulated diffs
- to child process. */
- sds aof_child_diff; /* AOF diff accumulator child side. */
- /* RDB persistence */
- long long dirty; /* Changes to DB from the last save */
- long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
- pid_t rdb_child_pid; /* PID of RDB saving child */
- struct saveparam *saveparams; /* Save points array for RDB */
- int saveparamslen; /* Number of saving points */
- char *rdb_filename; /* Name of RDB file */
- int rdb_compression; /* Use compression in RDB? */
- int rdb_checksum; /* Use RDB checksum? */
- time_t lastsave; /* Unix time of last successful save */
- time_t lastbgsave_try; /* Unix time of last attempted bgsave */
- time_t rdb_save_time_last; /* Time used by last RDB save run. */
- time_t rdb_save_time_start; /* Current RDB save start time. */
- int lastbgsave_status; /* REDIS_OK or REDIS_ERR */
- int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
- /* Propagation of commands in AOF / replication */
- redisOpArray also_propagate; /* Additional command to propagate. */
- /* Logging */
- char *logfile; /* Path of log file */
- int syslog_enabled; /* Is syslog enabled? */
- char *syslog_ident; /* Syslog ident */
- int syslog_facility; /* Syslog facility */
- /* Replication (master) */
- int slaveseldb; /* Last SELECTed DB in replication output */
- long long master_repl_offset; /* Global replication offset */
- int repl_ping_slave_period; /* Master pings the slave every N seconds */
- char *repl_backlog; /* Replication backlog for partial syncs */
- long long repl_backlog_size; /* Backlog circular buffer size */
- long long repl_backlog_histlen; /* Backlog actual data length */
- long long repl_backlog_idx; /* Backlog circular buffer current offset */
- long long repl_backlog_off; /* Replication offset of first byte in the
- backlog buffer. */
- time_t repl_backlog_time_limit; /* Time without slaves after the backlog
- gets released. */
- time_t repl_no_slaves_since; /* We have no slaves since that time.
- Only valid if server.slaves len is 0. */
- int repl_min_slaves_to_write; /* Min number of slaves to write. */
- int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
- int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
- /* Replication (slave) */
- char *masterauth; /* AUTH with this password with master */
- char *masterhost; /* Hostname of master */
- int masterport; /* Port of master */
- int repl_timeout; /* Timeout after N seconds of master idle */
- redisClient *master; /* Client that is master for this slave */
- redisClient *cached_master; /* Cached master to be reused for PSYNC. */
- int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
- int repl_state; /* Replication status if the instance is a slave */
- off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
- off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
- off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
- int repl_transfer_s; /* Slave -> Master SYNC socket */
- int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
- char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
- time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
- int repl_serve_stale_data; /* Serve stale data when link is down? */
- int repl_slave_ro; /* Slave is read only? */
- time_t repl_down_since; /* Unix time at which link with master went down */
- int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
- int slave_priority; /* Reported in INFO and used by Sentinel. */
- char repl_master_runid[REDIS_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */
- long long repl_master_initial_offset; /* Master PSYNC offset. */
- /* Replication script cache. */
- dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
- list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
- int repl_scriptcache_size; /* Max number of elements. */
- /* Synchronous replication. */
- list *clients_waiting_acks; /* Clients waiting in WAIT command. */
- int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
- /* Limits */
- int maxclients; /* Max number of simultaneous clients */
- unsigned long long maxmemory; /* Max number of memory bytes to use */
- int maxmemory_policy; /* Policy for key eviction */
- int maxmemory_samples; /* Pricision of random sampling */
- /* Blocked clients */
- unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */
- list *unblocked_clients; /* list of clients to unblock before next loop */
- list *ready_keys; /* List of readyList structures for BLPOP & co */
- /* Sort parameters - qsort_r() is only available under BSD so we
- * have to take this state global, in order to pass it to sortCompare() */
- int sort_desc;
- int sort_alpha;
- int sort_bypattern;
- int sort_store;
- /* Zip structure config, see redis.conf for more information */
- size_t hash_max_ziplist_entries;
- size_t hash_max_ziplist_value;
- size_t list_max_ziplist_entries;
- size_t list_max_ziplist_value;
- size_t set_max_intset_entries;
- size_t zset_max_ziplist_entries;
- size_t zset_max_ziplist_value;
- size_t hll_sparse_max_bytes;
- time_t unixtime; /* Unix time sampled every cron cycle. */
- long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
- /* Pubsub */
- dict *pubsub_channels; /* Map channels to list of subscribed clients */
- list *pubsub_patterns; /* A list of pubsub_patterns */
- int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
- xor of REDIS_NOTIFY... flags. */
- /* Cluster */
- int cluster_enabled; /* Is cluster enabled? */
- mstime_t cluster_node_timeout; /* Cluster node timeout. */
- char *cluster_configfile; /* Cluster auto-generated config file name. */
- struct clusterState *cluster; /* State of the cluster */
- int cluster_migration_barrier; /* Cluster replicas migration barrier. */
- int cluster_slave_validity_factor; /* Slave max data age for failover. */
- /* Scripting */
- lua_State *lua; /* The Lua interpreter. We use just one for all clients */
- redisClient *lua_client; /* The "fake client" to query Redis from Lua */
- redisClient *lua_caller; /* The client running EVAL right now, or NULL */
- dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
- mstime_t lua_time_limit; /* Script timeout in milliseconds */
- mstime_t lua_time_start; /* Start time of script, milliseconds time */
- int lua_write_dirty; /* True if a write command was called during the
- execution of the current script. */
- int lua_random_dirty; /* True if a random command was called during the
- execution of the current script. */
- int lua_timedout; /* True if we reached the time limit for script
- execution. */
- int lua_kill; /* Kill the script if true. */
- /* Latency monitor */
- long long latency_monitor_threshold;
- dict *latency_events;
- /* Assert & bug reporting */
- char *assert_failed;
- char *assert_file;
- int assert_line;
- int bug_report_start; /* True if bug report header was already logged. */
- int watchdog_period; /* Software watchdog period in ms. 0 = off */
-};
-
-typedef struct pubsubPattern {
- redisClient *client;
- robj *pattern;
-} pubsubPattern;
-
-typedef void redisCommandProc(redisClient *c);
-typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-struct redisCommand {
- char *name;
- redisCommandProc *proc;
- int arity;
- char *sflags; /* Flags as string representation, one char per flag. */
- int flags; /* The actual flags, obtained from the 'sflags' field. */
- /* Use a function to determine keys arguments in a command line.
- * Used for Redis Cluster redirect. */
- redisGetKeysProc *getkeys_proc;
- /* What keys should be loaded in background when calling this command? */
- int firstkey; /* The first argument that's a key (0 = no keys) */
- int lastkey; /* The last argument that's a key */
- int keystep; /* The step between first and last key */
- long long microseconds, calls;
-};
-
-struct redisFunctionSym {
- char *name;
- unsigned long pointer;
-};
-
-typedef struct _redisSortObject {
- robj *obj;
- union {
- double score;
- robj *cmpobj;
- } u;
-} redisSortObject;
-
-typedef struct _redisSortOperation {
- int type;
- robj *pattern;
-} redisSortOperation;
-
-/* Structure to hold list iteration abstraction. */
-typedef struct {
- robj *subject;
- unsigned char encoding;
- unsigned char direction; /* Iteration direction */
- unsigned char *zi;
- listNode *ln;
-} listTypeIterator;
-
-/* Structure for an entry while iterating over a list. */
-typedef struct {
- listTypeIterator *li;
- unsigned char *zi; /* Entry in ziplist */
- listNode *ln; /* Entry in linked list */
-} listTypeEntry;
-
-/* Structure to hold set iteration abstraction. */
-typedef struct {
- robj *subject;
- int encoding;
- int ii; /* intset iterator */
- dictIterator *di;
-} setTypeIterator;
-
-/* Structure to hold hash iteration abstraction. Note that iteration over
- * hashes involves both fields and values. Because it is possible that
- * not both are required, store pointers in the iterator to avoid
- * unnecessary memory allocation for fields/values. */
-typedef struct {
- robj *subject;
- int encoding;
-
- unsigned char *fptr, *vptr;
-
- dictIterator *di;
- dictEntry *de;
-} hashTypeIterator;
-
-#define REDIS_HASH_KEY 1
-#define REDIS_HASH_VALUE 2
-
-/*-----------------------------------------------------------------------------
- * Extern declarations
- *----------------------------------------------------------------------------*/
-
-extern struct redisServer server;
-extern struct sharedObjectsStruct shared;
-extern dictType setDictType;
-extern dictType zsetDictType;
-extern dictType clusterNodesDictType;
-extern dictType clusterNodesBlackListDictType;
-extern dictType dbDictType;
-extern dictType shaScriptObjectDictType;
-extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
-extern dictType hashDictType;
-extern dictType replScriptCacheDictType;
-
-/*-----------------------------------------------------------------------------
- * Functions prototypes
- *----------------------------------------------------------------------------*/
-
-/* Utils */
-long long ustime(void);
-long long mstime(void);
-void getRandomHexChars(char *p, unsigned int len);
-uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
-void exitFromChild(int retcode);
-size_t redisPopcount(void *s, long count);
-void redisSetProcTitle(char *title);
-
-/* networking.c -- Networking and Client related operations */
-redisClient *createClient(int fd);
-void closeTimedoutClients(void);
-void freeClient(redisClient *c);
-void freeClientAsync(redisClient *c);
-void resetClient(redisClient *c);
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void addReply(redisClient *c, robj *obj);
-void *addDeferredMultiBulkLength(redisClient *c);
-void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
-void addReplySds(redisClient *c, sds s);
-void processInputBuffer(redisClient *c);
-void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void addReplyBulk(redisClient *c, robj *obj);
-void addReplyBulkCString(redisClient *c, char *s);
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
-void addReplyBulkLongLong(redisClient *c, long long ll);
-void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void addReply(redisClient *c, robj *obj);
-void addReplySds(redisClient *c, sds s);
-void addReplyError(redisClient *c, char *err);
-void addReplyStatus(redisClient *c, char *status);
-void addReplyDouble(redisClient *c, double d);
-void addReplyLongLong(redisClient *c, long long ll);
-void addReplyMultiBulkLen(redisClient *c, long length);
-void copyClientOutputBuffer(redisClient *dst, redisClient *src);
-void *dupClientReplyValue(void *o);
-void getClientsMaxBuffers(unsigned long *longest_output_list,
- unsigned long *biggest_input_buffer);
-void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port);
-char *getClientPeerId(redisClient *client);
-sds catClientInfoString(sds s, redisClient *client);
-sds getAllClientsInfoString(void);
-void rewriteClientCommandVector(redisClient *c, int argc, ...);
-void rewriteClientCommandArgument(redisClient *c, int i, robj *newval);
-unsigned long getClientOutputBufferMemoryUsage(redisClient *c);
-void freeClientsInAsyncFreeQueue(void);
-void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
-int getClientType(redisClient *c);
-int getClientTypeByName(char *name);
-char *getClientTypeName(int class);
-void flushSlavesOutputBuffers(void);
-void disconnectSlaves(void);
-int listenToPort(int port, int *fds, int *count);
-void pauseClients(mstime_t duration);
-int clientsArePaused(void);
-int processEventsWhileBlocked(void);
-
-#ifdef __GNUC__
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-#else
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...);
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...);
-#endif
-
-/* List data type */
-void listTypeTryConversion(robj *subject, robj *value);
-void listTypePush(robj *subject, robj *value, int where);
-robj *listTypePop(robj *subject, int where);
-unsigned long listTypeLength(robj *subject);
-listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
-void listTypeReleaseIterator(listTypeIterator *li);
-int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
-robj *listTypeGet(listTypeEntry *entry);
-void listTypeInsert(listTypeEntry *entry, robj *value, int where);
-int listTypeEqual(listTypeEntry *entry, robj *o);
-void listTypeDelete(listTypeEntry *entry);
-void listTypeConvert(robj *subject, int enc);
-void unblockClientWaitingData(redisClient *c);
-void handleClientsBlockedOnLists(void);
-void popGenericCommand(redisClient *c, int where);
-void signalListAsReady(redisDb *db, robj *key);
-
-/* MULTI/EXEC/WATCH... */
-void unwatchAllKeys(redisClient *c);
-void initClientMultiState(redisClient *c);
-void freeClientMultiState(redisClient *c);
-void queueMultiCommand(redisClient *c);
-void touchWatchedKey(redisDb *db, robj *key);
-void touchWatchedKeysOnFlush(int dbid);
-void discardTransaction(redisClient *c);
-void flagTransaction(redisClient *c);
-
-/* Redis object implementation */
-void decrRefCount(robj *o);
-void decrRefCountVoid(void *o);
-void incrRefCount(robj *o);
-robj *resetRefCount(robj *obj);
-void freeStringObject(robj *o);
-void freeListObject(robj *o);
-void freeSetObject(robj *o);
-void freeZsetObject(robj *o);
-void freeHashObject(robj *o);
-robj *createObject(int type, void *ptr);
-robj *createStringObject(char *ptr, size_t len);
-robj *createRawStringObject(char *ptr, size_t len);
-robj *createEmbeddedStringObject(char *ptr, size_t len);
-robj *dupStringObject(robj *o);
-int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
-robj *tryObjectEncoding(robj *o);
-robj *getDecodedObject(robj *o);
-size_t stringObjectLen(robj *o);
-robj *createStringObjectFromLongLong(long long value);
-robj *createStringObjectFromLongDouble(long double value);
-robj *createListObject(void);
-robj *createZiplistObject(void);
-robj *createSetObject(void);
-robj *createIntsetObject(void);
-robj *createHashObject(void);
-robj *createZsetObject(void);
-robj *createZsetZiplistObject(void);
-int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg);
-int checkType(redisClient *c, robj *o, int type);
-int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg);
-int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg);
-int getLongLongFromObject(robj *o, long long *target);
-int getLongDoubleFromObject(robj *o, long double *target);
-int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg);
-char *strEncoding(int encoding);
-int compareStringObjects(robj *a, robj *b);
-int collateStringObjects(robj *a, robj *b);
-int equalStringObjects(robj *a, robj *b);
-unsigned long long estimateObjectIdleTime(robj *o);
-#define sdsEncodedObject(objptr) (objptr->encoding == REDIS_ENCODING_RAW || objptr->encoding == REDIS_ENCODING_EMBSTR)
-
-/* Synchronous I/O with timeout */
-ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
-ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
-ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
-
-/* Replication */
-void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
-void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc);
-void updateSlavesWaitingBgsave(int bgsaveerr);
-void replicationCron(void);
-void replicationHandleMasterDisconnection(void);
-void replicationCacheMaster(redisClient *c);
-void resizeReplicationBacklog(long long newsize);
-void replicationSetMaster(char *ip, int port);
-void replicationUnsetMaster(void);
-void refreshGoodSlavesCount(void);
-void replicationScriptCacheInit(void);
-void replicationScriptCacheFlush(void);
-void replicationScriptCacheAdd(sds sha1);
-int replicationScriptCacheExists(sds sha1);
-void processClientsWaitingReplicas(void);
-void unblockClientWaitingReplicas(redisClient *c);
-int replicationCountAcksByOffset(long long offset);
-void replicationSendNewlineToMaster(void);
-long long replicationGetSlaveOffset(void);
-
-/* Generic persistence functions */
-void startLoading(FILE *fp);
-void loadingProgress(off_t pos);
-void stopLoading(void);
-
-/* RDB persistence */
-#include "rdb.h"
-
-/* AOF persistence */
-void flushAppendOnlyFile(int force);
-void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
-void aofRemoveTempFile(pid_t childpid);
-int rewriteAppendOnlyFileBackground(void);
-int loadAppendOnlyFile(char *filename);
-void stopAppendOnly(void);
-int startAppendOnly(void);
-void backgroundRewriteDoneHandler(int exitcode, int bysignal);
-void aofRewriteBufferReset(void);
-unsigned long aofRewriteBufferSize(void);
-
-/* Sorted sets data type */
-
-/* Struct to hold a inclusive/exclusive range spec by score comparison. */
-typedef struct {
- double min, max;
- int minex, maxex; /* are min or max exclusive? */
-} zrangespec;
-
-/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
-typedef struct {
- robj *min, *max; /* May be set to shared.(minstring|maxstring) */
- int minex, maxex; /* are min or max exclusive? */
-} zlexrangespec;
-
-zskiplist *zslCreate(void);
-void zslFree(zskiplist *zsl);
-zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);
-unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score);
-int zslDelete(zskiplist *zsl, double score, robj *obj);
-zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
-zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
-double zzlGetScore(unsigned char *sptr);
-void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
-void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
-unsigned int zsetLength(robj *zobj);
-void zsetConvert(robj *zobj, int encoding);
-unsigned long zslGetRank(zskiplist *zsl, double score, robj *o);
-
-/* Core functions */
-int freeMemoryIfNeeded(void);
-int processCommand(redisClient *c);
-void setupSignalHandlers(void);
-struct redisCommand *lookupCommand(sds name);
-struct redisCommand *lookupCommandByCString(char *s);
-struct redisCommand *lookupCommandOrOriginal(sds name);
-void call(redisClient *c, int flags);
-void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
-void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
-void forceCommandPropagation(redisClient *c, int flags);
-int prepareForShutdown();
-#ifdef __GNUC__
-void redisLog(int level, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-#else
-void redisLog(int level, const char *fmt, ...);
-#endif
-void redisLogRaw(int level, const char *msg);
-void redisLogFromHandler(int level, const char *msg);
-void usage();
-void updateDictResizePolicy(void);
-int htNeedsResize(dict *dict);
-void oom(const char *msg);
-void populateCommandTable(void);
-void resetCommandTableStats(void);
-void adjustOpenFilesLimit(void);
-void closeListeningSockets(int unlink_unix_socket);
-void updateCachedTime(void);
-void resetServerStats(void);
-unsigned int getLRUClock(void);
-
-/* Set data type */
-robj *setTypeCreate(robj *value);
-int setTypeAdd(robj *subject, robj *value);
-int setTypeRemove(robj *subject, robj *value);
-int setTypeIsMember(robj *subject, robj *value);
-setTypeIterator *setTypeInitIterator(robj *subject);
-void setTypeReleaseIterator(setTypeIterator *si);
-int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
-robj *setTypeNextObject(setTypeIterator *si);
-int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
-unsigned long setTypeSize(robj *subject);
-void setTypeConvert(robj *subject, int enc);
-
-/* Hash data type */
-void hashTypeConvert(robj *o, int enc);
-void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
-void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
-robj *hashTypeGetObject(robj *o, robj *key);
-int hashTypeExists(robj *o, robj *key);
-int hashTypeSet(robj *o, robj *key, robj *value);
-int hashTypeDelete(robj *o, robj *key);
-unsigned long hashTypeLength(robj *o);
-hashTypeIterator *hashTypeInitIterator(robj *subject);
-void hashTypeReleaseIterator(hashTypeIterator *hi);
-int hashTypeNext(hashTypeIterator *hi);
-void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
- unsigned char **vstr,
- unsigned int *vlen,
- long long *vll);
-void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst);
-robj *hashTypeCurrentObject(hashTypeIterator *hi, int what);
-robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
-
-/* Pub / Sub */
-int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
-int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
-void freePubsubPattern(void *p);
-int listMatchPubsubPattern(void *a, void *b);
-int pubsubPublishMessage(robj *channel, robj *message);
-
-/* Keyspace events notification */
-void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
-int keyspaceEventsStringToFlags(char *classes);
-sds keyspaceEventsFlagsToString(int flags);
-
-/* Configuration */
-void loadServerConfig(char *filename, char *options);
-void appendServerSaveParams(time_t seconds, int changes);
-void resetServerSaveParams();
-struct rewriteConfigState; /* Forward declaration to export API. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force);
-int rewriteConfig(char *path);
-
-/* db.c -- Keyspace access API */
-int removeExpire(redisDb *db, robj *key);
-void propagateExpire(redisDb *db, robj *key);
-int expireIfNeeded(redisDb *db, robj *key);
-long long getExpire(redisDb *db, robj *key);
-void setExpire(redisDb *db, robj *key, long long when);
-robj *lookupKey(redisDb *db, robj *key);
-robj *lookupKeyRead(redisDb *db, robj *key);
-robj *lookupKeyWrite(redisDb *db, robj *key);
-robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply);
-robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply);
-void dbAdd(redisDb *db, robj *key, robj *val);
-void dbOverwrite(redisDb *db, robj *key, robj *val);
-void setKey(redisDb *db, robj *key, robj *val);
-int dbExists(redisDb *db, robj *key);
-robj *dbRandomKey(redisDb *db);
-int dbDelete(redisDb *db, robj *key);
-robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
-long long emptyDb(void(callback)(void*));
-int selectDb(redisClient *c, int id);
-void signalModifiedKey(redisDb *db, robj *key);
-void signalFlushedDb(int dbid);
-unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
-unsigned int countKeysInSlot(unsigned int hashslot);
-unsigned int delKeysInSlot(unsigned int hashslot);
-int verifyClusterConfigWithData(void);
-void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor);
-int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor);
-
-/* API to get key arguments from commands */
-int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-void getKeysFreeResult(int *result);
-int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys);
-int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-
-/* Cluster */
-void clusterInit(void);
-unsigned short crc16(const char *buf, int len);
-unsigned int keyHashSlot(char *key, int keylen);
-void clusterCron(void);
-void clusterPropagatePublish(robj *channel, robj *message);
-void migrateCloseTimedoutSockets(void);
-void clusterBeforeSleep(void);
-
-/* Sentinel */
-void initSentinelConfig(void);
-void initSentinel(void);
-void sentinelTimer(void);
-char *sentinelHandleConfiguration(char **argv, int argc);
-void sentinelIsRunning(void);
-
-/* Scripting */
-void scriptingInit(void);
-
-/* Blocked clients */
-void processUnblockedClients(void);
-void blockClient(redisClient *c, int btype);
-void unblockClient(redisClient *c);
-void replyToBlockedClientTimedOut(redisClient *c);
-int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit);
-
-/* Git SHA1 */
-char *redisGitSHA1(void);
-char *redisGitDirty(void);
-uint64_t redisBuildId(void);
-
-/* Commands prototypes */
-void authCommand(redisClient *c);
-void pingCommand(redisClient *c);
-void echoCommand(redisClient *c);
-void commandCommand(redisClient *c);
-void setCommand(redisClient *c);
-void setnxCommand(redisClient *c);
-void setexCommand(redisClient *c);
-void psetexCommand(redisClient *c);
-void getCommand(redisClient *c);
-void delCommand(redisClient *c);
-void existsCommand(redisClient *c);
-void setbitCommand(redisClient *c);
-void getbitCommand(redisClient *c);
-void setrangeCommand(redisClient *c);
-void getrangeCommand(redisClient *c);
-void incrCommand(redisClient *c);
-void decrCommand(redisClient *c);
-void incrbyCommand(redisClient *c);
-void decrbyCommand(redisClient *c);
-void incrbyfloatCommand(redisClient *c);
-void selectCommand(redisClient *c);
-void randomkeyCommand(redisClient *c);
-void keysCommand(redisClient *c);
-void scanCommand(redisClient *c);
-void dbsizeCommand(redisClient *c);
-void lastsaveCommand(redisClient *c);
-void saveCommand(redisClient *c);
-void bgsaveCommand(redisClient *c);
-void bgrewriteaofCommand(redisClient *c);
-void shutdownCommand(redisClient *c);
-void moveCommand(redisClient *c);
-void renameCommand(redisClient *c);
-void renamenxCommand(redisClient *c);
-void lpushCommand(redisClient *c);
-void rpushCommand(redisClient *c);
-void lpushxCommand(redisClient *c);
-void rpushxCommand(redisClient *c);
-void linsertCommand(redisClient *c);
-void lpopCommand(redisClient *c);
-void rpopCommand(redisClient *c);
-void llenCommand(redisClient *c);
-void lindexCommand(redisClient *c);
-void lrangeCommand(redisClient *c);
-void ltrimCommand(redisClient *c);
-void typeCommand(redisClient *c);
-void lsetCommand(redisClient *c);
-void saddCommand(redisClient *c);
-void sremCommand(redisClient *c);
-void smoveCommand(redisClient *c);
-void sismemberCommand(redisClient *c);
-void scardCommand(redisClient *c);
-void spopCommand(redisClient *c);
-void srandmemberCommand(redisClient *c);
-void sinterCommand(redisClient *c);
-void sinterstoreCommand(redisClient *c);
-void sunionCommand(redisClient *c);
-void sunionstoreCommand(redisClient *c);
-void sdiffCommand(redisClient *c);
-void sdiffstoreCommand(redisClient *c);
-void sscanCommand(redisClient *c);
-void syncCommand(redisClient *c);
-void flushdbCommand(redisClient *c);
-void flushallCommand(redisClient *c);
-void sortCommand(redisClient *c);
-void lremCommand(redisClient *c);
-void rpoplpushCommand(redisClient *c);
-void infoCommand(redisClient *c);
-void mgetCommand(redisClient *c);
-void monitorCommand(redisClient *c);
-void expireCommand(redisClient *c);
-void expireatCommand(redisClient *c);
-void pexpireCommand(redisClient *c);
-void pexpireatCommand(redisClient *c);
-void getsetCommand(redisClient *c);
-void ttlCommand(redisClient *c);
-void pttlCommand(redisClient *c);
-void persistCommand(redisClient *c);
-void slaveofCommand(redisClient *c);
-void roleCommand(redisClient *c);
-void debugCommand(redisClient *c);
-void msetCommand(redisClient *c);
-void msetnxCommand(redisClient *c);
-void zaddCommand(redisClient *c);
-void zincrbyCommand(redisClient *c);
-void zrangeCommand(redisClient *c);
-void zrangebyscoreCommand(redisClient *c);
-void zrevrangebyscoreCommand(redisClient *c);
-void zrangebylexCommand(redisClient *c);
-void zrevrangebylexCommand(redisClient *c);
-void zcountCommand(redisClient *c);
-void zlexcountCommand(redisClient *c);
-void zrevrangeCommand(redisClient *c);
-void zcardCommand(redisClient *c);
-void zremCommand(redisClient *c);
-void zscoreCommand(redisClient *c);
-void zremrangebyscoreCommand(redisClient *c);
-void zremrangebylexCommand(redisClient *c);
-void multiCommand(redisClient *c);
-void execCommand(redisClient *c);
-void discardCommand(redisClient *c);
-void blpopCommand(redisClient *c);
-void brpopCommand(redisClient *c);
-void brpoplpushCommand(redisClient *c);
-void appendCommand(redisClient *c);
-void strlenCommand(redisClient *c);
-void zrankCommand(redisClient *c);
-void zrevrankCommand(redisClient *c);
-void hsetCommand(redisClient *c);
-void hsetnxCommand(redisClient *c);
-void hgetCommand(redisClient *c);
-void hmsetCommand(redisClient *c);
-void hmgetCommand(redisClient *c);
-void hdelCommand(redisClient *c);
-void hlenCommand(redisClient *c);
-void zremrangebyrankCommand(redisClient *c);
-void zunionstoreCommand(redisClient *c);
-void zinterstoreCommand(redisClient *c);
-void zscanCommand(redisClient *c);
-void hkeysCommand(redisClient *c);
-void hvalsCommand(redisClient *c);
-void hgetallCommand(redisClient *c);
-void hexistsCommand(redisClient *c);
-void hscanCommand(redisClient *c);
-void configCommand(redisClient *c);
-void hincrbyCommand(redisClient *c);
-void hincrbyfloatCommand(redisClient *c);
-void subscribeCommand(redisClient *c);
-void unsubscribeCommand(redisClient *c);
-void psubscribeCommand(redisClient *c);
-void punsubscribeCommand(redisClient *c);
-void publishCommand(redisClient *c);
-void pubsubCommand(redisClient *c);
-void watchCommand(redisClient *c);
-void unwatchCommand(redisClient *c);
-void clusterCommand(redisClient *c);
-void restoreCommand(redisClient *c);
-void migrateCommand(redisClient *c);
-void askingCommand(redisClient *c);
-void readonlyCommand(redisClient *c);
-void readwriteCommand(redisClient *c);
-void dumpCommand(redisClient *c);
-void objectCommand(redisClient *c);
-void clientCommand(redisClient *c);
-void evalCommand(redisClient *c);
-void evalShaCommand(redisClient *c);
-void scriptCommand(redisClient *c);
-void timeCommand(redisClient *c);
-void bitopCommand(redisClient *c);
-void bitcountCommand(redisClient *c);
-void bitposCommand(redisClient *c);
-void replconfCommand(redisClient *c);
-void waitCommand(redisClient *c);
-void pfselftestCommand(redisClient *c);
-void pfaddCommand(redisClient *c);
-void pfcountCommand(redisClient *c);
-void pfmergeCommand(redisClient *c);
-void pfdebugCommand(redisClient *c);
-void latencyCommand(redisClient *c);
-
-#if defined(__GNUC__)
-void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
-void free(void *ptr) __attribute__ ((deprecated));
-void *malloc(size_t size) __attribute__ ((deprecated));
-void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
-#endif
-
-/* Debugging stuff */
-void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line);
-void _redisAssert(char *estr, char *file, int line);
-void _redisPanic(char *msg, char *file, int line);
-void bugReportStart(void);
-void redisLogObjectDebugInfo(robj *o);
-void sigsegvHandler(int sig, siginfo_t *info, void *secret);
-sds genRedisInfoString(char *section);
-void enableWatchdog(int period);
-void disableWatchdog(void);
-void watchdogScheduleSignal(int period);
-void redisLogHexDump(int level, char *descr, void *value, size_t len);
-
-#define redisDebug(fmt, ...) \
- printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
-#define redisDebugMark() \
- printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
-
-#endif
diff --git a/src/redisassert.h b/src/redisassert.h
index e5825c0f5..c9b78327c 100644
--- a/src/redisassert.h
+++ b/src/redisassert.h
@@ -40,8 +40,10 @@
#include <unistd.h> /* for _exit() */
-#define assert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define assert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define panic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1)
-void _redisAssert(char *estr, char *file, int line);
+void _serverAssert(char *estr, char *file, int line);
+void _serverPanic(const char *file, int line, const char *msg, ...);
#endif
diff --git a/src/redismodule.h b/src/redismodule.h
new file mode 100644
index 000000000..7fc0fec40
--- /dev/null
+++ b/src/redismodule.h
@@ -0,0 +1,358 @@
+#ifndef REDISMODULE_H
+#define REDISMODULE_H
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* ---------------- Defines common between core and modules --------------- */
+
+/* Error status return values. */
+#define REDISMODULE_OK 0
+#define REDISMODULE_ERR 1
+
+/* API versions. */
+#define REDISMODULE_APIVER_1 1
+
+/* API flags and constants */
+#define REDISMODULE_READ (1<<0)
+#define REDISMODULE_WRITE (1<<1)
+
+#define REDISMODULE_LIST_HEAD 0
+#define REDISMODULE_LIST_TAIL 1
+
+/* Key types. */
+#define REDISMODULE_KEYTYPE_EMPTY 0
+#define REDISMODULE_KEYTYPE_STRING 1
+#define REDISMODULE_KEYTYPE_LIST 2
+#define REDISMODULE_KEYTYPE_HASH 3
+#define REDISMODULE_KEYTYPE_SET 4
+#define REDISMODULE_KEYTYPE_ZSET 5
+#define REDISMODULE_KEYTYPE_MODULE 6
+
+/* Reply types. */
+#define REDISMODULE_REPLY_UNKNOWN -1
+#define REDISMODULE_REPLY_STRING 0
+#define REDISMODULE_REPLY_ERROR 1
+#define REDISMODULE_REPLY_INTEGER 2
+#define REDISMODULE_REPLY_ARRAY 3
+#define REDISMODULE_REPLY_NULL 4
+
+/* Postponed array length. */
+#define REDISMODULE_POSTPONED_ARRAY_LEN -1
+
+/* Expire */
+#define REDISMODULE_NO_EXPIRE -1
+
+/* Sorted set API flags. */
+#define REDISMODULE_ZADD_XX (1<<0)
+#define REDISMODULE_ZADD_NX (1<<1)
+#define REDISMODULE_ZADD_ADDED (1<<2)
+#define REDISMODULE_ZADD_UPDATED (1<<3)
+#define REDISMODULE_ZADD_NOP (1<<4)
+
+/* Hash API flags. */
+#define REDISMODULE_HASH_NONE 0
+#define REDISMODULE_HASH_NX (1<<0)
+#define REDISMODULE_HASH_XX (1<<1)
+#define REDISMODULE_HASH_CFIELDS (1<<2)
+#define REDISMODULE_HASH_EXISTS (1<<3)
+
+/* A special pointer that we can use between the core and the module to signal
+ * field deletion, and that is impossible to be a valid pointer. */
+#define REDISMODULE_HASH_DELETE ((RedisModuleString*)(long)1)
+
+/* Error messages. */
+#define REDISMODULE_ERRORMSG_WRONGTYPE "WRONGTYPE Operation against a key holding the wrong kind of value"
+
+#define REDISMODULE_POSITIVE_INFINITE (1.0/0.0)
+#define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0)
+
+#define REDISMODULE_NOT_USED(V) ((void) V)
+
+/* ------------------------- End of common defines ------------------------ */
+
+#ifndef REDISMODULE_CORE
+
+typedef long long mstime_t;
+
+/* Incomplete structures for compiler checks but opaque access. */
+typedef struct RedisModuleCtx RedisModuleCtx;
+typedef struct RedisModuleKey RedisModuleKey;
+typedef struct RedisModuleString RedisModuleString;
+typedef struct RedisModuleCallReply RedisModuleCallReply;
+typedef struct RedisModuleIO RedisModuleIO;
+typedef struct RedisModuleType RedisModuleType;
+typedef struct RedisModuleDigest RedisModuleDigest;
+typedef struct RedisModuleBlockedClient RedisModuleBlockedClient;
+
+typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+
+typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver);
+typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value);
+typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value);
+typedef size_t (*RedisModuleTypeMemUsageFunc)(const void *value);
+typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value);
+typedef void (*RedisModuleTypeFreeFunc)(void *value);
+
+#define REDISMODULE_TYPE_METHOD_VERSION 1
+typedef struct RedisModuleTypeMethods {
+ uint64_t version;
+ RedisModuleTypeLoadFunc rdb_load;
+ RedisModuleTypeSaveFunc rdb_save;
+ RedisModuleTypeRewriteFunc aof_rewrite;
+ RedisModuleTypeMemUsageFunc mem_usage;
+ RedisModuleTypeDigestFunc digest;
+ RedisModuleTypeFreeFunc free;
+} RedisModuleTypeMethods;
+
+#define REDISMODULE_GET_API(name) \
+ RedisModule_GetApi("RedisModule_" #name, ((void **)&RedisModule_ ## name))
+
+#define REDISMODULE_API_FUNC(x) (*x)
+
+
+void *REDISMODULE_API_FUNC(RedisModule_Alloc)(size_t bytes);
+void *REDISMODULE_API_FUNC(RedisModule_Realloc)(void *ptr, size_t bytes);
+void REDISMODULE_API_FUNC(RedisModule_Free)(void *ptr);
+void *REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size);
+char *REDISMODULE_API_FUNC(RedisModule_Strdup)(const char *str);
+int REDISMODULE_API_FUNC(RedisModule_GetApi)(const char *, void *);
+int REDISMODULE_API_FUNC(RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep);
+int REDISMODULE_API_FUNC(RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver);
+int REDISMODULE_API_FUNC(RedisModule_WrongArity)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithLongLong)(RedisModuleCtx *ctx, long long ll);
+int REDISMODULE_API_FUNC(RedisModule_GetSelectedDb)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_SelectDb)(RedisModuleCtx *ctx, int newid);
+void *REDISMODULE_API_FUNC(RedisModule_OpenKey)(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode);
+void REDISMODULE_API_FUNC(RedisModule_CloseKey)(RedisModuleKey *kp);
+int REDISMODULE_API_FUNC(RedisModule_KeyType)(RedisModuleKey *kp);
+size_t REDISMODULE_API_FUNC(RedisModule_ValueLength)(RedisModuleKey *kp);
+int REDISMODULE_API_FUNC(RedisModule_ListPush)(RedisModuleKey *kp, int where, RedisModuleString *ele);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_ListPop)(RedisModuleKey *key, int where);
+RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_Call)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...);
+const char *REDISMODULE_API_FUNC(RedisModule_CallReplyProto)(RedisModuleCallReply *reply, size_t *len);
+void REDISMODULE_API_FUNC(RedisModule_FreeCallReply)(RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_CallReplyType)(RedisModuleCallReply *reply);
+long long REDISMODULE_API_FUNC(RedisModule_CallReplyInteger)(RedisModuleCallReply *reply);
+size_t REDISMODULE_API_FUNC(RedisModule_CallReplyLength)(RedisModuleCallReply *reply);
+RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_CallReplyArrayElement)(RedisModuleCallReply *reply, size_t idx);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str);
+const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, const char *msg);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len);
+void REDISMODULE_API_FUNC(RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithNull)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll);
+int REDISMODULE_API_FUNC(RedisModule_StringToDouble)(const RedisModuleString *str, double *d);
+void REDISMODULE_API_FUNC(RedisModule_AutoMemory)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...);
+int REDISMODULE_API_FUNC(RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx);
+const char *REDISMODULE_API_FUNC(RedisModule_CallReplyStringPtr)(RedisModuleCallReply *reply, size_t *len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromCallReply)(RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_DeleteKey)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_StringSet)(RedisModuleKey *key, RedisModuleString *str);
+char *REDISMODULE_API_FUNC(RedisModule_StringDMA)(RedisModuleKey *key, size_t *len, int mode);
+int REDISMODULE_API_FUNC(RedisModule_StringTruncate)(RedisModuleKey *key, size_t newlen);
+mstime_t REDISMODULE_API_FUNC(RedisModule_GetExpire)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_SetExpire)(RedisModuleKey *key, mstime_t expire);
+int REDISMODULE_API_FUNC(RedisModule_ZsetAdd)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr);
+int REDISMODULE_API_FUNC(RedisModule_ZsetIncrby)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore);
+int REDISMODULE_API_FUNC(RedisModule_ZsetScore)(RedisModuleKey *key, RedisModuleString *ele, double *score);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRem)(RedisModuleKey *key, RedisModuleString *ele, int *deleted);
+void REDISMODULE_API_FUNC(RedisModule_ZsetRangeStop)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetFirstInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex);
+int REDISMODULE_API_FUNC(RedisModule_ZsetLastInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex);
+int REDISMODULE_API_FUNC(RedisModule_ZsetFirstInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max);
+int REDISMODULE_API_FUNC(RedisModule_ZsetLastInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_ZsetRangeCurrentElement)(RedisModuleKey *key, double *score);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangeNext)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangePrev)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangeEndReached)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_HashSet)(RedisModuleKey *key, int flags, ...);
+int REDISMODULE_API_FUNC(RedisModule_HashGet)(RedisModuleKey *key, int flags, ...);
+int REDISMODULE_API_FUNC(RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos);
+unsigned long long REDISMODULE_API_FUNC(RedisModule_GetClientId)(RedisModuleCtx *ctx);
+void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes);
+RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods);
+int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value);
+RedisModuleType *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetType)(RedisModuleKey *key);
+void *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetValue)(RedisModuleKey *key);
+void REDISMODULE_API_FUNC(RedisModule_SaveUnsigned)(RedisModuleIO *io, uint64_t value);
+uint64_t REDISMODULE_API_FUNC(RedisModule_LoadUnsigned)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_SaveSigned)(RedisModuleIO *io, int64_t value);
+int64_t REDISMODULE_API_FUNC(RedisModule_LoadSigned)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_EmitAOF)(RedisModuleIO *io, const char *cmdname, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_SaveString)(RedisModuleIO *io, RedisModuleString *s);
+void REDISMODULE_API_FUNC(RedisModule_SaveStringBuffer)(RedisModuleIO *io, const char *str, size_t len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_LoadString)(RedisModuleIO *io);
+char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr);
+void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value);
+double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value);
+float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...);
+int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str);
+int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b);
+RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io);
+long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void);
+void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele);
+void REDISMODULE_API_FUNC(RedisModule_DigestEndSequence)(RedisModuleDigest *md);
+
+/* Experimental APIs */
+#ifdef REDISMODULE_EXPERIMENTAL_API
+RedisModuleBlockedClient *REDISMODULE_API_FUNC(RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(void*), long long timeout_ms);
+int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata);
+int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx);
+void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_AbortBlock)(RedisModuleBlockedClient *bc);
+RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc);
+void REDISMODULE_API_FUNC(RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx);
+#endif
+
+/* This is included inline inside each Redis module. */
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused));
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) {
+ void *getapifuncptr = ((void**)ctx)[0];
+ RedisModule_GetApi = (int (*)(const char *, void *)) (unsigned long)getapifuncptr;
+ REDISMODULE_GET_API(Alloc);
+ REDISMODULE_GET_API(Calloc);
+ REDISMODULE_GET_API(Free);
+ REDISMODULE_GET_API(Realloc);
+ REDISMODULE_GET_API(Strdup);
+ REDISMODULE_GET_API(CreateCommand);
+ REDISMODULE_GET_API(SetModuleAttribs);
+ REDISMODULE_GET_API(WrongArity);
+ REDISMODULE_GET_API(ReplyWithLongLong);
+ REDISMODULE_GET_API(ReplyWithError);
+ REDISMODULE_GET_API(ReplyWithSimpleString);
+ REDISMODULE_GET_API(ReplyWithArray);
+ REDISMODULE_GET_API(ReplySetArrayLength);
+ REDISMODULE_GET_API(ReplyWithStringBuffer);
+ REDISMODULE_GET_API(ReplyWithString);
+ REDISMODULE_GET_API(ReplyWithNull);
+ REDISMODULE_GET_API(ReplyWithCallReply);
+ REDISMODULE_GET_API(ReplyWithDouble);
+ REDISMODULE_GET_API(ReplySetArrayLength);
+ REDISMODULE_GET_API(GetSelectedDb);
+ REDISMODULE_GET_API(SelectDb);
+ REDISMODULE_GET_API(OpenKey);
+ REDISMODULE_GET_API(CloseKey);
+ REDISMODULE_GET_API(KeyType);
+ REDISMODULE_GET_API(ValueLength);
+ REDISMODULE_GET_API(ListPush);
+ REDISMODULE_GET_API(ListPop);
+ REDISMODULE_GET_API(StringToLongLong);
+ REDISMODULE_GET_API(StringToDouble);
+ REDISMODULE_GET_API(Call);
+ REDISMODULE_GET_API(CallReplyProto);
+ REDISMODULE_GET_API(FreeCallReply);
+ REDISMODULE_GET_API(CallReplyInteger);
+ REDISMODULE_GET_API(CallReplyType);
+ REDISMODULE_GET_API(CallReplyLength);
+ REDISMODULE_GET_API(CallReplyArrayElement);
+ REDISMODULE_GET_API(CallReplyStringPtr);
+ REDISMODULE_GET_API(CreateStringFromCallReply);
+ REDISMODULE_GET_API(CreateString);
+ REDISMODULE_GET_API(CreateStringFromLongLong);
+ REDISMODULE_GET_API(CreateStringFromString);
+ REDISMODULE_GET_API(CreateStringPrintf);
+ REDISMODULE_GET_API(FreeString);
+ REDISMODULE_GET_API(StringPtrLen);
+ REDISMODULE_GET_API(AutoMemory);
+ REDISMODULE_GET_API(Replicate);
+ REDISMODULE_GET_API(ReplicateVerbatim);
+ REDISMODULE_GET_API(DeleteKey);
+ REDISMODULE_GET_API(StringSet);
+ REDISMODULE_GET_API(StringDMA);
+ REDISMODULE_GET_API(StringTruncate);
+ REDISMODULE_GET_API(GetExpire);
+ REDISMODULE_GET_API(SetExpire);
+ REDISMODULE_GET_API(ZsetAdd);
+ REDISMODULE_GET_API(ZsetIncrby);
+ REDISMODULE_GET_API(ZsetScore);
+ REDISMODULE_GET_API(ZsetRem);
+ REDISMODULE_GET_API(ZsetRangeStop);
+ REDISMODULE_GET_API(ZsetFirstInScoreRange);
+ REDISMODULE_GET_API(ZsetLastInScoreRange);
+ REDISMODULE_GET_API(ZsetFirstInLexRange);
+ REDISMODULE_GET_API(ZsetLastInLexRange);
+ REDISMODULE_GET_API(ZsetRangeCurrentElement);
+ REDISMODULE_GET_API(ZsetRangeNext);
+ REDISMODULE_GET_API(ZsetRangePrev);
+ REDISMODULE_GET_API(ZsetRangeEndReached);
+ REDISMODULE_GET_API(HashSet);
+ REDISMODULE_GET_API(HashGet);
+ REDISMODULE_GET_API(IsKeysPositionRequest);
+ REDISMODULE_GET_API(KeyAtPos);
+ REDISMODULE_GET_API(GetClientId);
+ REDISMODULE_GET_API(PoolAlloc);
+ REDISMODULE_GET_API(CreateDataType);
+ REDISMODULE_GET_API(ModuleTypeSetValue);
+ REDISMODULE_GET_API(ModuleTypeGetType);
+ REDISMODULE_GET_API(ModuleTypeGetValue);
+ REDISMODULE_GET_API(SaveUnsigned);
+ REDISMODULE_GET_API(LoadUnsigned);
+ REDISMODULE_GET_API(SaveSigned);
+ REDISMODULE_GET_API(LoadSigned);
+ REDISMODULE_GET_API(SaveString);
+ REDISMODULE_GET_API(SaveStringBuffer);
+ REDISMODULE_GET_API(LoadString);
+ REDISMODULE_GET_API(LoadStringBuffer);
+ REDISMODULE_GET_API(SaveDouble);
+ REDISMODULE_GET_API(LoadDouble);
+ REDISMODULE_GET_API(SaveFloat);
+ REDISMODULE_GET_API(LoadFloat);
+ REDISMODULE_GET_API(EmitAOF);
+ REDISMODULE_GET_API(Log);
+ REDISMODULE_GET_API(LogIOError);
+ REDISMODULE_GET_API(StringAppendBuffer);
+ REDISMODULE_GET_API(RetainString);
+ REDISMODULE_GET_API(StringCompare);
+ REDISMODULE_GET_API(GetContextFromIO);
+ REDISMODULE_GET_API(Milliseconds);
+ REDISMODULE_GET_API(DigestAddStringBuffer);
+ REDISMODULE_GET_API(DigestAddLongLong);
+ REDISMODULE_GET_API(DigestEndSequence);
+
+#ifdef REDISMODULE_EXPERIMENTAL_API
+ REDISMODULE_GET_API(GetThreadSafeContext);
+ REDISMODULE_GET_API(FreeThreadSafeContext);
+ REDISMODULE_GET_API(ThreadSafeContextLock);
+ REDISMODULE_GET_API(ThreadSafeContextUnlock);
+ REDISMODULE_GET_API(BlockClient);
+ REDISMODULE_GET_API(UnblockClient);
+ REDISMODULE_GET_API(IsBlockedReplyRequest);
+ REDISMODULE_GET_API(IsBlockedTimeoutRequest);
+ REDISMODULE_GET_API(GetBlockedClientPrivateData);
+ REDISMODULE_GET_API(AbortBlock);
+#endif
+
+ RedisModule_SetModuleAttribs(ctx,name,ver,apiver);
+ return REDISMODULE_OK;
+}
+
+#else
+
+/* Things only defined for the modules core, not exported to modules
+ * including this file. */
+#define RedisModuleString robj
+
+#endif /* REDISMODULE_CORE */
+#endif /* REDISMOUDLE_H */
diff --git a/src/replication.c b/src/replication.c
index ff0a0141a..6be5d2631 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -29,7 +29,7 @@
*/
-#include "redis.h"
+#include "server.h"
#include <sys/time.h>
#include <unistd.h>
@@ -40,19 +40,45 @@
void replicationDiscardCachedMaster(void);
void replicationResurrectCachedMaster(int newfd);
void replicationSendAck(void);
+void putSlaveOnline(client *slave);
+int cancelReplicationHandshake(void);
+
+/* --------------------------- Utility functions ---------------------------- */
+
+/* Return the pointer to a string representing the slave ip:listening_port
+ * pair. Mostly useful for logging, since we want to log a slave using its
+ * IP address and its listening port which is more clear for the user, for
+ * example: "Closing connection with slave 10.1.2.3:6380". */
+char *replicationGetSlaveName(client *c) {
+ static char buf[NET_PEER_ID_LEN];
+ char ip[NET_IP_STR_LEN];
+
+ ip[0] = '\0';
+ buf[0] = '\0';
+ if (c->slave_ip[0] != '\0' ||
+ anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1)
+ {
+ /* Note that the 'ip' buffer is always larger than 'c->slave_ip' */
+ if (c->slave_ip[0] != '\0') memcpy(ip,c->slave_ip,sizeof(c->slave_ip));
+
+ if (c->slave_listening_port)
+ anetFormatAddr(buf,sizeof(buf),ip,c->slave_listening_port);
+ else
+ snprintf(buf,sizeof(buf),"%s:<unknown-slave-port>",ip);
+ } else {
+ snprintf(buf,sizeof(buf),"client id #%llu",
+ (unsigned long long) c->id);
+ }
+ return buf;
+}
/* ---------------------------------- MASTER -------------------------------- */
void createReplicationBacklog(void) {
- redisAssert(server.repl_backlog == NULL);
+ serverAssert(server.repl_backlog == NULL);
server.repl_backlog = zmalloc(server.repl_backlog_size);
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
- /* When a new backlog buffer is created, we increment the replication
- * offset by one to make sure we'll not be able to PSYNC with any
- * previous slave. This is needed because we avoid incrementing the
- * master_repl_offset if no backlog exists nor slaves are attached. */
- server.master_repl_offset++;
/* We don't have any data inside our buffer, but virtually the first
* byte we have is the next byte that will be generated for the
@@ -67,8 +93,8 @@ void createReplicationBacklog(void) {
* the most recent bytes, or the same data and more free space in case the
* buffer is enlarged). */
void resizeReplicationBacklog(long long newsize) {
- if (newsize < REDIS_REPL_BACKLOG_MIN_SIZE)
- newsize = REDIS_REPL_BACKLOG_MIN_SIZE;
+ if (newsize < CONFIG_REPL_BACKLOG_MIN_SIZE)
+ newsize = CONFIG_REPL_BACKLOG_MIN_SIZE;
if (server.repl_backlog_size == newsize) return;
server.repl_backlog_size = newsize;
@@ -82,13 +108,13 @@ void resizeReplicationBacklog(long long newsize) {
server.repl_backlog = zmalloc(server.repl_backlog_size);
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
- /* Next byte we have is... the next since the buffer is emtpy. */
+ /* Next byte we have is... the next since the buffer is empty. */
server.repl_backlog_off = server.master_repl_offset+1;
}
}
void freeReplicationBacklog(void) {
- redisAssert(listLength(server.slaves) == 0);
+ serverAssert(listLength(server.slaves) == 0);
zfree(server.repl_backlog);
server.repl_backlog = NULL;
}
@@ -96,7 +122,7 @@ void freeReplicationBacklog(void) {
/* Add data to the replication backlog.
* This function also increments the global replication offset stored at
* server.master_repl_offset, because there is no case where we want to feed
- * the backlog without incrementing the buffer. */
+ * the backlog without incrementing the offset. */
void feedReplicationBacklog(void *ptr, size_t len) {
unsigned char *p = ptr;
@@ -125,11 +151,11 @@ void feedReplicationBacklog(void *ptr, size_t len) {
/* Wrapper for feedReplicationBacklog() that takes Redis string objects
* as input. */
void feedReplicationBacklogWithObject(robj *o) {
- char llstr[REDIS_LONGSTR_SIZE];
+ char llstr[LONG_STR_SIZE];
void *p;
size_t len;
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
len = ll2string(llstr,sizeof(llstr),(long)o->ptr);
p = llstr;
} else {
@@ -139,31 +165,43 @@ void feedReplicationBacklogWithObject(robj *o) {
feedReplicationBacklog(p,len);
}
+/* Propagate write commands to slaves, and populate the replication backlog
+ * as well. This function is used if the instance is a master: we use
+ * the commands received by our clients in order to create the replication
+ * stream. Instead if the instance is a slave and has sub-slaves attached,
+ * we use replicationFeedSlavesFromMaster() */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int j, len;
- char llstr[REDIS_LONGSTR_SIZE];
+ char llstr[LONG_STR_SIZE];
+
+ /* If the instance is not a top level master, return ASAP: we'll just proxy
+ * the stream of data we receive from our master instead, in order to
+ * propagate *identical* replication stream. In this way this slave can
+ * advertise the same replication ID as the master (since it shares the
+ * master replication history and has the same backlog and offsets). */
+ if (server.masterhost != NULL) return;
/* If there aren't slaves, and there is no backlog buffer to populate,
* we can return ASAP. */
if (server.repl_backlog == NULL && listLength(slaves) == 0) return;
/* We can't have slaves attached and no backlog. */
- redisAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
+ serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
/* Send SELECT command to every slave if needed. */
if (server.slaveseldb != dictid) {
robj *selectcmd;
/* For a few DBs we have pre-computed SELECT command. */
- if (dictid >= 0 && dictid < REDIS_SHARED_SELECT_CMDS) {
+ if (dictid >= 0 && dictid < PROTO_SHARED_SELECT_CMDS) {
selectcmd = shared.select[dictid];
} else {
int dictid_len;
dictid_len = ll2string(llstr,sizeof(llstr),dictid);
- selectcmd = createObject(REDIS_STRING,
+ selectcmd = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),
"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
dictid_len, llstr));
@@ -175,18 +213,19 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* Send it to slaves. */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
addReply(slave,selectcmd);
}
- if (dictid < 0 || dictid >= REDIS_SHARED_SELECT_CMDS)
+ if (dictid < 0 || dictid >= PROTO_SHARED_SELECT_CMDS)
decrRefCount(selectcmd);
}
server.slaveseldb = dictid;
/* Write the command to the replication backlog if any. */
if (server.repl_backlog) {
- char aux[REDIS_LONGSTR_SIZE+3];
+ char aux[LONG_STR_SIZE+3];
/* Add the multi bulk reply length. */
aux[0] = '*';
@@ -200,7 +239,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* We need to feed the buffer with the object as a bulk reply
* not just as a plain string, so create the $..CRLF payload len
- * ad add the final CRLF */
+ * and add the final CRLF */
aux[0] = '$';
len = ll2string(aux+1,sizeof(aux)-1,objlen);
aux[len+1] = '\r';
@@ -214,10 +253,10 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* Write the command to every slave. */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start */
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
/* Feed slaves that are waiting for the initial SYNC (so these commands
* are queued in the output buffer until the initial SYNC completes),
@@ -233,7 +272,35 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
}
}
-void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc) {
+/* This function is used in order to proxy what we receive from our master
+ * to our sub-slaves. */
+#include <ctype.h>
+void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen) {
+ listNode *ln;
+ listIter li;
+
+ /* Debugging: this is handy to see the stream sent from master
+ * to slaves. Disabled with if(0). */
+ if (0) {
+ printf("%zu:",buflen);
+ for (size_t j = 0; j < buflen; j++) {
+ printf("%c", isprint(buf[j]) ? buf[j] : '.');
+ }
+ printf("\n");
+ }
+
+ if (server.repl_backlog) feedReplicationBacklog(buf,buflen);
+ listRewind(slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ /* Don't feed slaves that are still waiting for BGSAVE to start */
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
+ addReplyString(slave,buf,buflen);
+ }
+}
+
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int j;
@@ -243,16 +310,16 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
gettimeofday(&tv,NULL);
cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec);
- if (c->flags & REDIS_LUA_CLIENT) {
+ if (c->flags & CLIENT_LUA) {
cmdrepr = sdscatprintf(cmdrepr,"[%d lua] ",dictid);
- } else if (c->flags & REDIS_UNIX_SOCKET) {
+ } else if (c->flags & CLIENT_UNIX_SOCKET) {
cmdrepr = sdscatprintf(cmdrepr,"[%d unix:%s] ",dictid,server.unixsocket);
} else {
cmdrepr = sdscatprintf(cmdrepr,"[%d %s] ",dictid,getClientPeerId(c));
}
for (j = 0; j < argc; j++) {
- if (argv[j]->encoding == REDIS_ENCODING_INT) {
+ if (argv[j]->encoding == OBJ_ENCODING_INT) {
cmdrepr = sdscatprintf(cmdrepr, "\"%ld\"", (long)argv[j]->ptr);
} else {
cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
@@ -262,11 +329,11 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
cmdrepr = sdscatlen(cmdrepr," ",1);
}
cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
- cmdobj = createObject(REDIS_STRING,cmdrepr);
+ cmdobj = createObject(OBJ_STRING,cmdrepr);
listRewind(monitors,&li);
while((ln = listNext(&li))) {
- redisClient *monitor = ln->value;
+ client *monitor = ln->value;
addReply(monitor,cmdobj);
}
decrRefCount(cmdobj);
@@ -274,35 +341,35 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
/* Feed the slave 'c' with the replication backlog starting from the
* specified 'offset' up to the end of the backlog. */
-long long addReplyReplicationBacklog(redisClient *c, long long offset) {
+long long addReplyReplicationBacklog(client *c, long long offset) {
long long j, skip, len;
- redisLog(REDIS_DEBUG, "[PSYNC] Slave request offset: %lld", offset);
+ serverLog(LL_DEBUG, "[PSYNC] Slave request offset: %lld", offset);
if (server.repl_backlog_histlen == 0) {
- redisLog(REDIS_DEBUG, "[PSYNC] Backlog history len is zero");
+ serverLog(LL_DEBUG, "[PSYNC] Backlog history len is zero");
return 0;
}
- redisLog(REDIS_DEBUG, "[PSYNC] Backlog size: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] Backlog size: %lld",
server.repl_backlog_size);
- redisLog(REDIS_DEBUG, "[PSYNC] First byte: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] First byte: %lld",
server.repl_backlog_off);
- redisLog(REDIS_DEBUG, "[PSYNC] History len: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] History len: %lld",
server.repl_backlog_histlen);
- redisLog(REDIS_DEBUG, "[PSYNC] Current index: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] Current index: %lld",
server.repl_backlog_idx);
/* Compute the amount of bytes we need to discard. */
skip = offset - server.repl_backlog_off;
- redisLog(REDIS_DEBUG, "[PSYNC] Skipping: %lld", skip);
+ serverLog(LL_DEBUG, "[PSYNC] Skipping: %lld", skip);
- /* Point j to the oldest byte, that is actaully our
+ /* Point j to the oldest byte, that is actually our
* server.repl_backlog_off byte. */
j = (server.repl_backlog_idx +
(server.repl_backlog_size-server.repl_backlog_histlen)) %
server.repl_backlog_size;
- redisLog(REDIS_DEBUG, "[PSYNC] Index of first byte: %lld", j);
+ serverLog(LL_DEBUG, "[PSYNC] Index of first byte: %lld", j);
/* Discard the amount of data to seek to the specified 'offset'. */
j = (j + skip) % server.repl_backlog_size;
@@ -310,13 +377,13 @@ long long addReplyReplicationBacklog(redisClient *c, long long offset) {
/* Feed slave with data. Since it is a circular buffer we have to
* split the reply in two parts if we are cross-boundary. */
len = server.repl_backlog_histlen - skip;
- redisLog(REDIS_DEBUG, "[PSYNC] Reply total length: %lld", len);
+ serverLog(LL_DEBUG, "[PSYNC] Reply total length: %lld", len);
while(len) {
long long thislen =
((server.repl_backlog_size - j) < len) ?
(server.repl_backlog_size - j) : len;
- redisLog(REDIS_DEBUG, "[PSYNC] addReply() length: %lld", thislen);
+ serverLog(LL_DEBUG, "[PSYNC] addReply() length: %lld", thislen);
addReplySds(c,sdsnewlen(server.repl_backlog + j, thislen));
len -= thislen;
j = 0;
@@ -324,44 +391,112 @@ long long addReplyReplicationBacklog(redisClient *c, long long offset) {
return server.repl_backlog_histlen - skip;
}
+/* Return the offset to provide as reply to the PSYNC command received
+ * from the slave. The returned value is only valid immediately after
+ * the BGSAVE process started and before executing any other command
+ * from clients. */
+long long getPsyncInitialOffset(void) {
+ return server.master_repl_offset;
+}
+
+/* Send a FULLRESYNC reply in the specific case of a full resynchronization,
+ * as a side effect setup the slave for a full sync in different ways:
+ *
+ * 1) Remember, into the slave client structure, the replication offset
+ * we sent here, so that if new slaves will later attach to the same
+ * background RDB saving process (by duplicating this client output
+ * buffer), we can get the right offset from this slave.
+ * 2) Set the replication state of the slave to WAIT_BGSAVE_END so that
+ * we start accumulating differences from this point.
+ * 3) Force the replication stream to re-emit a SELECT statement so
+ * the new slave incremental differences will start selecting the
+ * right database number.
+ *
+ * Normally this function should be called immediately after a successful
+ * BGSAVE for replication was started, or when there is one already in
+ * progress that we attached our slave to. */
+int replicationSetupSlaveForFullResync(client *slave, long long offset) {
+ char buf[128];
+ int buflen;
+
+ slave->psync_initial_offset = offset;
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_END;
+ /* We are going to accumulate the incremental changes for this
+ * slave as well. Set slaveseldb to -1 in order to force to re-emit
+ * a SELECT statement in the replication stream. */
+ server.slaveseldb = -1;
+
+ /* Don't send this reply to slaves that approached us with
+ * the old SYNC command. */
+ if (!(slave->flags & CLIENT_PRE_PSYNC)) {
+ buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
+ server.replid,offset);
+ if (write(slave->fd,buf,buflen) != buflen) {
+ freeClientAsync(slave);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
/* This function handles the PSYNC command from the point of view of a
* master receiving a request for partial resynchronization.
*
- * On success return REDIS_OK, otherwise REDIS_ERR is returned and we proceed
+ * On success return C_OK, otherwise C_ERR is returned and we proceed
* with the usual full resync. */
-int masterTryPartialResynchronization(redisClient *c) {
+int masterTryPartialResynchronization(client *c) {
long long psync_offset, psync_len;
- char *master_runid = c->argv[1]->ptr;
+ char *master_replid = c->argv[1]->ptr;
char buf[128];
int buflen;
- /* Is the runid of this master the same advertised by the wannabe slave
- * via PSYNC? If runid changed this master is a different instance and
- * there is no way to continue. */
- if (strcasecmp(master_runid, server.runid)) {
+ /* Parse the replication offset asked by the slave. Go to full sync
+ * on parse error: this should never happen but we try to handle
+ * it in a robust way compared to aborting. */
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
+ C_OK) goto need_full_resync;
+
+ /* Is the replication ID of this master the same advertised by the wannabe
+ * slave via PSYNC? If the replication ID changed this master has a
+ * different replication history, and there is no way to continue.
+ *
+ * Note that there are two potentially valid replication IDs: the ID1
+ * and the ID2. The ID2 however is only valid up to a specific offset. */
+ if (strcasecmp(master_replid, server.replid) &&
+ (strcasecmp(master_replid, server.replid2) ||
+ psync_offset > server.second_replid_offset))
+ {
/* Run id "?" is used by slaves that want to force a full resync. */
- if (master_runid[0] != '?') {
- redisLog(REDIS_NOTICE,"Partial resynchronization not accepted: "
- "Runid mismatch (Client asked for runid '%s', my runid is '%s')",
- master_runid, server.runid);
+ if (master_replid[0] != '?') {
+ if (strcasecmp(master_replid, server.replid) &&
+ strcasecmp(master_replid, server.replid2))
+ {
+ serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
+ "Replication ID mismatch (Slave asked for '%s', my "
+ "replication IDs are '%s' and '%s')",
+ master_replid, server.replid, server.replid2);
+ } else {
+ serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
+ "Requested offset for second ID was %lld, but I can reply "
+ "up to %lld", psync_offset, server.second_replid_offset);
+ }
} else {
- redisLog(REDIS_NOTICE,"Full resync requested by slave.");
+ serverLog(LL_NOTICE,"Full resync requested by slave %s",
+ replicationGetSlaveName(c));
}
goto need_full_resync;
}
/* We still have the data our slave is asking for? */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
- REDIS_OK) goto need_full_resync;
if (!server.repl_backlog ||
psync_offset < server.repl_backlog_off ||
psync_offset > (server.repl_backlog_off + server.repl_backlog_histlen))
{
- redisLog(REDIS_NOTICE,
- "Unable to partial resync with the slave for lack of backlog (Slave request was: %lld).", psync_offset);
+ serverLog(LL_NOTICE,
+ "Unable to partial resync with slave %s for lack of backlog (Slave request was: %lld).", replicationGetSlaveName(c), psync_offset);
if (psync_offset > server.master_repl_offset) {
- redisLog(REDIS_WARNING,
- "Warning: slave tried to PSYNC with an offset that is greater than the master replication offset.");
+ serverLog(LL_WARNING,
+ "Warning: slave %s tried to PSYNC with an offset that is greater than the master replication offset.", replicationGetSlaveName(c));
}
goto need_full_resync;
}
@@ -370,53 +505,132 @@ int masterTryPartialResynchronization(redisClient *c) {
* 1) Set client state to make it a slave.
* 2) Inform the client we can continue with +CONTINUE
* 3) Send the backlog data (from the offset to the end) to the slave. */
- c->flags |= REDIS_SLAVE;
- c->replstate = REDIS_REPL_ONLINE;
+ c->flags |= CLIENT_SLAVE;
+ c->replstate = SLAVE_STATE_ONLINE;
c->repl_ack_time = server.unixtime;
+ c->repl_put_online_on_ack = 0;
listAddNodeTail(server.slaves,c);
/* We can't use the connection buffers since they are used to accumulate
* new commands at this stage. But we are sure the socket send buffer is
- * emtpy so this write will never fail actually. */
- buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
+ * empty so this write will never fail actually. */
+ if (c->slave_capa & SLAVE_CAPA_PSYNC2) {
+ buflen = snprintf(buf,sizeof(buf),"+CONTINUE %s\r\n", server.replid);
+ } else {
+ buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
+ }
if (write(c->fd,buf,buflen) != buflen) {
freeClientAsync(c);
- return REDIS_OK;
+ return C_OK;
}
psync_len = addReplyReplicationBacklog(c,psync_offset);
- redisLog(REDIS_NOTICE,
- "Partial resynchronization request accepted. Sending %lld bytes of backlog starting from offset %lld.", psync_len, psync_offset);
+ serverLog(LL_NOTICE,
+ "Partial resynchronization request from %s accepted. Sending %lld bytes of backlog starting from offset %lld.",
+ replicationGetSlaveName(c),
+ psync_len, psync_offset);
/* Note that we don't need to set the selected DB at server.slaveseldb
* to -1 to force the master to emit SELECT, since the slave already
* has this state from the previous connection with the master. */
refreshGoodSlavesCount();
- return REDIS_OK; /* The caller can return, no full resync needed. */
+ return C_OK; /* The caller can return, no full resync needed. */
need_full_resync:
- /* We need a full resync for some reason... notify the client. */
- psync_offset = server.master_repl_offset;
- /* Add 1 to psync_offset if it the replication backlog does not exists
- * as when it will be created later we'll increment the offset by one. */
- if (server.repl_backlog == NULL) psync_offset++;
- /* Again, we can't use the connection buffers (see above). */
- buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
- server.runid,psync_offset);
- if (write(c->fd,buf,buflen) != buflen) {
- freeClientAsync(c);
- return REDIS_OK;
+ /* We need a full resync for some reason... Note that we can't
+ * reply to PSYNC right now if a full SYNC is needed. The reply
+ * must include the master offset at the time the RDB file we transfer
+ * is generated, so we need to delay the reply to that moment. */
+ return C_ERR;
+}
+
+/* Start a BGSAVE for replication goals, which is, selecting the disk or
+ * socket target depending on the configuration, and making sure that
+ * the script cache is flushed before to start.
+ *
+ * The mincapa argument is the bitwise AND among all the slaves capabilities
+ * of the slaves waiting for this BGSAVE, so represents the slave capabilities
+ * all the slaves support. Can be tested via SLAVE_CAPA_* macros.
+ *
+ * Side effects, other than starting a BGSAVE:
+ *
+ * 1) Handle the slaves in WAIT_START state, by preparing them for a full
+ * sync if the BGSAVE was succesfully started, or sending them an error
+ * and dropping them from the list of slaves.
+ *
+ * 2) Flush the Lua scripting script cache if the BGSAVE was actually
+ * started.
+ *
+ * Returns C_OK on success or C_ERR otherwise. */
+int startBgsaveForReplication(int mincapa) {
+ int retval;
+ int socket_target = server.repl_diskless_sync && (mincapa & SLAVE_CAPA_EOF);
+ listIter li;
+ listNode *ln;
+
+ serverLog(LL_NOTICE,"Starting BGSAVE for SYNC with target: %s",
+ socket_target ? "slaves sockets" : "disk");
+
+ rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ /* If we are saving for a chained slave (that is, if we are,
+ * in turn, a slave of another instance), make sure after
+ * loadig the RDB, our slaves select the right DB: we'll just
+ * send the replication stream we receive from our master, so
+ * no way to send SELECT commands. */
+ if (server.master) rsi.repl_stream_db = server.master->db->id;
+
+ if (socket_target)
+ retval = rdbSaveToSlavesSockets(&rsi);
+ else
+ retval = rdbSaveBackground(server.rdb_filename,&rsi);
+
+ /* If we failed to BGSAVE, remove the slaves waiting for a full
+ * resynchorinization from the list of salves, inform them with
+ * an error about what happened, close the connection ASAP. */
+ if (retval == C_ERR) {
+ serverLog(LL_WARNING,"BGSAVE for replication failed");
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ slave->flags &= ~CLIENT_SLAVE;
+ listDelNode(server.slaves,ln);
+ addReplyError(slave,
+ "BGSAVE failed, replication can't continue");
+ slave->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+ }
+ return retval;
}
- return REDIS_ERR;
+
+ /* If the target is socket, rdbSaveToSlavesSockets() already setup
+ * the salves for a full resync. Otherwise for disk target do it now.*/
+ if (!socket_target) {
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ replicationSetupSlaveForFullResync(slave,
+ getPsyncInitialOffset());
+ }
+ }
+ }
+
+ /* Flush the script cache, since we need that slave differences are
+ * accumulated without requiring slaves to match our cached scripts. */
+ if (retval == C_OK) replicationScriptCacheFlush();
+ return retval;
}
-/* SYNC ad PSYNC command implemenation. */
-void syncCommand(redisClient *c) {
+/* SYNC and PSYNC command implemenation. */
+void syncCommand(client *c) {
/* ignore SYNC if already slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
+ if (c->flags & CLIENT_SLAVE) return;
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
- if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {
- addReplyError(c,"Can't SYNC while not connected with my master");
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
+ addReplySds(c,sdsnew("-NOMASTERLINK Can't SYNC while not connected with my master\r\n"));
return;
}
@@ -424,93 +638,125 @@ void syncCommand(redisClient *c) {
* the client about already issued commands. We need a fresh reply
* buffer registering the differences between the BGSAVE and the current
* dataset, so that we can copy to other slaves if needed. */
- if (listLength(c->reply) != 0 || c->bufpos != 0) {
+ if (clientHasPendingReplies(c)) {
addReplyError(c,"SYNC and PSYNC are invalid with pending output");
return;
}
- redisLog(REDIS_NOTICE,"Slave asks for synchronization");
+ serverLog(LL_NOTICE,"Slave %s asks for synchronization",
+ replicationGetSlaveName(c));
/* Try a partial resynchronization if this is a PSYNC command.
* If it fails, we continue with usual full resynchronization, however
* when this happens masterTryPartialResynchronization() already
* replied with:
*
- * +FULLRESYNC <runid> <offset>
+ * +FULLRESYNC <replid> <offset>
*
- * So the slave knows the new runid and offset to try a PSYNC later
+ * So the slave knows the new replid and offset to try a PSYNC later
* if the connection with the master is lost. */
if (!strcasecmp(c->argv[0]->ptr,"psync")) {
- if (masterTryPartialResynchronization(c) == REDIS_OK) {
+ if (masterTryPartialResynchronization(c) == C_OK) {
server.stat_sync_partial_ok++;
return; /* No full resync needed, return. */
} else {
- char *master_runid = c->argv[1]->ptr;
+ char *master_replid = c->argv[1]->ptr;
/* Increment stats for failed PSYNCs, but only if the
- * runid is not "?", as this is used by slaves to force a full
+ * replid is not "?", as this is used by slaves to force a full
* resync on purpose when they are not albe to partially
* resync. */
- if (master_runid[0] != '?') server.stat_sync_partial_err++;
+ if (master_replid[0] != '?') server.stat_sync_partial_err++;
}
} else {
/* If a slave uses SYNC, we are dealing with an old implementation
* of the replication protocol (like redis-cli --slave). Flag the client
* so that we don't expect to receive REPLCONF ACK feedbacks. */
- c->flags |= REDIS_PRE_PSYNC;
+ c->flags |= CLIENT_PRE_PSYNC;
}
/* Full resynchronization. */
server.stat_sync_full++;
- /* Here we need to check if there is a background saving operation
- * in progress, or if it is required to start one */
- if (server.rdb_child_pid != -1) {
+ /* Setup the slave as one waiting for BGSAVE to start. The following code
+ * paths will change the state if we handle the slave differently. */
+ c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ if (server.repl_disable_tcp_nodelay)
+ anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
+ c->repldbfd = -1;
+ c->flags |= CLIENT_SLAVE;
+ listAddNodeTail(server.slaves,c);
+
+ /* Create the replication backlog if needed. */
+ if (listLength(server.slaves) == 1 && server.repl_backlog == NULL) {
+ /* When we create the backlog from scratch, we always use a new
+ * replication ID and clear the ID2, since there is no valid
+ * past history. */
+ changeReplicationId();
+ clearReplicationId2();
+ createReplicationBacklog();
+ }
+
+ /* CASE 1: BGSAVE is in progress, with disk target. */
+ if (server.rdb_child_pid != -1 &&
+ server.rdb_child_type == RDB_CHILD_TYPE_DISK)
+ {
/* Ok a background save is in progress. Let's check if it is a good
* one for replication, i.e. if there is another slave that is
- * registering differences since the server forked to save */
- redisClient *slave;
+ * registering differences since the server forked to save. */
+ client *slave;
listNode *ln;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break;
}
- if (ln) {
+ /* To attach this slave, we check that it has at least all the
+ * capabilities of the slave that triggered the current BGSAVE. */
+ if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) {
/* Perfect, the server is already registering differences for
* another slave. Set the right state, and copy the buffer. */
copyClientOutputBuffer(c,slave);
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
+ replicationSetupSlaveForFullResync(c,slave->psync_initial_offset);
+ serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
/* No way, we need to wait for the next BGSAVE in order to
- * register differences */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
- redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
+ * register differences. */
+ serverLog(LL_NOTICE,"Can't attach the slave to the current BGSAVE. Waiting for next BGSAVE for SYNC");
}
+
+ /* CASE 2: BGSAVE is in progress, with socket target. */
+ } else if (server.rdb_child_pid != -1 &&
+ server.rdb_child_type == RDB_CHILD_TYPE_SOCKET)
+ {
+ /* There is an RDB child process but it is writing directly to
+ * children sockets. We need to wait for the next BGSAVE
+ * in order to synchronize. */
+ serverLog(LL_NOTICE,"Current BGSAVE has socket target. Waiting for next BGSAVE for SYNC");
+
+ /* CASE 3: There is no BGSAVE is progress. */
} else {
- /* Ok we don't have a BGSAVE in progress, let's start one */
- redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
- if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
- redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
- addReplyError(c,"Unable to perform background save");
- return;
+ if (server.repl_diskless_sync && (c->slave_capa & SLAVE_CAPA_EOF)) {
+ /* Diskless replication RDB child is created inside
+ * replicationCron() since we want to delay its start a
+ * few seconds to wait for more slaves to arrive. */
+ if (server.repl_diskless_sync_delay)
+ serverLog(LL_NOTICE,"Delay next BGSAVE for diskless SYNC");
+ } else {
+ /* Target is disk (or the slave is not capable of supporting
+ * diskless replication) and we don't have a BGSAVE in progress,
+ * let's start one. */
+ if (server.aof_child_pid == -1) {
+ startBgsaveForReplication(c->slave_capa);
+ } else {
+ serverLog(LL_NOTICE,
+ "No BGSAVE in progress, but an AOF rewrite is active. "
+ "BGSAVE for replication delayed");
+ }
}
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- /* Flush the script cache for the new slave. */
- replicationScriptCacheFlush();
}
-
- if (server.repl_disable_tcp_nodelay)
- anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
- c->repldbfd = -1;
- c->flags |= REDIS_SLAVE;
- server.slaveseldb = -1; /* Force to re-emit the SELECT command. */
- listAddNodeTail(server.slaves,c);
- if (listLength(server.slaves) == 1 && server.repl_backlog == NULL)
- createReplicationBacklog();
return;
}
@@ -526,7 +772,7 @@ void syncCommand(redisClient *c) {
* In the future the same command can be used in order to configure
* the replication to initiate an incremental replication instead of a
* full resync. */
-void replconfCommand(redisClient *c) {
+void replconfCommand(client *c) {
int j;
if ((c->argc % 2) == 0) {
@@ -542,28 +788,48 @@ void replconfCommand(redisClient *c) {
long port;
if ((getLongFromObjectOrReply(c,c->argv[j+1],
- &port,NULL) != REDIS_OK))
+ &port,NULL) != C_OK))
return;
c->slave_listening_port = port;
+ } else if (!strcasecmp(c->argv[j]->ptr,"ip-address")) {
+ sds ip = c->argv[j+1]->ptr;
+ if (sdslen(ip) < sizeof(c->slave_ip)) {
+ memcpy(c->slave_ip,ip,sdslen(ip)+1);
+ } else {
+ addReplyErrorFormat(c,"REPLCONF ip-address provided by "
+ "slave instance is too long: %zd bytes", sdslen(ip));
+ return;
+ }
+ } else if (!strcasecmp(c->argv[j]->ptr,"capa")) {
+ /* Ignore capabilities not understood by this master. */
+ if (!strcasecmp(c->argv[j+1]->ptr,"eof"))
+ c->slave_capa |= SLAVE_CAPA_EOF;
+ else if (!strcasecmp(c->argv[j+1]->ptr,"psync2"))
+ c->slave_capa |= SLAVE_CAPA_PSYNC2;
} else if (!strcasecmp(c->argv[j]->ptr,"ack")) {
/* REPLCONF ACK is used by slave to inform the master the amount
* of replication stream that it processed so far. It is an
* internal only command that normal clients should never use. */
long long offset;
- if (!(c->flags & REDIS_SLAVE)) return;
- if ((getLongLongFromObject(c->argv[j+1], &offset) != REDIS_OK))
+ if (!(c->flags & CLIENT_SLAVE)) return;
+ if ((getLongLongFromObject(c->argv[j+1], &offset) != C_OK))
return;
if (offset > c->repl_ack_off)
c->repl_ack_off = offset;
c->repl_ack_time = server.unixtime;
+ /* If this was a diskless replication, we need to really put
+ * the slave online when the first ACK is received (which
+ * confirms slave is online and ready to get more data). */
+ if (c->repl_put_online_on_ack && c->replstate == SLAVE_STATE_ONLINE)
+ putSlaveOnline(c);
/* Note: this command does not reply anything! */
return;
} else if (!strcasecmp(c->argv[j]->ptr,"getack")) {
/* REPLCONF GETACK is used in order to request an ACK ASAP
* to the slave. */
if (server.masterhost && server.master) replicationSendAck();
- /* Note: this command does not reply anything! */
+ return;
} else {
addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
(char*)c->argv[j]->ptr);
@@ -573,11 +839,38 @@ void replconfCommand(redisClient *c) {
addReply(c,shared.ok);
}
+/* This function puts a slave in the online state, and should be called just
+ * after a slave received the RDB file for the initial synchronization, and
+ * we are finally ready to send the incremental stream of commands.
+ *
+ * It does a few things:
+ *
+ * 1) Put the slave in ONLINE state (useless when the function is called
+ * because state is already ONLINE but repl_put_online_on_ack is true).
+ * 2) Make sure the writable event is re-installed, since calling the SYNC
+ * command disables it, so that we can accumulate output buffer without
+ * sending it to the slave.
+ * 3) Update the count of good slaves. */
+void putSlaveOnline(client *slave) {
+ slave->replstate = SLAVE_STATE_ONLINE;
+ slave->repl_put_online_on_ack = 0;
+ slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
+ sendReplyToClient, slave) == AE_ERR) {
+ serverLog(LL_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno));
+ freeClient(slave);
+ return;
+ }
+ refreshGoodSlavesCount();
+ serverLog(LL_NOTICE,"Synchronization with slave %s succeeded",
+ replicationGetSlaveName(slave));
+}
+
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *slave = privdata;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- char buf[REDIS_IOBUF_LEN];
+ client *slave = privdata;
+ UNUSED(el);
+ UNUSED(mask);
+ char buf[PROTO_IOBUF_LEN];
ssize_t nwritten, buflen;
/* Before sending the RDB file, we send the preamble as configured by the
@@ -586,11 +879,12 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
if (slave->replpreamble) {
nwritten = write(fd,slave->replpreamble,sdslen(slave->replpreamble));
if (nwritten == -1) {
- redisLog(REDIS_VERBOSE,"Write error sending RDB preamble to slave: %s",
+ serverLog(LL_VERBOSE,"Write error sending RDB preamble to slave: %s",
strerror(errno));
freeClient(slave);
return;
}
+ server.stat_net_output_bytes += nwritten;
sdsrange(slave->replpreamble,nwritten,-1);
if (sdslen(slave->replpreamble) == 0) {
sdsfree(slave->replpreamble);
@@ -603,117 +897,152 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
/* If the preamble was already transfered, send the RDB bulk data. */
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
- buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
+ buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN);
if (buflen <= 0) {
- redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
+ serverLog(LL_WARNING,"Read error sending DB to slave: %s",
(buflen == 0) ? "premature EOF" : strerror(errno));
freeClient(slave);
return;
}
if ((nwritten = write(fd,buf,buflen)) == -1) {
if (errno != EAGAIN) {
- redisLog(REDIS_WARNING,"Write error sending DB to slave: %s",
+ serverLog(LL_WARNING,"Write error sending DB to slave: %s",
strerror(errno));
freeClient(slave);
}
return;
}
slave->repldboff += nwritten;
+ server.stat_net_output_bytes += nwritten;
if (slave->repldboff == slave->repldbsize) {
close(slave->repldbfd);
slave->repldbfd = -1;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- slave->replstate = REDIS_REPL_ONLINE;
- slave->repl_ack_time = server.unixtime;
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
- sendReplyToClient, slave) == AE_ERR) {
- redisLog(REDIS_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno));
- freeClient(slave);
- return;
- }
- refreshGoodSlavesCount();
- redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
+ putSlaveOnline(slave);
}
}
-/* This function is called at the end of every background saving.
- * The argument bgsaveerr is REDIS_OK if the background saving succeeded
- * otherwise REDIS_ERR is passed to the function.
+/* This function is called at the end of every background saving,
+ * or when the replication RDB transfer strategy is modified from
+ * disk to socket or the other way around.
*
* The goal of this function is to handle slaves waiting for a successful
- * background saving in order to perform non-blocking synchronization. */
-void updateSlavesWaitingBgsave(int bgsaveerr) {
+ * background saving in order to perform non-blocking synchronization, and
+ * to schedule a new BGSAVE if there are slaves that attached while a
+ * BGSAVE was in progress, but it was not a good one for replication (no
+ * other slave was accumulating differences).
+ *
+ * The argument bgsaveerr is C_OK if the background saving succeeded
+ * otherwise C_ERR is passed to the function.
+ * The 'type' argument is the type of the child that terminated
+ * (if it had a disk or socket target). */
+void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
listNode *ln;
int startbgsave = 0;
+ int mincapa = -1;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
startbgsave = 1;
- slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
+ mincapa = (mincapa == -1) ? slave->slave_capa :
+ (mincapa & slave->slave_capa);
+ } else if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
struct redis_stat buf;
- if (bgsaveerr != REDIS_OK) {
- freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
- continue;
- }
- if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
- redis_fstat(slave->repldbfd,&buf) == -1) {
- freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
- continue;
- }
- slave->repldboff = 0;
- slave->repldbsize = buf.st_size;
- slave->replstate = REDIS_REPL_SEND_BULK;
- slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
- (unsigned long long) slave->repldbsize);
-
- aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
- freeClient(slave);
- continue;
+ /* If this was an RDB on disk save, we have to prepare to send
+ * the RDB from disk to the slave socket. Otherwise if this was
+ * already an RDB -> Slaves socket transfer, used in the case of
+ * diskless replication, our work is trivial, we can just put
+ * the slave online. */
+ if (type == RDB_CHILD_TYPE_SOCKET) {
+ serverLog(LL_NOTICE,
+ "Streamed RDB transfer with slave %s succeeded (socket). Waiting for REPLCONF ACK from slave to enable streaming",
+ replicationGetSlaveName(slave));
+ /* Note: we wait for a REPLCONF ACK message from slave in
+ * order to really put it online (install the write handler
+ * so that the accumulated data can be transfered). However
+ * we change the replication state ASAP, since our slave
+ * is technically online now. */
+ slave->replstate = SLAVE_STATE_ONLINE;
+ slave->repl_put_online_on_ack = 1;
+ slave->repl_ack_time = server.unixtime; /* Timeout otherwise. */
+ } else {
+ if (bgsaveerr != C_OK) {
+ freeClient(slave);
+ serverLog(LL_WARNING,"SYNC failed. BGSAVE child returned an error");
+ continue;
+ }
+ if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
+ redis_fstat(slave->repldbfd,&buf) == -1) {
+ freeClient(slave);
+ serverLog(LL_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
+ continue;
+ }
+ slave->repldboff = 0;
+ slave->repldbsize = buf.st_size;
+ slave->replstate = SLAVE_STATE_SEND_BULK;
+ slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
+ (unsigned long long) slave->repldbsize);
+
+ aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
+ freeClient(slave);
+ continue;
+ }
}
}
}
- if (startbgsave) {
- /* Since we are starting a new background save for one or more slaves,
- * we flush the Replication Script Cache to use EVAL to propagate every
- * new EVALSHA for the first time, since all the new slaves don't know
- * about previous scripts. */
- replicationScriptCacheFlush();
- if (rdbSaveBackground(server.rdb_filename) != REDIS_OK) {
- listIter li;
+ if (startbgsave) startBgsaveForReplication(mincapa);
+}
- listRewind(server.slaves,&li);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+/* Change the current instance replication ID with a new, random one.
+ * This will prevent successful PSYNCs between this master and other
+ * slaves, so the command should be called when something happens that
+ * alters the current story of the dataset. */
+void changeReplicationId(void) {
+ getRandomHexChars(server.replid,CONFIG_RUN_ID_SIZE);
+ server.replid[CONFIG_RUN_ID_SIZE] = '\0';
+}
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
- freeClient(slave);
- }
- }
- }
+/* Clear (invalidate) the secondary replication ID. This happens, for
+ * example, after a full resynchronization, when we start a new replication
+ * history. */
+void clearReplicationId2(void) {
+ memset(server.replid2,'0',sizeof(server.replid));
+ server.replid2[CONFIG_RUN_ID_SIZE] = '\0';
+ server.second_replid_offset = -1;
}
-/* ----------------------------------- SLAVE -------------------------------- */
+/* Use the current replication ID / offset as secondary replication
+ * ID, and change the current one in order to start a new history.
+ * This should be used when an instance is switched from slave to master
+ * so that it can serve PSYNC requests performed using the master
+ * replication ID. */
+void shiftReplicationId(void) {
+ memcpy(server.replid2,server.replid,sizeof(server.replid));
+ /* We set the second replid offset to the master offset + 1, since
+ * the slave will ask for the first byte it has not yet received, so
+ * we need to add one to the offset: for example if, as a slave, we are
+ * sure we have the same history as the master for 50 bytes, after we
+ * are turned into a master, we can accept a PSYNC request with offset
+ * 51, since the slave asking has the same history up to the 50th
+ * byte, and is asking for the new bytes starting at offset 51. */
+ server.second_replid_offset = server.master_repl_offset+1;
+ changeReplicationId();
+ serverLog(LL_WARNING,"Setting secondary replication ID to %s, valid up to offset: %lld. New replication ID is %s", server.replid2, server.second_replid_offset, server.replid);
+}
-/* Abort the async download of the bulk dataset while SYNC-ing with master */
-void replicationAbortSyncTransfer(void) {
- redisAssert(server.repl_state == REDIS_REPL_TRANSFER);
+/* ----------------------------------- SLAVE -------------------------------- */
- aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
- close(server.repl_transfer_s);
- close(server.repl_transfer_fd);
- unlink(server.repl_transfer_tmpfile);
- zfree(server.repl_transfer_tmpfile);
- server.repl_state = REDIS_REPL_CONNECT;
+/* Returns 1 if the given replication state is a handshake state,
+ * 0 otherwise. */
+int slaveIsInHandshakeState(void) {
+ return server.repl_state >= REPL_STATE_RECEIVE_PONG &&
+ server.repl_state <= REPL_STATE_RECEIVE_PSYNC;
}
/* Avoid the master to detect the slave is timing out while loading the
@@ -737,32 +1066,68 @@ void replicationSendNewlineToMaster(void) {
/* Callback used by emptyDb() while flushing away old data to load
* the new dataset received by the master. */
void replicationEmptyDbCallback(void *privdata) {
- REDIS_NOTUSED(privdata);
+ UNUSED(privdata);
replicationSendNewlineToMaster();
}
+/* Once we have a link with the master and the synchroniziation was
+ * performed, this function materializes the master client we store
+ * at server.master, starting from the specified file descriptor. */
+void replicationCreateMasterClient(int fd, int dbid) {
+ server.master = createClient(fd);
+ server.master->flags |= CLIENT_MASTER;
+ server.master->authenticated = 1;
+ server.master->reploff = server.master_initial_offset;
+ server.master->read_reploff = server.master->reploff;
+ memcpy(server.master->replid, server.master_replid,
+ sizeof(server.master_replid));
+ /* If master offset is set to -1, this master is old and is not
+ * PSYNC capable, so we flag it accordingly. */
+ if (server.master->reploff == -1)
+ server.master->flags |= CLIENT_PRE_PSYNC;
+ if (dbid != -1) selectDb(server.master,dbid);
+}
+
+void restartAOF() {
+ int retry = 10;
+ while (retry-- && startAppendOnly() == C_ERR) {
+ serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second.");
+ sleep(1);
+ }
+ if (!retry) {
+ serverLog(LL_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.");
+ exit(1);
+ }
+}
+
/* Asynchronously read the SYNC payload we receive from a master */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
char buf[4096];
ssize_t nread, readlen;
off_t left;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
+
+ /* Static vars used to hold the EOF mark, and the last bytes received
+ * form the server: when they match, we reached the end of the transfer. */
+ static char eofmark[CONFIG_RUN_ID_SIZE];
+ static char lastbytes[CONFIG_RUN_ID_SIZE];
+ static int usemark = 0;
/* If repl_transfer_size == -1 we still have to read the bulk length
* from the master reply. */
if (server.repl_transfer_size == -1) {
if (syncReadLine(fd,buf,1024,server.repl_syncio_timeout*1000) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"I/O error reading bulk count from MASTER: %s",
strerror(errno));
goto error;
}
if (buf[0] == '-') {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"MASTER aborted replication with an error: %s",
buf+1);
goto error;
@@ -773,33 +1138,89 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
server.repl_transfer_lastio = server.unixtime;
return;
} else if (buf[0] != '$') {
- redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
+ serverLog(LL_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
goto error;
}
- server.repl_transfer_size = strtol(buf+1,NULL,10);
- redisLog(REDIS_NOTICE,
- "MASTER <-> SLAVE sync: receiving %lld bytes from master",
- (long long) server.repl_transfer_size);
+
+ /* There are two possible forms for the bulk payload. One is the
+ * usual $<count> bulk format. The other is used for diskless transfers
+ * when the master does not know beforehand the size of the file to
+ * transfer. In the latter case, the following format is used:
+ *
+ * $EOF:<40 bytes delimiter>
+ *
+ * At the end of the file the announced delimiter is transmitted. The
+ * delimiter is long and random enough that the probability of a
+ * collision with the actual file content can be ignored. */
+ if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= CONFIG_RUN_ID_SIZE) {
+ usemark = 1;
+ memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);
+ memset(lastbytes,0,CONFIG_RUN_ID_SIZE);
+ /* Set any repl_transfer_size to avoid entering this code path
+ * at the next call. */
+ server.repl_transfer_size = 0;
+ serverLog(LL_NOTICE,
+ "MASTER <-> SLAVE sync: receiving streamed RDB from master");
+ } else {
+ usemark = 0;
+ server.repl_transfer_size = strtol(buf+1,NULL,10);
+ serverLog(LL_NOTICE,
+ "MASTER <-> SLAVE sync: receiving %lld bytes from master",
+ (long long) server.repl_transfer_size);
+ }
return;
}
/* Read bulk data */
- left = server.repl_transfer_size - server.repl_transfer_read;
- readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
+ if (usemark) {
+ readlen = sizeof(buf);
+ } else {
+ left = server.repl_transfer_size - server.repl_transfer_read;
+ readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
+ }
+
nread = read(fd,buf,readlen);
if (nread <= 0) {
- redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
+ serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
(nread == -1) ? strerror(errno) : "connection lost");
- replicationAbortSyncTransfer();
+ cancelReplicationHandshake();
return;
}
+ server.stat_net_input_bytes += nread;
+
+ /* When a mark is used, we want to detect EOF asap in order to avoid
+ * writing the EOF mark into the file... */
+ int eof_reached = 0;
+
+ if (usemark) {
+ /* Update the last bytes array, and check if it matches our delimiter.*/
+ if (nread >= CONFIG_RUN_ID_SIZE) {
+ memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE);
+ } else {
+ int rem = CONFIG_RUN_ID_SIZE-nread;
+ memmove(lastbytes,lastbytes+nread,rem);
+ memcpy(lastbytes+rem,buf,nread);
+ }
+ if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1;
+ }
+
server.repl_transfer_lastio = server.unixtime;
if (write(server.repl_transfer_fd,buf,nread) != nread) {
- redisLog(REDIS_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s", strerror(errno));
goto error;
}
server.repl_transfer_read += nread;
+ /* Delete the last 40 bytes from the file if we reached EOF. */
+ if (usemark && eof_reached) {
+ if (ftruncate(server.repl_transfer_fd,
+ server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1)
+ {
+ serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno));
+ goto error;
+ }
+ }
+
/* Sync data on disk from time to time, otherwise at the end of the transfer
* we may suffer a big delay as the memory buffers are copied into the
* actual disk. */
@@ -814,63 +1235,70 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
}
/* Check if the transfer is now complete */
- if (server.repl_transfer_read == server.repl_transfer_size) {
+ if (!usemark) {
+ if (server.repl_transfer_read == server.repl_transfer_size)
+ eof_reached = 1;
+ }
+
+ if (eof_reached) {
+ int aof_is_enabled = server.aof_state != AOF_OFF;
+
if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
- redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
- replicationAbortSyncTransfer();
+ serverLog(LL_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ cancelReplicationHandshake();
return;
}
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Flushing old data");
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Flushing old data");
+ /* We need to stop any AOFRW fork before flusing and parsing
+ * RDB, otherwise we'll create a copy-on-write disaster. */
+ if(aof_is_enabled) stopAppendOnly();
signalFlushedDb(-1);
- emptyDb(replicationEmptyDbCallback);
+ emptyDb(
+ -1,
+ server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS,
+ replicationEmptyDbCallback);
/* Before loading the DB into memory we need to delete the readable
* handler, otherwise it will get called recursively since
* rdbLoad() will call the event loop to process events from time to
* time for non blocking loading. */
aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory");
- if (rdbLoad(server.rdb_filename) != REDIS_OK) {
- redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
- replicationAbortSyncTransfer();
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory");
+ rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ if (rdbLoad(server.rdb_filename,&rsi) != C_OK) {
+ serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
+ cancelReplicationHandshake();
+ /* Re-enable the AOF if we disabled it earlier, in order to restore
+ * the original configuration. */
+ if (aof_is_enabled) restartAOF();
return;
}
/* Final setup of the connected slave <- master link */
zfree(server.repl_transfer_tmpfile);
close(server.repl_transfer_fd);
- server.master = createClient(server.repl_transfer_s);
- server.master->flags |= REDIS_MASTER;
- server.master->authenticated = 1;
- server.repl_state = REDIS_REPL_CONNECTED;
- server.master->reploff = server.repl_master_initial_offset;
- memcpy(server.master->replrunid, server.repl_master_runid,
- sizeof(server.repl_master_runid));
- /* If master offset is set to -1, this master is old and is not
- * PSYNC capable, so we flag it accordingly. */
- if (server.master->reploff == -1)
- server.master->flags |= REDIS_PRE_PSYNC;
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
+ replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
+ server.repl_state = REPL_STATE_CONNECTED;
+ /* After a full resynchroniziation we use the replication ID and
+ * offset of the master. The secondary ID / offset are cleared since
+ * we are starting a new history. */
+ memcpy(server.replid,server.master->replid,sizeof(server.replid));
+ server.master_repl_offset = server.master->reploff;
+ clearReplicationId2();
+ /* Let's create the replication backlog if needed. Slaves need to
+ * accumulate the backlog regardless of the fact they have sub-slaves
+ * or not, in order to behave correctly if they are promoted to
+ * masters after a failover. */
+ if (server.repl_backlog == NULL) createReplicationBacklog();
+
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
/* Restart the AOF subsystem now that we finished the sync. This
* will trigger an AOF rewrite, and when done will start appending
* to the new file. */
- if (server.aof_state != REDIS_AOF_OFF) {
- int retry = 10;
-
- stopAppendOnly();
- while (retry-- && startAppendOnly() == REDIS_ERR) {
- redisLog(REDIS_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second.");
- sleep(1);
- }
- if (!retry) {
- redisLog(REDIS_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.");
- exit(1);
- }
- }
+ if (aof_is_enabled) restartAOF();
}
-
return;
error:
- replicationAbortSyncTransfer();
+ cancelReplicationHandshake();
return;
}
@@ -880,38 +1308,54 @@ error:
* The command returns an sds string representing the result of the
* operation. On error the first byte is a "-".
*/
-char *sendSynchronousCommand(int fd, ...) {
- va_list ap;
- sds cmd = sdsempty();
- char *arg, buf[256];
+#define SYNC_CMD_READ (1<<0)
+#define SYNC_CMD_WRITE (1<<1)
+#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
+char *sendSynchronousCommand(int flags, int fd, ...) {
/* Create the command to send to the master, we use simple inline
* protocol for simplicity as currently we only send simple strings. */
- va_start(ap,fd);
- while(1) {
- arg = va_arg(ap, char*);
- if (arg == NULL) break;
-
- if (sdslen(cmd) != 0) cmd = sdscatlen(cmd," ",1);
- cmd = sdscat(cmd,arg);
- }
- cmd = sdscatlen(cmd,"\r\n",2);
+ if (flags & SYNC_CMD_WRITE) {
+ char *arg;
+ va_list ap;
+ sds cmd = sdsempty();
+ va_start(ap,fd);
+
+ while(1) {
+ arg = va_arg(ap, char*);
+ if (arg == NULL) break;
+
+ if (sdslen(cmd) != 0) cmd = sdscatlen(cmd," ",1);
+ cmd = sdscat(cmd,arg);
+ }
+ cmd = sdscatlen(cmd,"\r\n",2);
- /* Transfer command to the server. */
- if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000) == -1) {
+ /* Transfer command to the server. */
+ if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
+ == -1)
+ {
+ sdsfree(cmd);
+ return sdscatprintf(sdsempty(),"-Writing to master: %s",
+ strerror(errno));
+ }
sdsfree(cmd);
- return sdscatprintf(sdsempty(),"-Writing to master: %s",
- strerror(errno));
+ va_end(ap);
}
- sdsfree(cmd);
/* Read the reply from the server. */
- if (syncReadLine(fd,buf,sizeof(buf),server.repl_syncio_timeout*1000) == -1)
- {
- return sdscatprintf(sdsempty(),"-Reading from master: %s",
- strerror(errno));
+ if (flags & SYNC_CMD_READ) {
+ char buf[256];
+
+ if (syncReadLine(fd,buf,sizeof(buf),server.repl_syncio_timeout*1000)
+ == -1)
+ {
+ return sdscatprintf(sdsempty(),"-Reading from master: %s",
+ strerror(errno));
+ }
+ server.repl_transfer_lastio = server.unixtime;
+ return sdsnew(buf);
}
- return sdsnew(buf);
+ return NULL;
}
/* Try a partial resynchronization with the master if we are about to reconnect.
@@ -928,6 +1372,19 @@ char *sendSynchronousCommand(int fd, ...) {
* of successful partial resynchronization, the function will reuse
* 'fd' as file descriptor of the server.master client structure.
*
+ * The function is split in two halves: if read_reply is 0, the function
+ * writes the PSYNC command on the socket, and a new function call is
+ * needed, with read_reply set to 1, in order to read the reply of the
+ * command. This is useful in order to support non blocking operations, so
+ * that we write, return into the event loop, and read when there are data.
+ *
+ * When read_reply is 0 the function returns PSYNC_WRITE_ERR if there
+ * was a write error, or PSYNC_WAIT_REPLY to signal we need another call
+ * with read_reply set to 1. However even when read_reply is set to 1
+ * the function may return PSYNC_WAIT_REPLY again to signal there were
+ * insufficient data to read to complete its work. We should re-enter
+ * into the event loop and wait in such a case.
+ *
* The function returns:
*
* PSYNC_CONTINUE: If the PSYNC command succeded and we can continue.
@@ -936,62 +1393,97 @@ char *sendSynchronousCommand(int fd, ...) {
* offset is saved.
* PSYNC_NOT_SUPPORTED: If the server does not understand PSYNC at all and
* the caller should fall back to SYNC.
+ * PSYNC_WRITE_ERROR: There was an error writing the command to the socket.
+ * PSYNC_WAIT_REPLY: Call again the function with read_reply set to 1.
+ * PSYNC_TRY_LATER: Master is currently in a transient error condition.
+ *
+ * Notable side effects:
+ *
+ * 1) As a side effect of the function call the function removes the readable
+ * event handler from "fd", unless the return value is PSYNC_WAIT_REPLY.
+ * 2) server.master_initial_offset is set to the right value according
+ * to the master reply. This will be used to populate the 'server.master'
+ * structure replication offset.
*/
-#define PSYNC_CONTINUE 0
-#define PSYNC_FULLRESYNC 1
-#define PSYNC_NOT_SUPPORTED 2
-int slaveTryPartialResynchronization(int fd) {
- char *psync_runid;
+#define PSYNC_WRITE_ERROR 0
+#define PSYNC_WAIT_REPLY 1
+#define PSYNC_CONTINUE 2
+#define PSYNC_FULLRESYNC 3
+#define PSYNC_NOT_SUPPORTED 4
+#define PSYNC_TRY_LATER 5
+int slaveTryPartialResynchronization(int fd, int read_reply) {
+ char *psync_replid;
char psync_offset[32];
sds reply;
- /* Initially set repl_master_initial_offset to -1 to mark the current
- * master run_id and offset as not valid. Later if we'll be able to do
- * a FULL resync using the PSYNC command we'll set the offset at the
- * right value, so that this information will be propagated to the
- * client structure representing the master into server.master. */
- server.repl_master_initial_offset = -1;
-
- if (server.cached_master) {
- psync_runid = server.cached_master->replrunid;
- snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
- redisLog(REDIS_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_runid, psync_offset);
- } else {
- redisLog(REDIS_NOTICE,"Partial resynchronization not possible (no cached master)");
- psync_runid = "?";
- memcpy(psync_offset,"-1",3);
+ /* Writing half */
+ if (!read_reply) {
+ /* Initially set master_initial_offset to -1 to mark the current
+ * master run_id and offset as not valid. Later if we'll be able to do
+ * a FULL resync using the PSYNC command we'll set the offset at the
+ * right value, so that this information will be propagated to the
+ * client structure representing the master into server.master. */
+ server.master_initial_offset = -1;
+
+ if (server.cached_master) {
+ psync_replid = server.cached_master->replid;
+ snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
+ serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
+ } else {
+ serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)");
+ psync_replid = "?";
+ memcpy(psync_offset,"-1",3);
+ }
+
+ /* Issue the PSYNC command */
+ reply = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC",psync_replid,psync_offset,NULL);
+ if (reply != NULL) {
+ serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
+ sdsfree(reply);
+ aeDeleteFileEvent(server.el,fd,AE_READABLE);
+ return PSYNC_WRITE_ERROR;
+ }
+ return PSYNC_WAIT_REPLY;
}
- /* Issue the PSYNC command */
- reply = sendSynchronousCommand(fd,"PSYNC",psync_runid,psync_offset,NULL);
+ /* Reading half */
+ reply = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ if (sdslen(reply) == 0) {
+ /* The master may send empty newlines after it receives PSYNC
+ * and before to reply, just to keep the connection alive. */
+ sdsfree(reply);
+ return PSYNC_WAIT_REPLY;
+ }
+
+ aeDeleteFileEvent(server.el,fd,AE_READABLE);
if (!strncmp(reply,"+FULLRESYNC",11)) {
- char *runid = NULL, *offset = NULL;
+ char *replid = NULL, *offset = NULL;
/* FULL RESYNC, parse the reply in order to extract the run id
* and the replication offset. */
- runid = strchr(reply,' ');
- if (runid) {
- runid++;
- offset = strchr(runid,' ');
+ replid = strchr(reply,' ');
+ if (replid) {
+ replid++;
+ offset = strchr(replid,' ');
if (offset) offset++;
}
- if (!runid || !offset || (offset-runid-1) != REDIS_RUN_ID_SIZE) {
- redisLog(REDIS_WARNING,
+ if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) {
+ serverLog(LL_WARNING,
"Master replied with wrong +FULLRESYNC syntax.");
/* This is an unexpected condition, actually the +FULLRESYNC
* reply means that the master supports PSYNC, but the reply
* format seems wrong. To stay safe we blank the master
- * runid to make sure next PSYNCs will fail. */
- memset(server.repl_master_runid,0,REDIS_RUN_ID_SIZE+1);
+ * replid to make sure next PSYNCs will fail. */
+ memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1);
} else {
- memcpy(server.repl_master_runid, runid, offset-runid-1);
- server.repl_master_runid[REDIS_RUN_ID_SIZE] = '\0';
- server.repl_master_initial_offset = strtoll(offset,NULL,10);
- redisLog(REDIS_NOTICE,"Full resync from master: %s:%lld",
- server.repl_master_runid,
- server.repl_master_initial_offset);
+ memcpy(server.master_replid, replid, offset-replid-1);
+ server.master_replid[CONFIG_RUN_ID_SIZE] = '\0';
+ server.master_initial_offset = strtoll(offset,NULL,10);
+ serverLog(LL_NOTICE,"Full resync from master: %s:%lld",
+ server.master_replid,
+ server.master_initial_offset);
}
/* We are going to full resync, discard the cached master structure. */
replicationDiscardCachedMaster();
@@ -1000,24 +1492,71 @@ int slaveTryPartialResynchronization(int fd) {
}
if (!strncmp(reply,"+CONTINUE",9)) {
- /* Partial resync was accepted, set the replication state accordingly */
- redisLog(REDIS_NOTICE,
+ /* Partial resync was accepted. */
+ serverLog(LL_NOTICE,
"Successful partial resynchronization with master.");
+
+ /* Check the new replication ID advertised by the master. If it
+ * changed, we need to set the new ID as primary ID, and set or
+ * secondary ID as the old master ID up to the current offset, so
+ * that our sub-slaves will be able to PSYNC with us after a
+ * disconnection. */
+ char *start = reply+10;
+ char *end = reply+9;
+ while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
+ if (end-start == CONFIG_RUN_ID_SIZE) {
+ char new[CONFIG_RUN_ID_SIZE+1];
+ memcpy(new,start,CONFIG_RUN_ID_SIZE);
+ new[CONFIG_RUN_ID_SIZE] = '\0';
+
+ if (strcmp(new,server.cached_master->replid)) {
+ /* Master ID changed. */
+ serverLog(LL_WARNING,"Master replication ID changed to %s",new);
+
+ /* Set the old ID as our ID2, up to the current offset+1. */
+ memcpy(server.replid2,server.cached_master->replid,
+ sizeof(server.replid2));
+ server.second_replid_offset = server.master_repl_offset+1;
+
+ /* Update the cached master ID and our own primary ID to the
+ * new one. */
+ memcpy(server.replid,new,sizeof(server.replid));
+ memcpy(server.cached_master->replid,new,sizeof(server.replid));
+
+ /* Disconnect all the sub-slaves: they need to be notified. */
+ disconnectSlaves();
+ }
+ }
+
+ /* Setup the replication to continue. */
sdsfree(reply);
replicationResurrectCachedMaster(fd);
return PSYNC_CONTINUE;
}
- /* If we reach this point we receied either an error since the master does
- * not understand PSYNC, or an unexpected reply from the master.
- * Return PSYNC_NOT_SUPPORTED to the caller in both cases. */
+ /* If we reach this point we received either an error (since the master does
+ * not understand PSYNC or because it is in a special state and cannot
+ * serve our request), or an unexpected reply from the master.
+ *
+ * Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise
+ * return PSYNC_TRY_LATER if we believe this is a transient error. */
+
+ if (!strncmp(reply,"-NOMASTERLINK",13) ||
+ !strncmp(reply,"-LOADING",8))
+ {
+ serverLog(LL_NOTICE,
+ "Master is currently unable to PSYNC "
+ "but should be in the future: %s", reply);
+ sdsfree(reply);
+ return PSYNC_TRY_LATER;
+ }
if (strncmp(reply,"-ERR",4)) {
/* If it's not an error, log the unexpected event. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unexpected reply to PSYNC from master: %s", reply);
} else {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Master does not support PSYNC or is in "
"error state (reply: %s)", reply);
}
@@ -1026,108 +1565,180 @@ int slaveTryPartialResynchronization(int fd) {
return PSYNC_NOT_SUPPORTED;
}
+/* This handler fires when the non blocking connect was able to
+ * establish a connection with the master. */
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
- char tmpfile[256], *err;
- int dfd, maxtries = 5;
+ char tmpfile[256], *err = NULL;
+ int dfd = -1, maxtries = 5;
int sockerr = 0, psync_result;
socklen_t errlen = sizeof(sockerr);
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
/* If this event fired after the user turned the instance into a master
* with SLAVEOF NO ONE we must just return ASAP. */
- if (server.repl_state == REDIS_REPL_NONE) {
+ if (server.repl_state == REPL_STATE_NONE) {
close(fd);
return;
}
- /* Check for errors in the socket. */
+ /* Check for errors in the socket: after a non blocking connect() we
+ * may find that the socket is in error state. */
if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
sockerr = errno;
if (sockerr) {
- aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
- redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s",
+ serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
strerror(sockerr));
goto error;
}
- /* If we were connecting, it's time to send a non blocking PING, we want to
- * make sure the master is able to reply before going into the actual
- * replication process where we have long timeouts in the order of
- * seconds (in the meantime the slave would block). */
- if (server.repl_state == REDIS_REPL_CONNECTING) {
- redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
+ /* Send a PING to check the master is able to reply without errors. */
+ if (server.repl_state == REPL_STATE_CONNECTING) {
+ serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
- server.repl_state = REDIS_REPL_RECEIVE_PONG;
+ server.repl_state = REPL_STATE_RECEIVE_PONG;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
- syncWrite(fd,"PING\r\n",6,100);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PING",NULL);
+ if (err) goto write_error;
return;
}
/* Receive the PONG command. */
- if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
- char buf[1024];
-
- /* Delete the readable event, we no longer need it now that there is
- * the PING reply to read. */
- aeDeleteFileEvent(server.el,fd,AE_READABLE);
-
- /* Read the reply with explicit timeout. */
- buf[0] = '\0';
- if (syncReadLine(fd,buf,sizeof(buf),
- server.repl_syncio_timeout*1000) == -1)
- {
- redisLog(REDIS_WARNING,
- "I/O error reading PING reply from master: %s",
- strerror(errno));
- goto error;
- }
+ if (server.repl_state == REPL_STATE_RECEIVE_PONG) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
* Note that older versions of Redis replied with "operation not
* permitted" instead of using a proper error code, so we test
* both. */
- if (buf[0] != '+' &&
- strncmp(buf,"-NOAUTH",7) != 0 &&
- strncmp(buf,"-ERR operation not permitted",28) != 0)
+ if (err[0] != '+' &&
+ strncmp(err,"-NOAUTH",7) != 0 &&
+ strncmp(err,"-ERR operation not permitted",28) != 0)
{
- redisLog(REDIS_WARNING,"Error reply to PING from master: '%s'",buf);
+ serverLog(LL_WARNING,"Error reply to PING from master: '%s'",err);
+ sdsfree(err);
goto error;
} else {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Master replied to PING, replication can continue...");
}
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_AUTH;
}
/* AUTH with the master if required. */
- if(server.masterauth) {
- err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
+ if (server.repl_state == REPL_STATE_SEND_AUTH) {
+ if (server.masterauth) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",server.masterauth,NULL);
+ if (err) goto write_error;
+ server.repl_state = REPL_STATE_RECEIVE_AUTH;
+ return;
+ } else {
+ server.repl_state = REPL_STATE_SEND_PORT;
+ }
+ }
+
+ /* Receive AUTH reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
if (err[0] == '-') {
- redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
+ serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_PORT;
}
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
- {
- sds port = sdsfromlonglong(server.port);
- err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
- NULL);
+ if (server.repl_state == REPL_STATE_SEND_PORT) {
+ sds port = sdsfromlonglong(server.slave_announce_port ?
+ server.slave_announce_port : server.port);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "listening-port",port, NULL);
sdsfree(port);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_PORT;
+ return;
+ }
+
+ /* Receive REPLCONF listening-port reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_PORT) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
- redisLog(REDIS_NOTICE,"(Non critical) Master does not understand REPLCONF listening-port: %s", err);
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF listening-port: %s", err);
}
sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_IP;
+ }
+
+ /* Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
+ if (server.repl_state == REPL_STATE_SEND_IP &&
+ server.slave_announce_ip == NULL)
+ {
+ server.repl_state = REPL_STATE_SEND_CAPA;
+ }
+
+ /* Set the slave ip, so that Master's INFO command can list the
+ * slave IP address port correctly in case of port forwarding or NAT. */
+ if (server.repl_state == REPL_STATE_SEND_IP) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "ip-address",server.slave_announce_ip, NULL);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_IP;
+ return;
+ }
+
+ /* Receive REPLCONF ip-address reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_IP) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF ip-address. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF ip-address: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_CAPA;
+ }
+
+ /* Inform the master of our (slave) capabilities.
+ *
+ * EOF: supports EOF-style RDB transfer for diskless replication.
+ * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
+ *
+ * The master will ignore capabilities it does not understand. */
+ if (server.repl_state == REPL_STATE_SEND_CAPA) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "capa","eof","capa","psync2",NULL);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_CAPA;
+ return;
+ }
+
+ /* Receive CAPA reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_CAPA) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF capa. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF capa: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_PSYNC;
}
/* Try a partial resynchonization. If we don't have a cached master
@@ -1135,19 +1746,54 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
* to start a full resynchronization so that we get the master run id
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
- psync_result = slaveTryPartialResynchronization(fd);
+ if (server.repl_state == REPL_STATE_SEND_PSYNC) {
+ if (slaveTryPartialResynchronization(fd,0) == PSYNC_WRITE_ERROR) {
+ err = sdsnew("Write error sending the PSYNC command.");
+ goto write_error;
+ }
+ server.repl_state = REPL_STATE_RECEIVE_PSYNC;
+ return;
+ }
+
+ /* If reached this point, we should be in REPL_STATE_RECEIVE_PSYNC. */
+ if (server.repl_state != REPL_STATE_RECEIVE_PSYNC) {
+ serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
+ "state should be RECEIVE_PSYNC but is %d",
+ server.repl_state);
+ goto error;
+ }
+
+ psync_result = slaveTryPartialResynchronization(fd,1);
+ if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
+
+ /* If the master is in a transient error, we should try to PSYNC
+ * from scratch later, so go to the error path. This happens when
+ * the server is loading the dataset or is not connected with its
+ * master and so forth. */
+ if (psync_result == PSYNC_TRY_LATER) goto error;
+
+ /* Note: if PSYNC does not return WAIT_REPLY, it will take care of
+ * uninstalling the read handler from the file descriptor. */
+
if (psync_result == PSYNC_CONTINUE) {
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
return;
}
+ /* PSYNC failed or is not supported: we want our slaves to resync with us
+ * as well, if we have any sub-slaves. The master may transfer us an
+ * entirely different data set and we have no way to incrementally feed
+ * our slaves after that. */
+ disconnectSlaves(); /* Force our slaves to resync with us as well. */
+ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
+
/* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
- * and the server.repl_master_runid and repl_master_initial_offset are
+ * and the server.master_replid and master_initial_offset are
* already populated. */
if (psync_result == PSYNC_NOT_SUPPORTED) {
- redisLog(REDIS_NOTICE,"Retrying with SYNC...");
+ serverLog(LL_NOTICE,"Retrying with SYNC...");
if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
- redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
+ serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
}
@@ -1162,7 +1808,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
sleep(1);
}
if (dfd == -1) {
- redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
+ serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
goto error;
}
@@ -1170,13 +1816,13 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
== AE_ERR)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Can't create readable event for SYNC: %s (fd=%d)",
strerror(errno),fd);
goto error;
}
- server.repl_state = REDIS_REPL_TRANSFER;
+ server.repl_state = REPL_STATE_TRANSFER;
server.repl_transfer_size = -1;
server.repl_transfer_read = 0;
server.repl_transfer_last_fsync_off = 0;
@@ -1186,47 +1832,65 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
error:
+ aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
+ if (dfd != -1) close(dfd);
close(fd);
server.repl_transfer_s = -1;
- server.repl_state = REDIS_REPL_CONNECT;
+ server.repl_state = REPL_STATE_CONNECT;
return;
+
+write_error: /* Handle sendSynchronousCommand(SYNC_CMD_WRITE) errors. */
+ serverLog(LL_WARNING,"Sending command to master in replication handshake: %s", err);
+ sdsfree(err);
+ goto error;
}
int connectWithMaster(void) {
int fd;
- fd = anetTcpNonBlockConnect(NULL,server.masterhost,server.masterport);
+ fd = anetTcpNonBlockBestEffortBindConnect(NULL,
+ server.masterhost,server.masterport,NET_FIRST_BIND_ADDR);
if (fd == -1) {
- redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
+ serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
AE_ERR)
{
close(fd);
- redisLog(REDIS_WARNING,"Can't create readable event for SYNC");
- return REDIS_ERR;
+ serverLog(LL_WARNING,"Can't create readable event for SYNC");
+ return C_ERR;
}
server.repl_transfer_lastio = server.unixtime;
server.repl_transfer_s = fd;
- server.repl_state = REDIS_REPL_CONNECTING;
- return REDIS_OK;
+ server.repl_state = REPL_STATE_CONNECTING;
+ return C_OK;
}
/* This function can be called when a non blocking connection is currently
- * in progress to undo it. */
+ * in progress to undo it.
+ * Never call this function directly, use cancelReplicationHandshake() instead.
+ */
void undoConnectWithMaster(void) {
int fd = server.repl_transfer_s;
- redisAssert(server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG);
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
close(fd);
server.repl_transfer_s = -1;
- server.repl_state = REDIS_REPL_CONNECT;
+}
+
+/* Abort the async download of the bulk dataset while SYNC-ing with master.
+ * Never call this function directly, use cancelReplicationHandshake() instead.
+ */
+void replicationAbortSyncTransfer(void) {
+ serverAssert(server.repl_state == REPL_STATE_TRANSFER);
+ undoConnectWithMaster();
+ close(server.repl_transfer_fd);
+ unlink(server.repl_transfer_tmpfile);
+ zfree(server.repl_transfer_tmpfile);
}
/* This function aborts a non blocking replication attempt if there is one
@@ -1234,16 +1898,18 @@ void undoConnectWithMaster(void) {
* the initial bulk transfer.
*
* If there was a replication handshake in progress 1 is returned and
- * the replication state (server.repl_state) set to REDIS_REPL_CONNECT.
+ * the replication state (server.repl_state) set to REPL_STATE_CONNECT.
*
* Otherwise zero is returned and no operation is performed at all. */
int cancelReplicationHandshake(void) {
- if (server.repl_state == REDIS_REPL_TRANSFER) {
+ if (server.repl_state == REPL_STATE_TRANSFER) {
replicationAbortSyncTransfer();
- } else if (server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG)
+ server.repl_state = REPL_STATE_CONNECT;
+ } else if (server.repl_state == REPL_STATE_CONNECTING ||
+ slaveIsInHandshakeState())
{
undoConnectWithMaster();
+ server.repl_state = REPL_STATE_CONNECT;
} else {
return 0;
}
@@ -1252,16 +1918,24 @@ int cancelReplicationHandshake(void) {
/* Set replication to the specified master address and port. */
void replicationSetMaster(char *ip, int port) {
+ int was_master = server.masterhost == NULL;
+
sdsfree(server.masterhost);
server.masterhost = sdsnew(ip);
server.masterport = port;
- if (server.master) freeClient(server.master);
- disconnectSlaves(); /* Force our slaves to resync with us as well. */
- replicationDiscardCachedMaster(); /* Don't try a PSYNC. */
- freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
+ if (server.master) {
+ freeClient(server.master);
+ }
+ disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
+
+ /* Force our slaves to resync with us as well. They may hopefully be able
+ * to partially resync with us, but we can notify the replid change. */
+ disconnectSlaves();
cancelReplicationHandshake();
- server.repl_state = REDIS_REPL_CONNECT;
- server.master_repl_offset = 0;
+ /* Before destroying our master state, create a cached master using
+ * our own parameters, to later PSYNC with the new master. */
+ if (was_master) replicationCacheMasterUsingMyself();
+ server.repl_state = REPL_STATE_CONNECT;
server.repl_down_since = 0;
}
@@ -1270,23 +1944,40 @@ void replicationUnsetMaster(void) {
if (server.masterhost == NULL) return; /* Nothing to do. */
sdsfree(server.masterhost);
server.masterhost = NULL;
- if (server.master) {
- if (listLength(server.slaves) == 0) {
- /* If this instance is turned into a master and there are no
- * slaves, it inherits the replication offset from the master.
- * Under certain conditions this makes replicas comparable by
- * replication offset to understand what is the most updated. */
- server.master_repl_offset = server.master->reploff;
- freeReplicationBacklog();
- }
- freeClient(server.master);
- }
+ /* When a slave is turned into a master, the current replication ID
+ * (that was inherited from the master at synchronization time) is
+ * used as secondary ID up to the current offset, and a new replication
+ * ID is created to continue with a new replication history. */
+ shiftReplicationId();
+ if (server.master) freeClient(server.master);
replicationDiscardCachedMaster();
cancelReplicationHandshake();
- server.repl_state = REDIS_REPL_NONE;
+ /* Disconnecting all the slaves is required: we need to inform slaves
+ * of the replication ID change (see shiftReplicationId() call). However
+ * the slaves will be able to partially resync with us, so it will be
+ * a very fast reconnection. */
+ disconnectSlaves();
+ server.repl_state = REPL_STATE_NONE;
+
+ /* We need to make sure the new master will start the replication stream
+ * with a SELECT statement. This is forced after a full resync, but
+ * with PSYNC version 2, there is no need for full resync after a
+ * master switch. */
+ server.slaveseldb = -1;
+}
+
+/* This function is called when the slave loses the connection with the
+ * master in an unexpected way. */
+void replicationHandleMasterDisconnection(void) {
+ server.master = NULL;
+ server.repl_state = REPL_STATE_CONNECT;
+ server.repl_down_since = server.unixtime;
+ /* We lost connection with our master, don't disconnect slaves yet,
+ * maybe we'll be able to PSYNC with our master later. We'll disconnect
+ * the slaves only if we'll have to do a full resync with our master. */
}
-void slaveofCommand(redisClient *c) {
+void slaveofCommand(client *c) {
/* SLAVEOF is not allowed in cluster mode as replication is automatically
* configured using the current address of the master node. */
if (server.cluster_enabled) {
@@ -1300,26 +1991,31 @@ void slaveofCommand(redisClient *c) {
!strcasecmp(c->argv[2]->ptr,"one")) {
if (server.masterhost) {
replicationUnsetMaster();
- redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
+ sds client = catClientInfoString(sdsempty(),c);
+ serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')",
+ client);
+ sdsfree(client);
}
} else {
long port;
- if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != C_OK))
return;
/* Check if we are already attached to the specified slave */
if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr)
&& server.masterport == port) {
- redisLog(REDIS_NOTICE,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
+ serverLog(LL_NOTICE,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
addReplySds(c,sdsnew("+OK Already connected to specified master\r\n"));
return;
}
/* There was no previous master or the user specified a different one,
* we can continue. */
replicationSetMaster(c->argv[1]->ptr, port);
- redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
- server.masterhost, server.masterport);
+ sds client = catClientInfoString(sdsempty(),c);
+ serverLog(LL_NOTICE,"SLAVE OF %s:%d enabled (user request from '%s')",
+ server.masterhost, server.masterport, client);
+ sdsfree(client);
}
addReply(c,shared.ok);
}
@@ -1327,7 +2023,7 @@ void slaveofCommand(redisClient *c) {
/* ROLE command: provide information about the role of the instance
* (master or slave) and additional information related to replication
* in an easy to process format. */
-void roleCommand(redisClient *c) {
+void roleCommand(client *c) {
if (server.masterhost == NULL) {
listIter li;
listNode *ln;
@@ -1340,13 +2036,17 @@ void roleCommand(redisClient *c) {
mbcount = addDeferredMultiBulkLength(c);
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
- char ip[REDIS_IP_STR_LEN];
+ client *slave = ln->value;
+ char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
- if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1) continue;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
+ if (slaveip[0] == '\0') {
+ if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1)
+ continue;
+ slaveip = ip;
+ }
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
addReplyMultiBulkLen(c,3);
- addReplyBulkCString(c,ip);
+ addReplyBulkCString(c,slaveip);
addReplyBulkLongLong(c,slave->slave_listening_port);
addReplyBulkLongLong(c,slave->repl_ack_off);
slaves++;
@@ -1359,14 +2059,17 @@ void roleCommand(redisClient *c) {
addReplyBulkCBuffer(c,"slave",5);
addReplyBulkCString(c,server.masterhost);
addReplyLongLong(c,server.masterport);
- switch(server.repl_state) {
- case REDIS_REPL_NONE: slavestate = "none"; break;
- case REDIS_REPL_CONNECT: slavestate = "connect"; break;
- case REDIS_REPL_CONNECTING: slavestate = "connecting"; break;
- case REDIS_REPL_RECEIVE_PONG: /* see next */
- case REDIS_REPL_TRANSFER: slavestate = "sync"; break;
- case REDIS_REPL_CONNECTED: slavestate = "connected"; break;
- default: slavestate = "unknown"; break;
+ if (slaveIsInHandshakeState()) {
+ slavestate = "handshake";
+ } else {
+ switch(server.repl_state) {
+ case REPL_STATE_NONE: slavestate = "none"; break;
+ case REPL_STATE_CONNECT: slavestate = "connect"; break;
+ case REPL_STATE_CONNECTING: slavestate = "connecting"; break;
+ case REPL_STATE_TRANSFER: slavestate = "sync"; break;
+ case REPL_STATE_CONNECTED: slavestate = "connected"; break;
+ default: slavestate = "unknown"; break;
+ }
}
addReplyBulkCString(c,slavestate);
addReplyLongLong(c,server.master ? server.master->reploff : -1);
@@ -1377,15 +2080,15 @@ void roleCommand(redisClient *c) {
* processed offset. If we are not connected with a master, the command has
* no effects. */
void replicationSendAck(void) {
- redisClient *c = server.master;
+ client *c = server.master;
if (c != NULL) {
- c->flags |= REDIS_MASTER_FORCE_REPLY;
+ c->flags |= CLIENT_MASTER_FORCE_REPLY;
addReplyMultiBulkLen(c,3);
addReplyBulkCString(c,"REPLCONF");
addReplyBulkCString(c,"ACK");
addReplyBulkLongLong(c,c->reploff);
- c->flags &= ~REDIS_MASTER_FORCE_REPLY;
+ c->flags &= ~CLIENT_MASTER_FORCE_REPLY;
}
}
@@ -1409,31 +2112,29 @@ void replicationSendAck(void) {
* replicationResurrectCachedMaster() that is used after a successful PSYNC
* handshake in order to reactivate the cached master.
*/
-void replicationCacheMaster(redisClient *c) {
- listNode *ln;
-
- redisAssert(server.master != NULL && server.cached_master == NULL);
- redisLog(REDIS_NOTICE,"Caching the disconnected master state.");
-
- /* Remove from the list of clients, we don't want this client to be
- * listed by CLIENT LIST or processed in any way by batch operations. */
- ln = listSearchKey(server.clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.clients,ln);
+void replicationCacheMaster(client *c) {
+ serverAssert(server.master != NULL && server.cached_master == NULL);
+ serverLog(LL_NOTICE,"Caching the disconnected master state.");
+
+ /* Unlink the client from the server structures. */
+ unlinkClient(c);
+
+ /* Reset the master client so that it's ready to accept new commands:
+ * we want to discard the non processed query buffers and non processed
+ * offsets, including pending transactions, already populated arguments,
+ * pending outputs to the master. */
+ sdsclear(server.master->querybuf);
+ sdsclear(server.master->pending_querybuf);
+ server.master->read_reploff = server.master->reploff;
+ if (c->flags & CLIENT_MULTI) discardTransaction(c);
+ listEmpty(c->reply);
+ c->bufpos = 0;
+ resetClient(c);
/* Save the master. Server.master will be set to null later by
* replicationHandleMasterDisconnection(). */
server.cached_master = server.master;
- /* Remove the event handlers and close the socket. We'll later reuse
- * the socket of the new connection with the master during PSYNC. */
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- close(c->fd);
-
- /* Set fd to -1 so that we can safely call freeClient(c) later. */
- c->fd = -1;
-
/* Invalidate the Peer ID cache. */
if (c->peerid) {
sdsfree(c->peerid);
@@ -1446,13 +2147,38 @@ void replicationCacheMaster(redisClient *c) {
replicationHandleMasterDisconnection();
}
+/* This function is called when a master is turned into a slave, in order to
+ * create from scratch a cached master for the new client, that will allow
+ * to PSYNC with the slave that was promoted as the new master after a
+ * failover.
+ *
+ * Assuming this instance was previously the master instance of the new master,
+ * the new master will accept its replication ID, and potentially also the
+ * current offset if no data was lost during the failover. So we use our
+ * current replication ID and offset in order to synthesize a cached master. */
+void replicationCacheMasterUsingMyself(void) {
+ /* The master client we create can be set to any DBID, because
+ * the new master will start its replication stream with SELECT. */
+ server.master_initial_offset = server.master_repl_offset;
+ replicationCreateMasterClient(-1,-1);
+
+ /* Use our own ID / offset. */
+ memcpy(server.master->replid, server.replid, sizeof(server.replid));
+
+ /* Set as cached master. */
+ unlinkClient(server.master);
+ server.cached_master = server.master;
+ server.master = NULL;
+ serverLog(LL_NOTICE,"Before turning into a slave, using my master parameters to synthesize a cached master: I may be able to synchronize with the new master with just a partial transfer.");
+}
+
/* Free a cached master, called when there are no longer the conditions for
* a partial resync on reconnection. */
void replicationDiscardCachedMaster(void) {
if (server.cached_master == NULL) return;
- redisLog(REDIS_NOTICE,"Discarding previously cached master state.");
- server.cached_master->flags &= ~REDIS_MASTER;
+ serverLog(LL_NOTICE,"Discarding previously cached master state.");
+ server.cached_master->flags &= ~CLIENT_MASTER;
freeClient(server.cached_master);
server.cached_master = NULL;
}
@@ -1460,32 +2186,32 @@ void replicationDiscardCachedMaster(void) {
/* Turn the cached master into the current master, using the file descriptor
* passed as argument as the socket for the new master.
*
- * This funciton is called when successfully setup a partial resynchronization
+ * This function is called when successfully setup a partial resynchronization
* so the stream of data that we'll receive will start from where this
* master left. */
void replicationResurrectCachedMaster(int newfd) {
server.master = server.cached_master;
server.cached_master = NULL;
server.master->fd = newfd;
- server.master->flags &= ~(REDIS_CLOSE_AFTER_REPLY|REDIS_CLOSE_ASAP);
+ server.master->flags &= ~(CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP);
server.master->authenticated = 1;
server.master->lastinteraction = server.unixtime;
- server.repl_state = REDIS_REPL_CONNECTED;
+ server.repl_state = REPL_STATE_CONNECTED;
/* Re-add to the list of clients. */
listAddNodeTail(server.clients,server.master);
if (aeCreateFileEvent(server.el, newfd, AE_READABLE,
readQueryFromClient, server.master)) {
- redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
/* We may also need to install the write handler as well if there is
* pending data in the write buffers. */
- if (server.master->bufpos || listLength(server.master->reply)) {
+ if (clientHasPendingReplies(server.master)) {
if (aeCreateFileEvent(server.el, newfd, AE_WRITABLE,
sendReplyToClient, server.master)) {
- redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
}
@@ -1506,10 +2232,10 @@ void refreshGoodSlavesCount(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
time_t lag = server.unixtime - slave->repl_ack_time;
- if (slave->replstate == REDIS_REPL_ONLINE &&
+ if (slave->replstate == SLAVE_STATE_ONLINE &&
lag <= server.repl_min_slaves_max_lag) good++;
}
server.repl_good_slaves_count = good;
@@ -1583,14 +2309,14 @@ void replicationScriptCacheAdd(sds sha1) {
sds oldest = listNodeValue(ln);
retval = dictDelete(server.repl_scriptcache_dict,oldest);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
listDelNode(server.repl_scriptcache_fifo,ln);
}
/* Add current. */
retval = dictAdd(server.repl_scriptcache_dict,key,NULL);
listAddNodeHead(server.repl_scriptcache_fifo,key);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
/* Returns non-zero if the specified entry exists inside the cache, that is,
@@ -1642,9 +2368,9 @@ int replicationCountAcksByOffset(long long offset) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
if (slave->repl_ack_off >= offset) count++;
}
return count;
@@ -1652,20 +2378,25 @@ int replicationCountAcksByOffset(long long offset) {
/* WAIT for N replicas to acknowledge the processing of our latest
* write command (and all the previous commands). */
-void waitCommand(redisClient *c) {
+void waitCommand(client *c) {
mstime_t timeout;
long numreplicas, ackreplicas;
long long offset = c->woff;
+ if (server.masterhost) {
+ addReplyError(c,"WAIT cannot be used with slave instances. Please also note that since Redis 4.0 if a slave is configured to be writable (which is not the default) writes to slaves are just local and are not propagated.");
+ return;
+ }
+
/* Argument parsing. */
- if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != C_OK)
return;
if (getTimeoutFromObjectOrReply(c,c->argv[2],&timeout,UNIT_MILLISECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
/* First try without blocking at all. */
ackreplicas = replicationCountAcksByOffset(c->woff);
- if (ackreplicas >= numreplicas || c->flags & REDIS_MULTI) {
+ if (ackreplicas >= numreplicas || c->flags & CLIENT_MULTI) {
addReplyLongLong(c,ackreplicas);
return;
}
@@ -1676,7 +2407,7 @@ void waitCommand(redisClient *c) {
c->bpop.reploffset = offset;
c->bpop.numreplicas = numreplicas;
listAddNodeTail(server.clients_waiting_acks,c);
- blockClient(c,REDIS_BLOCKED_WAIT);
+ blockClient(c,BLOCKED_WAIT);
/* Make sure that the server will send an ACK request to all the slaves
* before returning to the event loop. */
@@ -1687,9 +2418,9 @@ void waitCommand(redisClient *c) {
* specific cleanup. We just remove the client from the list of clients
* waiting for replica acks. Never call it directly, call unblockClient()
* instead. */
-void unblockClientWaitingReplicas(redisClient *c) {
+void unblockClientWaitingReplicas(client *c) {
listNode *ln = listSearchKey(server.clients_waiting_acks,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(server.clients_waiting_acks,ln);
}
@@ -1704,7 +2435,7 @@ void processClientsWaitingReplicas(void) {
listRewind(server.clients_waiting_acks,&li);
while((ln = listNext(&li))) {
- redisClient *c = ln->value;
+ client *c = ln->value;
/* Every time we find a client that is satisfied for a given
* offset and number of replicas, we remember it so the next client
@@ -1750,40 +2481,42 @@ long long replicationGetSlaveOffset(void) {
/* --------------------------- REPLICATION CRON ---------------------------- */
-/* Replication cron funciton, called 1 time per second. */
+/* Replication cron function, called 1 time per second. */
void replicationCron(void) {
+ static long long replication_cron_loops = 0;
+
/* Non blocking connection timeout? */
if (server.masterhost &&
- (server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
- (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
+ (server.repl_state == REPL_STATE_CONNECTING ||
+ slaveIsInHandshakeState()) &&
+ (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");
- undoConnectWithMaster();
+ serverLog(LL_WARNING,"Timeout connecting to the MASTER...");
+ cancelReplicationHandshake();
}
/* Bulk transfer I/O timeout? */
- if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER &&
+ if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
- replicationAbortSyncTransfer();
+ serverLog(LL_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
+ cancelReplicationHandshake();
}
/* Timed out master when we are an already connected slave? */
- if (server.masterhost && server.repl_state == REDIS_REPL_CONNECTED &&
+ if (server.masterhost && server.repl_state == REPL_STATE_CONNECTED &&
(time(NULL)-server.master->lastinteraction) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"MASTER timeout: no data nor PING received...");
+ serverLog(LL_WARNING,"MASTER timeout: no data nor PING received...");
freeClient(server.master);
}
/* Check if we should connect to a MASTER */
- if (server.repl_state == REDIS_REPL_CONNECT) {
- redisLog(REDIS_NOTICE,"Connecting to MASTER %s:%d",
+ if (server.repl_state == REPL_STATE_CONNECT) {
+ serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
server.masterhost, server.masterport);
- if (connectWithMaster() == REDIS_OK) {
- redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync started");
+ if (connectWithMaster() == C_OK) {
+ serverLog(LL_NOTICE,"MASTER <-> SLAVE sync started");
}
}
@@ -1791,36 +2524,53 @@ void replicationCron(void) {
* Note that we do not send periodic acks to masters that don't
* support PSYNC and replication offsets. */
if (server.masterhost && server.master &&
- !(server.master->flags & REDIS_PRE_PSYNC))
+ !(server.master->flags & CLIENT_PRE_PSYNC))
replicationSendAck();
/* If we have attached slaves, PING them from time to time.
* So slaves can implement an explicit timeout to masters, and will
* be able to detect a link disconnection even if the TCP connection
* will not actually go down. */
- if (!(server.cronloops % (server.repl_ping_slave_period * server.hz))) {
- listIter li;
- listNode *ln;
- robj *ping_argv[1];
+ listIter li;
+ listNode *ln;
+ robj *ping_argv[1];
- /* First, send PING */
+ /* First, send PING according to ping_slave_period. */
+ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 &&
+ listLength(server.slaves))
+ {
ping_argv[0] = createStringObject("PING",4);
- replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1);
+ replicationFeedSlaves(server.slaves, server.slaveseldb,
+ ping_argv, 1);
decrRefCount(ping_argv[0]);
+ }
- /* Second, send a newline to all the slaves in pre-synchronization
- * stage, that is, slaves waiting for the master to create the RDB file.
- * The newline will be ignored by the slave but will refresh the
- * last-io timer preventing a timeout. */
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ /* Second, send a newline to all the slaves in pre-synchronization
+ * stage, that is, slaves waiting for the master to create the RDB file.
+ *
+ * Also send a newline to all the chained slaves we have, if we lost
+ * connection from our master, to keep the slaves aware that their
+ * master is online. This is needed since sub-slaves only receive proxied
+ * data from top-level masters, so there is no explicit pinging in order
+ * to avoid altering the replication offsets. These special out-of-band
+ * pings (newlines) can be sent; they will have no effect on the offset.
+ *
+ * The newline will be ignored by the slave but will refresh the
+ * last interaction timer preventing a timeout. In this case we ignore the
+ * ping period and refresh the connection once per second since certain
+ * timeouts are set at a few seconds (example: PSYNC response). */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START ||
- slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
- if (write(slave->fd, "\n", 1) == -1) {
- /* Don't worry, it's just a ping. */
- }
+ int is_presync =
+ (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
+ (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+ server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));
+
+ if (is_presync) {
+ if (write(slave->fd, "\n", 1) == -1) {
+ /* Don't worry about socket errors, it's just a ping. */
}
}
}
@@ -1832,35 +2582,33 @@ void replicationCron(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
- if (slave->flags & REDIS_PRE_PSYNC) continue;
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
+ if (slave->flags & CLIENT_PRE_PSYNC) continue;
if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout)
{
- char ip[REDIS_IP_STR_LEN];
- int port;
-
- if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) != -1) {
- redisLog(REDIS_WARNING,
- "Disconnecting timedout slave: %s:%d",
- ip, slave->slave_listening_port);
- }
+ serverLog(LL_WARNING, "Disconnecting timedout slave: %s",
+ replicationGetSlaveName(slave));
freeClient(slave);
}
}
}
- /* If we have no attached slaves and there is a replication backlog
- * using memory, free it after some (configured) time. */
+ /* If this is a master without attached slaves and there is a replication
+ * backlog active, in order to reclaim memory we can free it after some
+ * (configured) time. Note that this cannot be done for slaves: slaves
+ * without sub-slaves attached should still accumulate data into the
+ * backlog, in order to reply to PSYNC queries if they are turned into
+ * masters after a failover. */
if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&
- server.repl_backlog)
+ server.repl_backlog && server.masterhost == NULL)
{
time_t idle = server.unixtime - server.repl_no_slaves_since;
if (idle > server.repl_backlog_time_limit) {
freeReplicationBacklog();
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Replication backlog freed after %d seconds "
"without connected slaves.",
(int) server.repl_backlog_time_limit);
@@ -1871,12 +2619,49 @@ void replicationCron(void) {
* free our Replication Script Cache as there is no need to propagate
* EVALSHA at all. */
if (listLength(server.slaves) == 0 &&
- server.aof_state == REDIS_AOF_OFF &&
+ server.aof_state == AOF_OFF &&
listLength(server.repl_scriptcache_fifo) != 0)
{
replicationScriptCacheFlush();
}
+ /* Start a BGSAVE good for replication if we have slaves in
+ * WAIT_BGSAVE_START state.
+ *
+ * In case of diskless replication, we make sure to wait the specified
+ * number of seconds (according to configuration) so that other slaves
+ * have the time to arrive before we start streaming. */
+ if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
+ time_t idle, max_idle = 0;
+ int slaves_waiting = 0;
+ int mincapa = -1;
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ idle = server.unixtime - slave->lastinteraction;
+ if (idle > max_idle) max_idle = idle;
+ slaves_waiting++;
+ mincapa = (mincapa == -1) ? slave->slave_capa :
+ (mincapa & slave->slave_capa);
+ }
+ }
+
+ if (slaves_waiting &&
+ (!server.repl_diskless_sync ||
+ max_idle > server.repl_diskless_sync_delay))
+ {
+ /* Start the BGSAVE. The called function may start a
+ * BGSAVE with socket target or disk target depending on the
+ * configuration and slaves capabilities. */
+ startBgsaveForReplication(mincapa);
+ }
+ }
+
/* Refresh the number of slaves with lag <= min-slaves-max-lag. */
refreshGoodSlavesCount();
+ replication_cron_loops++; /* Incremented with frequency 1 HZ. */
}
diff --git a/src/rio.c b/src/rio.c
index 44f9b7a01..9c7220fcc 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -53,7 +53,9 @@
#include "util.h"
#include "crc64.h"
#include "config.h"
-#include "redis.h"
+#include "server.h"
+
+/* ------------------------- Buffer I/O implementation ----------------------- */
/* Returns 1 or 0 for success/failure. */
static size_t rioBufferWrite(rio *r, const void *buf, size_t len) {
@@ -76,6 +78,33 @@ static off_t rioBufferTell(rio *r) {
return r->io.buffer.pos;
}
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioBufferFlush(rio *r) {
+ UNUSED(r);
+ return 1; /* Nothing to do, our write just appends to the buffer. */
+}
+
+static const rio rioBufferIO = {
+ rioBufferRead,
+ rioBufferWrite,
+ rioBufferTell,
+ rioBufferFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+void rioInitWithBuffer(rio *r, sds s) {
+ *r = rioBufferIO;
+ r->io.buffer.ptr = s;
+ r->io.buffer.pos = 0;
+}
+
+/* --------------------- Stdio file pointer implementation ------------------- */
+
/* Returns 1 or 0 for success/failure. */
static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
size_t retval;
@@ -103,21 +132,17 @@ static off_t rioFileTell(rio *r) {
return ftello(r->io.file.fp);
}
-static const rio rioBufferIO = {
- rioBufferRead,
- rioBufferWrite,
- rioBufferTell,
- NULL, /* update_checksum */
- 0, /* current checksum */
- 0, /* bytes read or written */
- 0, /* read/write chunk size */
- { { NULL, 0 } } /* union for io-specific vars */
-};
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioFileFlush(rio *r) {
+ return (fflush(r->io.file.fp) == 0) ? 1 : 0;
+}
static const rio rioFileIO = {
rioFileRead,
rioFileWrite,
rioFileTell,
+ rioFileFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* bytes read or written */
@@ -132,12 +157,134 @@ void rioInitWithFile(rio *r, FILE *fp) {
r->io.file.autosync = 0;
}
-void rioInitWithBuffer(rio *r, sds s) {
- *r = rioBufferIO;
- r->io.buffer.ptr = s;
- r->io.buffer.pos = 0;
+/* ------------------- File descriptors set implementation ------------------- */
+
+/* Returns 1 or 0 for success/failure.
+ * The function returns success as long as we are able to correctly write
+ * to at least one file descriptor.
+ *
+ * When buf is NULL and len is 0, the function performs a flush operation
+ * if there is some pending buffer, so this function is also used in order
+ * to implement rioFdsetFlush(). */
+static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
+ ssize_t retval;
+ int j;
+ unsigned char *p = (unsigned char*) buf;
+ int doflush = (buf == NULL && len == 0);
+
+ /* To start we always append to our buffer. If it gets larger than
+ * a given size, we actually write to the sockets. */
+ if (len) {
+ r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
+ len = 0; /* Prevent entering the while below if we don't flush. */
+ if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1;
+ }
+
+ if (doflush) {
+ p = (unsigned char*) r->io.fdset.buf;
+ len = sdslen(r->io.fdset.buf);
+ }
+
+ /* Write in little chunks so that when there are big writes we
+ * parallelize while the kernel is sending data in background to
+ * the TCP socket. */
+ while(len) {
+ size_t count = len < 1024 ? len : 1024;
+ int broken = 0;
+ for (j = 0; j < r->io.fdset.numfds; j++) {
+ if (r->io.fdset.state[j] != 0) {
+ /* Skip FDs already in error. */
+ broken++;
+ continue;
+ }
+
+ /* Make sure to write 'count' bytes to the socket regardless
+ * of short writes. */
+ size_t nwritten = 0;
+ while(nwritten != count) {
+ retval = write(r->io.fdset.fds[j],p+nwritten,count-nwritten);
+ if (retval <= 0) {
+ /* With blocking sockets, which is the sole user of this
+ * rio target, EWOULDBLOCK is returned only because of
+ * the SO_SNDTIMEO socket option, so we translate the error
+ * into one more recognizable by the user. */
+ if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
+ break;
+ }
+ nwritten += retval;
+ }
+
+ if (nwritten != count) {
+ /* Mark this FD as broken. */
+ r->io.fdset.state[j] = errno;
+ if (r->io.fdset.state[j] == 0) r->io.fdset.state[j] = EIO;
+ }
+ }
+ if (broken == r->io.fdset.numfds) return 0; /* All the FDs in error. */
+ p += count;
+ len -= count;
+ r->io.fdset.pos += count;
+ }
+
+ if (doflush) sdsclear(r->io.fdset.buf);
+ return 1;
}
+/* Returns 1 or 0 for success/failure. */
+static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
+ UNUSED(r);
+ UNUSED(buf);
+ UNUSED(len);
+ return 0; /* Error, this target does not support reading. */
+}
+
+/* Returns read/write position in file. */
+static off_t rioFdsetTell(rio *r) {
+ return r->io.fdset.pos;
+}
+
+/* Flushes any buffer to target device if applicable. Returns 1 on success
+ * and 0 on failures. */
+static int rioFdsetFlush(rio *r) {
+ /* Our flush is implemented by the write method, that recognizes a
+ * buffer set to NULL with a count of zero as a flush request. */
+ return rioFdsetWrite(r,NULL,0);
+}
+
+static const rio rioFdsetIO = {
+ rioFdsetRead,
+ rioFdsetWrite,
+ rioFdsetTell,
+ rioFdsetFlush,
+ NULL, /* update_checksum */
+ 0, /* current checksum */
+ 0, /* bytes read or written */
+ 0, /* read/write chunk size */
+ { { NULL, 0 } } /* union for io-specific vars */
+};
+
+void rioInitWithFdset(rio *r, int *fds, int numfds) {
+ int j;
+
+ *r = rioFdsetIO;
+ r->io.fdset.fds = zmalloc(sizeof(int)*numfds);
+ r->io.fdset.state = zmalloc(sizeof(int)*numfds);
+ memcpy(r->io.fdset.fds,fds,sizeof(int)*numfds);
+ for (j = 0; j < numfds; j++) r->io.fdset.state[j] = 0;
+ r->io.fdset.numfds = numfds;
+ r->io.fdset.pos = 0;
+ r->io.fdset.buf = sdsempty();
+}
+
+/* release the rio stream. */
+void rioFreeFdset(rio *r) {
+ zfree(r->io.fdset.fds);
+ zfree(r->io.fdset.state);
+ sdsfree(r->io.fdset.buf);
+}
+
+/* ---------------------------- Generic functions ---------------------------- */
+
/* This function can be installed both in memory and file streams when checksum
* computation is needed. */
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
@@ -153,11 +300,12 @@ void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
* disk I/O concentrated in very little time. When we fsync in an explicit
* way instead the I/O pressure is more distributed across time. */
void rioSetAutoSync(rio *r, off_t bytes) {
- redisAssert(r->read == rioFileIO.read);
+ serverAssert(r->read == rioFileIO.read);
r->io.file.autosync = bytes;
}
-/* ------------------------------ Higher level interface ---------------------------
+/* --------------------------- Higher level interface --------------------------
+ *
* The following higher level functions use lower level rio.c functions to help
* generating the Redis protocol for the Append Only File. */
diff --git a/src/rio.h b/src/rio.h
index 2d12c6cc7..6749723d2 100644
--- a/src/rio.h
+++ b/src/rio.h
@@ -43,6 +43,7 @@ struct _rio {
size_t (*read)(struct _rio *, void *buf, size_t len);
size_t (*write)(struct _rio *, const void *buf, size_t len);
off_t (*tell)(struct _rio *);
+ int (*flush)(struct _rio *);
/* The update_cksum method if not NULL is used to compute the checksum of
* all the data that was read or written so far. The method should be
* designed so that can be called with the current checksum, and the buf
@@ -61,15 +62,25 @@ struct _rio {
/* Backend-specific vars. */
union {
+ /* In-memory buffer target. */
struct {
sds ptr;
off_t pos;
} buffer;
+ /* Stdio file pointer target. */
struct {
FILE *fp;
off_t buffered; /* Bytes written since last fsync. */
off_t autosync; /* fsync after 'autosync' bytes written. */
} file;
+ /* Multiple FDs target (used to write to N sockets). */
+ struct {
+ int *fds; /* File descriptors. */
+ int *state; /* Error state of each fd. 0 (if ok) or errno. */
+ int numfds;
+ off_t pos;
+ sds buf;
+ } fdset;
} io;
};
@@ -109,14 +120,24 @@ static inline off_t rioTell(rio *r) {
return r->tell(r);
}
+static inline int rioFlush(rio *r) {
+ return r->flush(r);
+}
+
void rioInitWithFile(rio *r, FILE *fp);
void rioInitWithBuffer(rio *r, sds s);
+void rioInitWithFdset(rio *r, int *fds, int numfds);
+
+void rioFreeFdset(rio *r);
size_t rioWriteBulkCount(rio *r, char prefix, int count);
size_t rioWriteBulkString(rio *r, const char *buf, size_t len);
size_t rioWriteBulkLongLong(rio *r, long long l);
size_t rioWriteBulkDouble(rio *r, double d);
+struct redisObject;
+int rioWriteBulkObject(rio *r, struct redisObject *obj);
+
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len);
void rioSetAutoSync(rio *r, off_t bytes);
diff --git a/src/scripting.c b/src/scripting.c
index ef00eede6..8f8145b2c 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -27,9 +27,10 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "sha1.h"
#include "rand.h"
+#include "cluster.h"
#include <lua.h>
#include <lauxlib.h>
@@ -44,7 +45,67 @@ char *redisProtocolToLuaType_Error(lua_State *lua, char *reply);
char *redisProtocolToLuaType_MultiBulk(lua_State *lua, char *reply);
int redis_math_random (lua_State *L);
int redis_math_randomseed (lua_State *L);
-void sha1hex(char *digest, char *script, size_t len);
+void ldbInit(void);
+void ldbDisable(client *c);
+void ldbEnable(client *c);
+void evalGenericCommandWithDebugging(client *c, int evalsha);
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar);
+void ldbLog(sds entry);
+void ldbLogRedisReply(char *reply);
+sds ldbCatStackValue(sds s, lua_State *lua, int idx);
+
+/* Debugger shared state is stored inside this global structure. */
+#define LDB_BREAKPOINTS_MAX 64 /* Max number of breakpoints. */
+#define LDB_MAX_LEN_DEFAULT 256 /* Default len limit for replies / var dumps. */
+struct ldbState {
+ int fd; /* Socket of the debugging client. */
+ int active; /* Are we debugging EVAL right now? */
+ int forked; /* Is this a fork()ed debugging session? */
+ list *logs; /* List of messages to send to the client. */
+ list *traces; /* Messages about Redis commands executed since last stop.*/
+ list *children; /* All forked debugging sessions pids. */
+ int bp[LDB_BREAKPOINTS_MAX]; /* An array of breakpoints line numbers. */
+ int bpcount; /* Number of valid entries inside bp. */
+ int step; /* Stop at next line regardless of breakpoints. */
+ int luabp; /* Stop at next line because redis.breakpoint() was called. */
+ sds *src; /* Lua script source code split by line. */
+ int lines; /* Number of lines in 'src'. */
+ int currentline; /* Current line number. */
+ sds cbuf; /* Debugger client command buffer. */
+ size_t maxlen; /* Max var dump / reply length. */
+ int maxlen_hint_sent; /* Did we already hint about "set maxlen"? */
+} ldb;
+
+/* ---------------------------------------------------------------------------
+ * Utility functions.
+ * ------------------------------------------------------------------------- */
+
+/* Perform the SHA1 of the input string. We use this both for hashing script
+ * bodies in order to obtain the Lua function name, and in the implementation
+ * of redis.sha1().
+ *
+ * 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an
+ * hexadecimal number, plus 1 byte for null term. */
+void sha1hex(char *digest, char *script, size_t len) {
+ SHA1_CTX ctx;
+ unsigned char hash[20];
+ char *cset = "0123456789abcdef";
+ int j;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,(unsigned char*)script,len);
+ SHA1Final(hash,&ctx);
+
+ for (j = 0; j < 20; j++) {
+ digest[j*2] = cset[((hash[j]&0xF0)>>4)];
+ digest[j*2+1] = cset[(hash[j]&0xF)];
+ }
+ digest[40] = '\0';
+}
+
+/* ---------------------------------------------------------------------------
+ * Redis reply to Lua type conversion functions.
+ * ------------------------------------------------------------------------- */
/* Take a Redis reply in the Redis protocol format and convert it into a
* Lua type. Thanks to this function, and the introduction of not connected
@@ -53,13 +114,11 @@ void sha1hex(char *digest, char *script, size_t len);
* Basically we take the arguments, execute the Redis command in the context
* of a non connected client, then take the generated reply and convert it
* into a suitable Lua type. With this trick the scripting feature does not
- * need the introduction of a full Redis internals API. Basically the script
+ * need the introduction of a full Redis internals API. The script
* is like a normal client that bypasses all the slow I/O paths.
*
* Note: in this function we do not do any sanity check as the reply is
* generated by Redis directly. This allows us to go faster.
- * The reply string can be altered during the parsing as it is discarded
- * after the conversion is completed.
*
* Errors are returned as a table with a single 'err' field set to the
* error string.
@@ -69,21 +128,11 @@ char *redisProtocolToLuaType(lua_State *lua, char* reply) {
char *p = reply;
switch(*p) {
- case ':':
- p = redisProtocolToLuaType_Int(lua,reply);
- break;
- case '$':
- p = redisProtocolToLuaType_Bulk(lua,reply);
- break;
- case '+':
- p = redisProtocolToLuaType_Status(lua,reply);
- break;
- case '-':
- p = redisProtocolToLuaType_Error(lua,reply);
- break;
- case '*':
- p = redisProtocolToLuaType_MultiBulk(lua,reply);
- break;
+ case ':': p = redisProtocolToLuaType_Int(lua,reply); break;
+ case '$': p = redisProtocolToLuaType_Bulk(lua,reply); break;
+ case '+': p = redisProtocolToLuaType_Status(lua,reply); break;
+ case '-': p = redisProtocolToLuaType_Error(lua,reply); break;
+ case '*': p = redisProtocolToLuaType_MultiBulk(lua,reply); break;
}
return p;
}
@@ -151,9 +200,20 @@ char *redisProtocolToLuaType_MultiBulk(lua_State *lua, char *reply) {
return p;
}
+/* This function is used in order to push an error on the Lua stack in the
+ * format used by redis.pcall to return errors, which is a lua table
+ * with a single "err" field set to the error string. Note that this
+ * table is never a valid reply by proper commands, since the returned
+ * tables are otherwise always indexed by integers, never by strings. */
void luaPushError(lua_State *lua, char *error) {
lua_Debug dbg;
+ /* If debugging is active and in step mode, log errors resulting from
+ * Redis commands. */
+ if (ldb.active && ldb.step) {
+ ldbLog(sdscatprintf(sdsempty(),"<error> %s",error));
+ }
+
lua_newtable(lua);
lua_pushstring(lua,"err");
@@ -169,6 +229,16 @@ void luaPushError(lua_State *lua, char *error) {
lua_settable(lua,-3);
}
+/* In case the error set into the Lua stack by luaPushError() was generated
+ * by the non-error-trapping version of redis.pcall(), which is redis.call(),
+ * this function will raise the Lua error so that the execution of the
+ * script will be halted. */
+int luaRaiseError(lua_State *lua) {
+ lua_pushstring(lua,"err");
+ lua_gettable(lua,-2);
+ return lua_error(lua);
+}
+
/* Sort the array currently in the stack. We do this to make the output
* of commands like KEYS or SMEMBERS something deterministic when called
* from Lua (to play well with AOf/replication).
@@ -200,31 +270,118 @@ void luaSortArray(lua_State *lua) {
lua_pop(lua,1); /* Stack: array (sorted) */
}
+/* ---------------------------------------------------------------------------
+ * Lua reply to Redis reply conversion functions.
+ * ------------------------------------------------------------------------- */
+
+void luaReplyToRedisReply(client *c, lua_State *lua) {
+ int t = lua_type(lua,-1);
+
+ switch(t) {
+ case LUA_TSTRING:
+ addReplyBulkCBuffer(c,(char*)lua_tostring(lua,-1),lua_strlen(lua,-1));
+ break;
+ case LUA_TBOOLEAN:
+ addReply(c,lua_toboolean(lua,-1) ? shared.cone : shared.nullbulk);
+ break;
+ case LUA_TNUMBER:
+ addReplyLongLong(c,(long long)lua_tonumber(lua,-1));
+ break;
+ case LUA_TTABLE:
+ /* We need to check if it is an array, an error, or a status reply.
+ * Error are returned as a single element table with 'err' field.
+ * Status replies are returned as single element table with 'ok'
+ * field. */
+ lua_pushstring(lua,"err");
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds err = sdsnew(lua_tostring(lua,-1));
+ sdsmapchars(err,"\r\n"," ",2);
+ addReplySds(c,sdscatprintf(sdsempty(),"-%s\r\n",err));
+ sdsfree(err);
+ lua_pop(lua,2);
+ return;
+ }
+
+ lua_pop(lua,1);
+ lua_pushstring(lua,"ok");
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds ok = sdsnew(lua_tostring(lua,-1));
+ sdsmapchars(ok,"\r\n"," ",2);
+ addReplySds(c,sdscatprintf(sdsempty(),"+%s\r\n",ok));
+ sdsfree(ok);
+ lua_pop(lua,1);
+ } else {
+ void *replylen = addDeferredMultiBulkLength(c);
+ int j = 1, mbulklen = 0;
+
+ lua_pop(lua,1); /* Discard the 'ok' field value we popped */
+ while(1) {
+ lua_pushnumber(lua,j++);
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TNIL) {
+ lua_pop(lua,1);
+ break;
+ }
+ luaReplyToRedisReply(c, lua);
+ mbulklen++;
+ }
+ setDeferredMultiBulkLength(c,replylen,mbulklen);
+ }
+ break;
+ default:
+ addReply(c,shared.nullbulk);
+ }
+ lua_pop(lua,1);
+}
+
+/* ---------------------------------------------------------------------------
+ * Lua redis.* functions implementations.
+ * ------------------------------------------------------------------------- */
+
#define LUA_CMD_OBJCACHE_SIZE 32
#define LUA_CMD_OBJCACHE_MAX_LEN 64
int luaRedisGenericCommand(lua_State *lua, int raise_error) {
int j, argc = lua_gettop(lua);
struct redisCommand *cmd;
- redisClient *c = server.lua_client;
+ client *c = server.lua_client;
sds reply;
/* Cached across calls. */
static robj **argv = NULL;
static int argv_size = 0;
static robj *cached_objects[LUA_CMD_OBJCACHE_SIZE];
- static int cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+ static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+ static int inuse = 0; /* Recursive calls detection. */
+
+ /* By using Lua debug hooks it is possible to trigger a recursive call
+ * to luaRedisGenericCommand(), which normally should never happen.
+ * To make this function reentrant is futile and makes it slower, but
+ * we should at least detect such a misuse, and abort. */
+ if (inuse) {
+ char *recursion_warning =
+ "luaRedisGenericCommand() recursive call detected. "
+ "Are you doing funny stuff with Lua debug hooks?";
+ serverLog(LL_WARNING,"%s",recursion_warning);
+ luaPushError(lua,recursion_warning);
+ return 1;
+ }
+ inuse++;
/* Require at least one argument */
if (argc == 0) {
luaPushError(lua,
"Please specify at least one argument for redis.call()");
- return 1;
+ inuse--;
+ return raise_error ? luaRaiseError(lua) : 1;
}
/* Build the arguments vector */
- if (!argv) {
- argv = zmalloc(sizeof(robj*)*argc);
- } else if (argv_size < argc) {
+ if (argv_size < argc) {
argv = zrealloc(argv,sizeof(robj*)*argc);
argv_size = argc;
}
@@ -250,14 +407,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (j < LUA_CMD_OBJCACHE_SIZE && cached_objects[j] &&
cached_objects_len[j] >= obj_len)
{
- char *s = cached_objects[j]->ptr;
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
-
+ sds s = cached_objects[j]->ptr;
argv[j] = cached_objects[j];
cached_objects[j] = NULL;
memcpy(s,obj_s,obj_len+1);
- sh->free += sh->len - obj_len;
- sh->len = obj_len;
+ sdssetlen(s, obj_len);
} else {
argv[j] = createStringObject(obj_s, obj_len);
}
@@ -274,13 +428,30 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
luaPushError(lua,
"Lua redis() command arguments must be strings or integers");
- return 1;
+ inuse--;
+ return raise_error ? luaRaiseError(lua) : 1;
}
/* Setup our fake client for command execution */
c->argv = argv;
c->argc = argc;
+ /* Log the command if debugging is active. */
+ if (ldb.active && ldb.step) {
+ sds cmdlog = sdsnew("<redis>");
+ for (j = 0; j < c->argc; j++) {
+ if (j == 10) {
+ cmdlog = sdscatprintf(cmdlog," ... (%d more)",
+ c->argc-j-1);
+ break;
+ } else {
+ cmdlog = sdscatlen(cmdlog," ",1);
+ cmdlog = sdscatsds(cmdlog,c->argv[j]->ptr);
+ }
+ }
+ ldbLog(cmdlog);
+ }
+
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
if (!cmd || ((cmd->arity > 0 && cmd->arity != argc) ||
@@ -293,9 +464,10 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
luaPushError(lua,"Unknown Redis command called from Lua script");
goto cleanup;
}
+ c->cmd = c->lastcmd = cmd;
/* There are commands that are not allowed inside scripts. */
- if (cmd->flags & REDIS_CMD_NOSCRIPT) {
+ if (cmd->flags & CMD_NOSCRIPT) {
luaPushError(lua, "This Redis command is not allowed from scripts");
goto cleanup;
}
@@ -303,20 +475,20 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
/* Write commands are forbidden against read-only slaves, or if a
* command marked as non-deterministic was already called in the context
* of this script. */
- if (cmd->flags & REDIS_CMD_WRITE) {
- if (server.lua_random_dirty) {
+ if (cmd->flags & CMD_WRITE) {
+ if (server.lua_random_dirty && !server.lua_replicate_commands) {
luaPushError(lua,
- "Write commands not allowed after non deterministic commands");
+ "Write commands not allowed after non deterministic commands. Call redis.replicate_commands() at the start of your script in order to switch to single commands replication mode.");
goto cleanup;
} else if (server.masterhost && server.repl_slave_ro &&
!server.loading &&
- !(server.lua_caller->flags & REDIS_MASTER))
+ !(server.lua_caller->flags & CLIENT_MASTER))
{
luaPushError(lua, shared.roslaveerr->ptr);
goto cleanup;
} else if (server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
- server.lastbgsave_status == REDIS_ERR)
+ server.lastbgsave_status == C_ERR)
{
luaPushError(lua, shared.bgsaveerr->ptr);
goto cleanup;
@@ -328,25 +500,63 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
* first write in the context of this script, otherwise we can't stop
* in the middle. */
if (server.maxmemory && server.lua_write_dirty == 0 &&
- (cmd->flags & REDIS_CMD_DENYOOM))
+ (cmd->flags & CMD_DENYOOM))
{
- if (freeMemoryIfNeeded() == REDIS_ERR) {
+ if (freeMemoryIfNeeded() == C_ERR) {
luaPushError(lua, shared.oomerr->ptr);
goto cleanup;
}
}
- if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1;
- if (cmd->flags & REDIS_CMD_WRITE) server.lua_write_dirty = 1;
+ if (cmd->flags & CMD_RANDOM) server.lua_random_dirty = 1;
+ if (cmd->flags & CMD_WRITE) server.lua_write_dirty = 1;
+
+ /* If this is a Redis Cluster node, we need to make sure Lua is not
+ * trying to access non-local keys, with the exception of commands
+ * received from our master or when loading the AOF back in memory. */
+ if (server.cluster_enabled && !server.loading &&
+ !(server.lua_caller->flags & CLIENT_MASTER))
+ {
+ /* Duplicate relevant flags in the lua client. */
+ c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
+ c->flags |= server.lua_caller->flags & (CLIENT_READONLY|CLIENT_ASKING);
+ if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+ server.cluster->myself)
+ {
+ luaPushError(lua,
+ "Lua script attempted to access a non local key in a "
+ "cluster node");
+ goto cleanup;
+ }
+ }
+
+ /* If we are using single commands replication, we need to wrap what
+ * we propagate into a MULTI/EXEC block, so that it will be atomic like
+ * a Lua script in the context of AOF and slaves. */
+ if (server.lua_replicate_commands &&
+ !server.lua_multi_emitted &&
+ server.lua_write_dirty &&
+ server.lua_repl != PROPAGATE_NONE)
+ {
+ execCommandPropagateMulti(server.lua_caller);
+ server.lua_multi_emitted = 1;
+ }
/* Run the command */
- c->cmd = cmd;
- call(c,REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
+ int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
+ if (server.lua_replicate_commands) {
+ /* Set flags according to redis.set_repl() settings. */
+ if (server.lua_repl & PROPAGATE_AOF)
+ call_flags |= CMD_CALL_PROPAGATE_AOF;
+ if (server.lua_repl & PROPAGATE_REPL)
+ call_flags |= CMD_CALL_PROPAGATE_REPL;
+ }
+ call(c,call_flags);
/* Convert the result of the Redis command into a suitable Lua type.
* The first thing we need is to create a single string from the client
* output buffers. */
- if (listLength(c->reply) == 0 && c->bufpos < REDIS_REPLY_CHUNK_BYTES) {
+ if (listLength(c->reply) == 0 && c->bufpos < PROTO_REPLY_CHUNK_BYTES) {
/* This is a fast path for the common case of a reply inside the
* client static buffer. Don't create an SDS string but just use
* the client buffer directly. */
@@ -357,17 +567,23 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
reply = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
- robj *o = listNodeValue(listFirst(c->reply));
+ sds o = listNodeValue(listFirst(c->reply));
- reply = sdscatlen(reply,o->ptr,sdslen(o->ptr));
+ reply = sdscatsds(reply,o);
listDelNode(c->reply,listFirst(c->reply));
}
}
if (raise_error && reply[0] != '-') raise_error = 0;
redisProtocolToLuaType(lua,reply);
+
+ /* If the debugger is active, log the reply from Redis. */
+ if (ldb.active && ldb.step)
+ ldbLogRedisReply(reply);
+
/* Sort the output array if needed, assuming it is a non-null multi bulk
* reply as expected. */
- if ((cmd->flags & REDIS_CMD_SORT_FOR_SCRIPT) &&
+ if ((cmd->flags & CMD_SORT_FOR_SCRIPT) &&
+ (server.lua_replicate_commands == 0) &&
(reply[0] == '*' && reply[1] != '-')) {
luaSortArray(lua);
}
@@ -385,15 +601,14 @@ cleanup:
* (we must be the only owner) for us to cache it. */
if (j < LUA_CMD_OBJCACHE_SIZE &&
o->refcount == 1 &&
- (o->encoding == REDIS_ENCODING_RAW ||
- o->encoding == REDIS_ENCODING_EMBSTR) &&
+ (o->encoding == OBJ_ENCODING_RAW ||
+ o->encoding == OBJ_ENCODING_EMBSTR) &&
sdslen(o->ptr) <= LUA_CMD_OBJCACHE_MAX_LEN)
{
- struct sdshdr *sh = (void*)(((char*)(o->ptr))-(sizeof(struct sdshdr)));
-
+ sds s = o->ptr;
if (cached_objects[j]) decrRefCount(cached_objects[j]);
cached_objects[j] = o;
- cached_objects_len[j] = sh->free + sh->len;
+ cached_objects_len[j] = sdsalloc(s);
} else {
decrRefCount(o);
}
@@ -402,23 +617,26 @@ cleanup:
if (c->argv != argv) {
zfree(c->argv);
argv = NULL;
+ argv_size = 0;
}
if (raise_error) {
/* If we are here we should have an error in the stack, in the
* form of a table with an "err" field. Extract the string to
* return the plain error. */
- lua_pushstring(lua,"err");
- lua_gettable(lua,-2);
- return lua_error(lua);
+ inuse--;
+ return luaRaiseError(lua);
}
+ inuse--;
return 1;
}
+/* redis.call() */
int luaRedisCallCommand(lua_State *lua) {
return luaRedisGenericCommand(lua,1);
}
+/* redis.pcall() */
int luaRedisPCallCommand(lua_State *lua) {
return luaRedisGenericCommand(lua,0);
}
@@ -432,8 +650,8 @@ int luaRedisSha1hexCommand(lua_State *lua) {
char *s;
if (argc != 1) {
- luaPushError(lua, "wrong number of arguments");
- return 1;
+ lua_pushstring(lua, "wrong number of arguments");
+ return lua_error(lua);
}
s = (char*)lua_tolstring(lua,1,&len);
@@ -462,30 +680,110 @@ int luaRedisReturnSingleFieldTable(lua_State *lua, char *field) {
return 1;
}
+/* redis.error_reply() */
int luaRedisErrorReplyCommand(lua_State *lua) {
return luaRedisReturnSingleFieldTable(lua,"err");
}
+/* redis.status_reply() */
int luaRedisStatusReplyCommand(lua_State *lua) {
return luaRedisReturnSingleFieldTable(lua,"ok");
}
+/* redis.replicate_commands()
+ *
+ * Turn on single commands replication if the script never called
+ * a write command so far, and returns true. Otherwise if the script
+ * already started to write, returns false and stick to whole scripts
+ * replication, which is our default. */
+int luaRedisReplicateCommandsCommand(lua_State *lua) {
+ if (server.lua_write_dirty) {
+ lua_pushboolean(lua,0);
+ } else {
+ server.lua_replicate_commands = 1;
+ /* When we switch to single commands replication, we can provide
+ * different math.random() sequences at every call, which is what
+ * the user normally expects. */
+ redisSrand48(rand());
+ lua_pushboolean(lua,1);
+ }
+ return 1;
+}
+
+/* redis.breakpoint()
+ *
+ * Allows to stop execution during a debuggign session from within
+ * the Lua code implementation, like if a breakpoint was set in the code
+ * immediately after the function. */
+int luaRedisBreakpointCommand(lua_State *lua) {
+ if (ldb.active) {
+ ldb.luabp = 1;
+ lua_pushboolean(lua,1);
+ } else {
+ lua_pushboolean(lua,0);
+ }
+ return 1;
+}
+
+/* redis.debug()
+ *
+ * Log a string message into the output console.
+ * Can take multiple arguments that will be separated by commas.
+ * Nothing is returned to the caller. */
+int luaRedisDebugCommand(lua_State *lua) {
+ if (!ldb.active) return 0;
+ int argc = lua_gettop(lua);
+ sds log = sdscatprintf(sdsempty(),"<debug> line %d: ", ldb.currentline);
+ while(argc--) {
+ log = ldbCatStackValue(log,lua,-1 - argc);
+ if (argc != 0) log = sdscatlen(log,", ",2);
+ }
+ ldbLog(log);
+ return 0;
+}
+
+/* redis.set_repl()
+ *
+ * Set the propagation of write commands executed in the context of the
+ * script to on/off for AOF and slaves. */
+int luaRedisSetReplCommand(lua_State *lua) {
+ int argc = lua_gettop(lua);
+ int flags;
+
+ if (server.lua_replicate_commands == 0) {
+ lua_pushstring(lua, "You can set the replication behavior only after turning on single commands replication with redis.replicate_commands().");
+ return lua_error(lua);
+ } else if (argc != 1) {
+ lua_pushstring(lua, "redis.set_repl() requires two arguments.");
+ return lua_error(lua);
+ }
+
+ flags = lua_tonumber(lua,-1);
+ if ((flags & ~(PROPAGATE_AOF|PROPAGATE_REPL)) != 0) {
+ lua_pushstring(lua, "Invalid replication flags. Use REPL_AOF, REPL_SLAVE, REPL_ALL or REPL_NONE.");
+ return lua_error(lua);
+ }
+ server.lua_repl = flags;
+ return 0;
+}
+
+/* redis.log() */
int luaLogCommand(lua_State *lua) {
int j, argc = lua_gettop(lua);
int level;
sds log;
if (argc < 2) {
- luaPushError(lua, "redis.log() requires two arguments or more.");
- return 1;
+ lua_pushstring(lua, "redis.log() requires two arguments or more.");
+ return lua_error(lua);
} else if (!lua_isnumber(lua,-argc)) {
- luaPushError(lua, "First argument must be a number (log level).");
- return 1;
+ lua_pushstring(lua, "First argument must be a number (log level).");
+ return lua_error(lua);
}
level = lua_tonumber(lua,-argc);
- if (level < REDIS_DEBUG || level > REDIS_WARNING) {
- luaPushError(lua, "Invalid debug level.");
- return 1;
+ if (level < LL_DEBUG || level > LL_WARNING) {
+ lua_pushstring(lua, "Invalid debug level.");
+ return lua_error(lua);
}
/* Glue together all the arguments */
@@ -500,34 +798,14 @@ int luaLogCommand(lua_State *lua) {
log = sdscatlen(log,s,len);
}
}
- redisLogRaw(level,log);
+ serverLogRaw(level,log);
sdsfree(log);
return 0;
}
-void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
- long long elapsed;
- REDIS_NOTUSED(ar);
- REDIS_NOTUSED(lua);
-
- elapsed = mstime() - server.lua_time_start;
- if (elapsed >= server.lua_time_limit && server.lua_timedout == 0) {
- redisLog(REDIS_WARNING,"Lua slow script detected: still in execution after %lld milliseconds. You can try killing the script using the SCRIPT KILL command.",elapsed);
- server.lua_timedout = 1;
- /* Once the script timeouts we reenter the event loop to permit others
- * to call SCRIPT KILL or SHUTDOWN NOSAVE if needed. For this reason
- * we need to mask the client executing the script from the event loop.
- * If we don't do that the client may disconnect and could no longer be
- * here when the EVAL command will return. */
- aeDeleteFileEvent(server.el, server.lua_caller->fd, AE_READABLE);
- }
- if (server.lua_timedout) processEventsWhileBlocked();
- if (server.lua_kill) {
- redisLog(REDIS_WARNING,"Lua script killed by user with SCRIPT KILL.");
- lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");
- lua_error(lua);
- }
-}
+/* ---------------------------------------------------------------------------
+ * Lua engine initialization and reset.
+ * ------------------------------------------------------------------------- */
void luaLoadLib(lua_State *lua, const char *libname, lua_CFunction luafunc) {
lua_pushcfunction(lua, luafunc);
@@ -538,6 +816,7 @@ void luaLoadLib(lua_State *lua, const char *libname, lua_CFunction luafunc) {
LUALIB_API int (luaopen_cjson) (lua_State *L);
LUALIB_API int (luaopen_struct) (lua_State *L);
LUALIB_API int (luaopen_cmsgpack) (lua_State *L);
+LUALIB_API int (luaopen_bit) (lua_State *L);
void luaLoadLibraries(lua_State *lua) {
luaLoadLib(lua, "", luaopen_base);
@@ -548,6 +827,7 @@ void luaLoadLibraries(lua_State *lua) {
luaLoadLib(lua, "cjson", luaopen_cjson);
luaLoadLib(lua, "struct", luaopen_struct);
luaLoadLib(lua, "cmsgpack", luaopen_cmsgpack);
+ luaLoadLib(lua, "bit", luaopen_bit);
#if 0 /* Stuff that we don't load currently, for sandboxing concerns. */
luaLoadLib(lua, LUA_LOADLIBNAME, luaopen_package);
@@ -560,6 +840,8 @@ void luaLoadLibraries(lua_State *lua) {
void luaRemoveUnsupportedFunctions(lua_State *lua) {
lua_pushnil(lua);
lua_setglobal(lua,"loadfile");
+ lua_pushnil(lua);
+ lua_setglobal(lua,"dofile");
}
/* This function installs metamethods in the global table _G that prevent
@@ -574,11 +856,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
/* strict.lua from: http://metalua.luaforge.net/src/lib/strict.lua.html.
* Modified to be adapted to Redis. */
+ s[j++]="local dbg=debug\n";
s[j++]="local mt = {}\n";
s[j++]="setmetatable(_G, mt)\n";
s[j++]="mt.__newindex = function (t, n, v)\n";
- s[j++]=" if debug.getinfo(2) then\n";
- s[j++]=" local w = debug.getinfo(2, \"S\").what\n";
+ s[j++]=" if dbg.getinfo(2) then\n";
+ s[j++]=" local w = dbg.getinfo(2, \"S\").what\n";
s[j++]=" if w ~= \"main\" and w ~= \"C\" then\n";
s[j++]=" error(\"Script attempted to create global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
@@ -586,11 +869,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
s[j++]=" rawset(t, n, v)\n";
s[j++]="end\n";
s[j++]="mt.__index = function (t, n)\n";
- s[j++]=" if debug.getinfo(2) and debug.getinfo(2, \"S\").what ~= \"C\" then\n";
- s[j++]=" error(\"Script attempted to access unexisting global variable '\"..tostring(n)..\"'\", 2)\n";
+ s[j++]=" if dbg.getinfo(2) and dbg.getinfo(2, \"S\").what ~= \"C\" then\n";
+ s[j++]=" error(\"Script attempted to access nonexistent global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
s[j++]=" return rawget(t, n)\n";
s[j++]="end\n";
+ s[j++]="debug = nil\n";
s[j++]=NULL;
for (j = 0; s[j] != NULL; j++) code = sdscatlen(code,s[j],strlen(s[j]));
@@ -600,12 +884,26 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
}
/* Initialize the scripting environment.
- * It is possible to call this function to reset the scripting environment
- * assuming that we call scriptingRelease() before.
- * See scriptingReset() for more information. */
-void scriptingInit(void) {
+ *
+ * This function is called the first time at server startup with
+ * the 'setup' argument set to 1.
+ *
+ * It can be called again multiple times during the lifetime of the Redis
+ * process, with 'setup' set to 0, and following a scriptingRelease() call,
+ * in order to reset the Lua scripting environment.
+ *
+ * However it is simpler to just call scriptingReset() that does just that. */
+void scriptingInit(int setup) {
lua_State *lua = lua_open();
+ if (setup) {
+ server.lua_client = NULL;
+ server.lua_caller = NULL;
+ server.lua_timedout = 0;
+ server.lua_always_replicate_commands = 0; /* Only DEBUG can change it.*/
+ ldbInit();
+ }
+
luaLoadLibraries(lua);
luaRemoveUnsupportedFunctions(lua);
@@ -633,19 +931,19 @@ void scriptingInit(void) {
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_DEBUG");
- lua_pushnumber(lua,REDIS_DEBUG);
+ lua_pushnumber(lua,LL_DEBUG);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_VERBOSE");
- lua_pushnumber(lua,REDIS_VERBOSE);
+ lua_pushnumber(lua,LL_VERBOSE);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_NOTICE");
- lua_pushnumber(lua,REDIS_NOTICE);
+ lua_pushnumber(lua,LL_NOTICE);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_WARNING");
- lua_pushnumber(lua,REDIS_WARNING);
+ lua_pushnumber(lua,LL_WARNING);
lua_settable(lua,-3);
/* redis.sha1hex */
@@ -661,6 +959,42 @@ void scriptingInit(void) {
lua_pushcfunction(lua, luaRedisStatusReplyCommand);
lua_settable(lua, -3);
+ /* redis.replicate_commands */
+ lua_pushstring(lua, "replicate_commands");
+ lua_pushcfunction(lua, luaRedisReplicateCommandsCommand);
+ lua_settable(lua, -3);
+
+ /* redis.set_repl and associated flags. */
+ lua_pushstring(lua,"set_repl");
+ lua_pushcfunction(lua,luaRedisSetReplCommand);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_NONE");
+ lua_pushnumber(lua,PROPAGATE_NONE);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_AOF");
+ lua_pushnumber(lua,PROPAGATE_AOF);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_SLAVE");
+ lua_pushnumber(lua,PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_ALL");
+ lua_pushnumber(lua,PROPAGATE_AOF|PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ /* redis.breakpoint */
+ lua_pushstring(lua,"breakpoint");
+ lua_pushcfunction(lua,luaRedisBreakpointCommand);
+ lua_settable(lua,-3);
+
+ /* redis.debug */
+ lua_pushstring(lua,"debug");
+ lua_pushcfunction(lua,luaRedisDebugCommand);
+ lua_settable(lua,-3);
+
/* Finally set the table as 'redis' global var. */
lua_setglobal(lua,"redis");
@@ -694,10 +1028,11 @@ void scriptingInit(void) {
* information about the caller, that's what makes sense from the point
* of view of the user debugging a script. */
{
- char *errh_func = "function __redis__err__handler(err)\n"
- " local i = debug.getinfo(2,'nSl')\n"
+ char *errh_func = "local dbg = debug\n"
+ "function __redis__err__handler(err)\n"
+ " local i = dbg.getinfo(2,'nSl')\n"
" if i and i.what == 'C' then\n"
- " i = debug.getinfo(3,'nSl')\n"
+ " i = dbg.getinfo(3,'nSl')\n"
" end\n"
" if i then\n"
" return i.source .. ':' .. i.currentline .. ': ' .. err\n"
@@ -715,10 +1050,10 @@ void scriptingInit(void) {
* by scriptingReset(). */
if (server.lua_client == NULL) {
server.lua_client = createClient(-1);
- server.lua_client->flags |= REDIS_LUA_CLIENT;
+ server.lua_client->flags |= CLIENT_LUA;
}
- /* Lua beginners ofter don't use "local", this is likely to introduce
+ /* Lua beginners often don't use "local", this is likely to introduce
* subtle bugs in their code. To prevent problems we protect accesses
* to global variables. */
scriptingEnableGlobalsProtection(lua);
@@ -735,94 +1070,7 @@ void scriptingRelease(void) {
void scriptingReset(void) {
scriptingRelease();
- scriptingInit();
-}
-
-/* Perform the SHA1 of the input string. We use this both for hashing script
- * bodies in order to obtain the Lua function name, and in the implementation
- * of redis.sha1().
- *
- * 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an
- * hexadecimal number, plus 1 byte for null term. */
-void sha1hex(char *digest, char *script, size_t len) {
- SHA1_CTX ctx;
- unsigned char hash[20];
- char *cset = "0123456789abcdef";
- int j;
-
- SHA1Init(&ctx);
- SHA1Update(&ctx,(unsigned char*)script,len);
- SHA1Final(hash,&ctx);
-
- for (j = 0; j < 20; j++) {
- digest[j*2] = cset[((hash[j]&0xF0)>>4)];
- digest[j*2+1] = cset[(hash[j]&0xF)];
- }
- digest[40] = '\0';
-}
-
-void luaReplyToRedisReply(redisClient *c, lua_State *lua) {
- int t = lua_type(lua,-1);
-
- switch(t) {
- case LUA_TSTRING:
- addReplyBulkCBuffer(c,(char*)lua_tostring(lua,-1),lua_strlen(lua,-1));
- break;
- case LUA_TBOOLEAN:
- addReply(c,lua_toboolean(lua,-1) ? shared.cone : shared.nullbulk);
- break;
- case LUA_TNUMBER:
- addReplyLongLong(c,(long long)lua_tonumber(lua,-1));
- break;
- case LUA_TTABLE:
- /* We need to check if it is an array, an error, or a status reply.
- * Error are returned as a single element table with 'err' field.
- * Status replies are returned as single element table with 'ok' field */
- lua_pushstring(lua,"err");
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TSTRING) {
- sds err = sdsnew(lua_tostring(lua,-1));
- sdsmapchars(err,"\r\n"," ",2);
- addReplySds(c,sdscatprintf(sdsempty(),"-%s\r\n",err));
- sdsfree(err);
- lua_pop(lua,2);
- return;
- }
-
- lua_pop(lua,1);
- lua_pushstring(lua,"ok");
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TSTRING) {
- sds ok = sdsnew(lua_tostring(lua,-1));
- sdsmapchars(ok,"\r\n"," ",2);
- addReplySds(c,sdscatprintf(sdsempty(),"+%s\r\n",ok));
- sdsfree(ok);
- lua_pop(lua,1);
- } else {
- void *replylen = addDeferredMultiBulkLength(c);
- int j = 1, mbulklen = 0;
-
- lua_pop(lua,1); /* Discard the 'ok' field value we popped */
- while(1) {
- lua_pushnumber(lua,j++);
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TNIL) {
- lua_pop(lua,1);
- break;
- }
- luaReplyToRedisReply(c, lua);
- mbulklen++;
- }
- setDeferredMultiBulkLength(c,replylen,mbulklen);
- }
- break;
- default:
- addReply(c,shared.nullbulk);
- }
- lua_pop(lua,1);
+ scriptingInit(0);
}
/* Set an array of Redis String Objects as a Lua array (table) stored into a
@@ -838,37 +1086,84 @@ void luaSetGlobalArray(lua_State *lua, char *var, robj **elev, int elec) {
lua_setglobal(lua,var);
}
+/* ---------------------------------------------------------------------------
+ * Redis provided math.random
+ * ------------------------------------------------------------------------- */
+
+/* We replace math.random() with our implementation that is not affected
+ * by specific libc random() implementations and will output the same sequence
+ * (for the same seed) in every arch. */
+
+/* The following implementation is the one shipped with Lua itself but with
+ * rand() replaced by redisLrand48(). */
+int redis_math_random (lua_State *L) {
+ /* the `%' avoids the (rare) case of r==1, and is needed also because on
+ some systems (SunOS!) `rand()' may return a value larger than RAND_MAX */
+ lua_Number r = (lua_Number)(redisLrand48()%REDIS_LRAND48_MAX) /
+ (lua_Number)REDIS_LRAND48_MAX;
+ switch (lua_gettop(L)) { /* check number of arguments */
+ case 0: { /* no arguments */
+ lua_pushnumber(L, r); /* Number between 0 and 1 */
+ break;
+ }
+ case 1: { /* only upper limit */
+ int u = luaL_checkint(L, 1);
+ luaL_argcheck(L, 1<=u, 1, "interval is empty");
+ lua_pushnumber(L, floor(r*u)+1); /* int between 1 and `u' */
+ break;
+ }
+ case 2: { /* lower and upper limits */
+ int l = luaL_checkint(L, 1);
+ int u = luaL_checkint(L, 2);
+ luaL_argcheck(L, l<=u, 2, "interval is empty");
+ lua_pushnumber(L, floor(r*(u-l+1))+l); /* int between `l' and `u' */
+ break;
+ }
+ default: return luaL_error(L, "wrong number of arguments");
+ }
+ return 1;
+}
+
+int redis_math_randomseed (lua_State *L) {
+ redisSrand48(luaL_checkint(L, 1));
+ return 0;
+}
+
+/* ---------------------------------------------------------------------------
+ * EVAL and SCRIPT commands implementation
+ * ------------------------------------------------------------------------- */
+
/* Define a lua function with the specified function name and body.
- * The function name musts be a 2 characters long string, since all the
+ * The function name musts be a 42 characters long string, since all the
* functions we defined in the Lua context are in the form:
*
* f_<hex sha1 sum>
*
- * On success REDIS_OK is returned, and nothing is left on the Lua stack.
- * On error REDIS_ERR is returned and an appropriate error is set in the
+ * On success C_OK is returned, and nothing is left on the Lua stack.
+ * On error C_ERR is returned and an appropriate error is set in the
* client context. */
-int luaCreateFunction(redisClient *c, lua_State *lua, char *funcname, robj *body) {
+int luaCreateFunction(client *c, lua_State *lua, char *funcname, robj *body) {
sds funcdef = sdsempty();
funcdef = sdscat(funcdef,"function ");
funcdef = sdscatlen(funcdef,funcname,42);
funcdef = sdscatlen(funcdef,"() ",3);
funcdef = sdscatlen(funcdef,body->ptr,sdslen(body->ptr));
- funcdef = sdscatlen(funcdef," end",4);
+ funcdef = sdscatlen(funcdef,"\nend",4);
if (luaL_loadbuffer(lua,funcdef,sdslen(funcdef),"@user_script")) {
addReplyErrorFormat(c,"Error compiling script (new function): %s\n",
lua_tostring(lua,-1));
lua_pop(lua,1);
sdsfree(funcdef);
- return REDIS_ERR;
+ return C_ERR;
}
sdsfree(funcdef);
if (lua_pcall(lua,0,0,0)) {
addReplyErrorFormat(c,"Error running script (new function): %s\n",
lua_tostring(lua,-1));
lua_pop(lua,1);
- return REDIS_ERR;
+ return C_ERR;
}
/* We also save a SHA1 -> Original script map in a dictionary
@@ -877,20 +1172,45 @@ int luaCreateFunction(redisClient *c, lua_State *lua, char *funcname, robj *body
{
int retval = dictAdd(server.lua_scripts,
sdsnewlen(funcname+2,40),body);
- redisAssertWithInfo(c,NULL,retval == DICT_OK);
+ serverAssertWithInfo(c,NULL,retval == DICT_OK);
incrRefCount(body);
}
- return REDIS_OK;
+ return C_OK;
+}
+
+/* This is the Lua script "count" hook that we use to detect scripts timeout. */
+void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
+ long long elapsed;
+ UNUSED(ar);
+ UNUSED(lua);
+
+ elapsed = mstime() - server.lua_time_start;
+ if (elapsed >= server.lua_time_limit && server.lua_timedout == 0) {
+ serverLog(LL_WARNING,"Lua slow script detected: still in execution after %lld milliseconds. You can try killing the script using the SCRIPT KILL command.",elapsed);
+ server.lua_timedout = 1;
+ /* Once the script timeouts we reenter the event loop to permit others
+ * to call SCRIPT KILL or SHUTDOWN NOSAVE if needed. For this reason
+ * we need to mask the client executing the script from the event loop.
+ * If we don't do that the client may disconnect and could no longer be
+ * here when the EVAL command will return. */
+ aeDeleteFileEvent(server.el, server.lua_caller->fd, AE_READABLE);
+ }
+ if (server.lua_timedout) processEventsWhileBlocked();
+ if (server.lua_kill) {
+ serverLog(LL_WARNING,"Lua script killed by user with SCRIPT KILL.");
+ lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");
+ lua_error(lua);
+ }
}
-void evalGenericCommand(redisClient *c, int evalsha) {
+void evalGenericCommand(client *c, int evalsha) {
lua_State *lua = server.lua;
char funcname[43];
long long numkeys;
int delhook = 0, err;
- /* We want the same PRNG sequence at every call so that our PRNG is
- * not affected by external state. */
+ /* When we replicate whole scripts, we want the same PRNG sequence at
+ * every call so that our PRNG is not affected by external state. */
redisSrand48(0);
/* We set this flag to zero to remember that so far no random command
@@ -903,13 +1223,19 @@ void evalGenericCommand(redisClient *c, int evalsha) {
* is called after a random command was used. */
server.lua_random_dirty = 0;
server.lua_write_dirty = 0;
+ server.lua_replicate_commands = server.lua_always_replicate_commands;
+ server.lua_multi_emitted = 0;
+ server.lua_repl = PROPAGATE_AOF|PROPAGATE_REPL;
/* Get the number of arguments that are keys */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&numkeys,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&numkeys,NULL) != C_OK)
return;
if (numkeys > (c->argc - 3)) {
addReplyError(c,"Number of keys can't be greater than number of args");
return;
+ } else if (numkeys < 0) {
+ addReplyError(c,"Number of keys can't be negative");
+ return;
}
/* We obtain the script SHA1, then check if this function is already
@@ -948,15 +1274,15 @@ void evalGenericCommand(redisClient *c, int evalsha) {
addReply(c, shared.noscripterr);
return;
}
- if (luaCreateFunction(c,lua,funcname,c->argv[1]) == REDIS_ERR) {
+ if (luaCreateFunction(c,lua,funcname,c->argv[1]) == C_ERR) {
lua_pop(lua,1); /* remove the error handler from the stack. */
/* The error is sent to the client by luaCreateFunction()
- * itself when it returns REDIS_ERR. */
+ * itself when it returns C_ERR. */
return;
}
/* Now the following is guaranteed to return non nil */
lua_getglobal(lua, funcname);
- redisAssert(!lua_isnil(lua,-1));
+ serverAssert(!lua_isnil(lua,-1));
}
/* Populate the argv and keys table accordingly to the arguments that
@@ -970,13 +1296,21 @@ void evalGenericCommand(redisClient *c, int evalsha) {
/* Set a hook in order to be able to stop the script execution if it
* is running for too much time.
* We set the hook only if the time limit is enabled as the hook will
- * make the Lua script execution slower. */
+ * make the Lua script execution slower.
+ *
+ * If we are debugging, we set instead a "line" hook so that the
+ * debugger is call-back at every line executed by the script. */
server.lua_caller = c;
server.lua_time_start = mstime();
server.lua_kill = 0;
- if (server.lua_time_limit > 0 && server.masterhost == NULL) {
+ if (server.lua_time_limit > 0 && server.masterhost == NULL &&
+ ldb.active == 0)
+ {
lua_sethook(lua,luaMaskCountHook,LUA_MASKCOUNT,100000);
delhook = 1;
+ } else if (ldb.active) {
+ lua_sethook(server.lua,luaLdbLineHook,LUA_MASKLINE|LUA_MASKCOUNT,100000);
+ delhook = 1;
}
/* At this point whether this script was never seen before or if it was
@@ -985,7 +1319,7 @@ void evalGenericCommand(redisClient *c, int evalsha) {
err = lua_pcall(lua,0,1,-2);
/* Perform some cleanup that we need to do both on error and success. */
- if (delhook) lua_sethook(lua,luaMaskCountHook,0,0); /* Disable hook */
+ if (delhook) lua_sethook(lua,NULL,0,0); /* Disable hook */
if (server.lua_timedout) {
server.lua_timedout = 0;
/* Restore the readable handler that was unregistered when the
@@ -1023,6 +1357,19 @@ void evalGenericCommand(redisClient *c, int evalsha) {
lua_pop(lua,1); /* Remove the error handler. */
}
+ /* If we are using single commands replication, emit EXEC if there
+ * was at least a write. */
+ if (server.lua_replicate_commands) {
+ preventCommandPropagation(c);
+ if (server.lua_multi_emitted) {
+ robj *propargv[1];
+ propargv[0] = createStringObject("EXEC",4);
+ alsoPropagate(server.execCommand,c->db->id,propargv,1,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(propargv[0]);
+ }
+ }
+
/* EVALSHA should be propagated to Slave and AOF file as full EVAL, unless
* we are sure that the script was already in the context of all the
* attached slaves *and* the current AOF file if enabled.
@@ -1033,7 +1380,7 @@ void evalGenericCommand(redisClient *c, int evalsha) {
* For repliation, everytime a new slave attaches to the master, we need to
* flush our cache of scripts that can be replicated as EVALSHA, while
* for AOF we need to do so every time we rewrite the AOF file. */
- if (evalsha) {
+ if (evalsha && !server.lua_replicate_commands) {
if (!replicationScriptCacheExists(c->argv[1]->ptr)) {
/* This script is not in our script cache, replicate it as
* EVAL, then add it into the script cache, as from now on
@@ -1041,20 +1388,23 @@ void evalGenericCommand(redisClient *c, int evalsha) {
robj *script = dictFetchValue(server.lua_scripts,c->argv[1]->ptr);
replicationScriptCacheAdd(c->argv[1]->ptr);
- redisAssertWithInfo(c,NULL,script != NULL);
+ serverAssertWithInfo(c,NULL,script != NULL);
rewriteClientCommandArgument(c,0,
resetRefCount(createStringObject("EVAL",4)));
rewriteClientCommandArgument(c,1,script);
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL|REDIS_PROPAGATE_AOF);
+ forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
}
}
}
-void evalCommand(redisClient *c) {
- evalGenericCommand(c,0);
+void evalCommand(client *c) {
+ if (!(c->flags & CLIENT_LUA_DEBUG))
+ evalGenericCommand(c,0);
+ else
+ evalGenericCommandWithDebugging(c,0);
}
-void evalShaCommand(redisClient *c) {
+void evalShaCommand(client *c) {
if (sdslen(c->argv[1]->ptr) != 40) {
/* We know that a match is not possible if the provided SHA is
* not the right length. So we return an error ASAP, this way
@@ -1063,53 +1413,15 @@ void evalShaCommand(redisClient *c) {
addReply(c, shared.noscripterr);
return;
}
- evalGenericCommand(c,1);
-}
-
-/* We replace math.random() with our implementation that is not affected
- * by specific libc random() implementations and will output the same sequence
- * (for the same seed) in every arch. */
-
-/* The following implementation is the one shipped with Lua itself but with
- * rand() replaced by redisLrand48(). */
-int redis_math_random (lua_State *L) {
- /* the `%' avoids the (rare) case of r==1, and is needed also because on
- some systems (SunOS!) `rand()' may return a value larger than RAND_MAX */
- lua_Number r = (lua_Number)(redisLrand48()%REDIS_LRAND48_MAX) /
- (lua_Number)REDIS_LRAND48_MAX;
- switch (lua_gettop(L)) { /* check number of arguments */
- case 0: { /* no arguments */
- lua_pushnumber(L, r); /* Number between 0 and 1 */
- break;
- }
- case 1: { /* only upper limit */
- int u = luaL_checkint(L, 1);
- luaL_argcheck(L, 1<=u, 1, "interval is empty");
- lua_pushnumber(L, floor(r*u)+1); /* int between 1 and `u' */
- break;
- }
- case 2: { /* lower and upper limits */
- int l = luaL_checkint(L, 1);
- int u = luaL_checkint(L, 2);
- luaL_argcheck(L, l<=u, 2, "interval is empty");
- lua_pushnumber(L, floor(r*(u-l+1))+l); /* int between `l' and `u' */
- break;
+ if (!(c->flags & CLIENT_LUA_DEBUG))
+ evalGenericCommand(c,1);
+ else {
+ addReplyError(c,"Please use EVAL instead of EVALSHA for debugging");
+ return;
}
- default: return luaL_error(L, "wrong number of arguments");
- }
- return 1;
-}
-
-int redis_math_randomseed (lua_State *L) {
- redisSrand48(luaL_checkint(L, 1));
- return 0;
}
-/* ---------------------------------------------------------------------------
- * SCRIPT command for script environment introspection and control
- * ------------------------------------------------------------------------- */
-
-void scriptCommand(redisClient *c) {
+void scriptCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
scriptingReset();
addReply(c,shared.ok);
@@ -1135,14 +1447,14 @@ void scriptCommand(redisClient *c) {
sha = sdsnewlen(funcname+2,40);
if (dictFind(server.lua_scripts,sha) == NULL) {
if (luaCreateFunction(c,server.lua,funcname,c->argv[2])
- == REDIS_ERR) {
+ == C_ERR) {
sdsfree(sha);
return;
}
}
addReplyBulkCBuffer(c,funcname+2,40);
sdsfree(sha);
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL|REDIS_PROPAGATE_AOF);
+ forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) {
if (server.lua_caller == NULL) {
addReplySds(c,sdsnew("-NOTBUSY No scripts in execution right now.\r\n"));
@@ -1152,7 +1464,926 @@ void scriptCommand(redisClient *c) {
server.lua_kill = 1;
addReply(c,shared.ok);
}
+ } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"debug")) {
+ if (clientHasPendingReplies(c)) {
+ addReplyError(c,"SCRIPT DEBUG must be called outside a pipeline");
+ return;
+ }
+ if (!strcasecmp(c->argv[2]->ptr,"no")) {
+ ldbDisable(c);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"yes")) {
+ ldbEnable(c);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"sync")) {
+ ldbEnable(c);
+ addReply(c,shared.ok);
+ c->flags |= CLIENT_LUA_DEBUG_SYNC;
+ } else {
+ addReplyError(c,"Use SCRIPT DEBUG yes/sync/no");
+ }
} else {
addReplyError(c, "Unknown SCRIPT subcommand or wrong # of args.");
}
}
+
+/* ---------------------------------------------------------------------------
+ * LDB: Redis Lua debugging facilities
+ * ------------------------------------------------------------------------- */
+
+/* Initialize Lua debugger data structures. */
+void ldbInit(void) {
+ ldb.fd = -1;
+ ldb.active = 0;
+ ldb.logs = listCreate();
+ listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree);
+ ldb.children = listCreate();
+ ldb.src = NULL;
+ ldb.lines = 0;
+ ldb.cbuf = sdsempty();
+}
+
+/* Remove all the pending messages in the specified list. */
+void ldbFlushLog(list *log) {
+ listNode *ln;
+
+ while((ln = listFirst(log)) != NULL)
+ listDelNode(log,ln);
+}
+
+/* Enable debug mode of Lua scripts for this client. */
+void ldbEnable(client *c) {
+ c->flags |= CLIENT_LUA_DEBUG;
+ ldbFlushLog(ldb.logs);
+ ldb.fd = c->fd;
+ ldb.step = 1;
+ ldb.bpcount = 0;
+ ldb.luabp = 0;
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+ ldb.maxlen = LDB_MAX_LEN_DEFAULT;
+ ldb.maxlen_hint_sent = 0;
+}
+
+/* Exit debugging mode from the POV of client. This function is not enough
+ * to properly shut down a client debugging session, see ldbEndSession()
+ * for more information. */
+void ldbDisable(client *c) {
+ c->flags &= ~(CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC);
+}
+
+/* Append a log entry to the specified LDB log. */
+void ldbLog(sds entry) {
+ listAddNodeTail(ldb.logs,entry);
+}
+
+/* A version of ldbLog() which prevents producing logs greater than
+ * ldb.maxlen. The first time the limit is reached a hint is generated
+ * to inform the user that reply trimming can be disabled using the
+ * debugger "maxlen" command. */
+void ldbLogWithMaxLen(sds entry) {
+ int trimmed = 0;
+ if (ldb.maxlen && sdslen(entry) > ldb.maxlen) {
+ sdsrange(entry,0,ldb.maxlen-1);
+ entry = sdscatlen(entry," ...",4);
+ trimmed = 1;
+ }
+ ldbLog(entry);
+ if (trimmed && ldb.maxlen_hint_sent == 0) {
+ ldb.maxlen_hint_sent = 1;
+ ldbLog(sdsnew(
+ "<hint> The above reply was trimmed. Use 'maxlen 0' to disable trimming."));
+ }
+}
+
+/* Send ldb.logs to the debugging client as a multi-bulk reply
+ * consisting of simple strings. Log entries which include newlines have them
+ * replaced with spaces. The entries sent are also consumed. */
+void ldbSendLogs(void) {
+ sds proto = sdsempty();
+ proto = sdscatfmt(proto,"*%i\r\n", (int)listLength(ldb.logs));
+ while(listLength(ldb.logs)) {
+ listNode *ln = listFirst(ldb.logs);
+ proto = sdscatlen(proto,"+",1);
+ sdsmapchars(ln->value,"\r\n"," ",2);
+ proto = sdscatsds(proto,ln->value);
+ proto = sdscatlen(proto,"\r\n",2);
+ listDelNode(ldb.logs,ln);
+ }
+ if (write(ldb.fd,proto,sdslen(proto)) == -1) {
+ /* Avoid warning. We don't check the return value of write()
+ * since the next read() will catch the I/O error and will
+ * close the debugging session. */
+ }
+ sdsfree(proto);
+}
+
+/* Start a debugging session before calling EVAL implementation.
+ * The technique we use is to capture the client socket file descriptor,
+ * in order to perform direct I/O with it from within Lua hooks. This
+ * way we don't have to re-enter Redis in order to handle I/O.
+ *
+ * The function returns 1 if the caller should proceed to call EVAL,
+ * and 0 if instead the caller should abort the operation (this happens
+ * for the parent in a forked session, since it's up to the children
+ * to continue, or when fork returned an error).
+ *
+ * The caller should call ldbEndSession() only if ldbStartSession()
+ * returned 1. */
+int ldbStartSession(client *c) {
+ ldb.forked = (c->flags & CLIENT_LUA_DEBUG_SYNC) == 0;
+ if (ldb.forked) {
+ pid_t cp = fork();
+ if (cp == -1) {
+ addReplyError(c,"Fork() failed: can't run EVAL in debugging mode.");
+ return 0;
+ } else if (cp == 0) {
+ /* Child. Let's ignore important signals handled by the parent. */
+ struct sigaction act;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
+
+ /* Log the creation of the child and close the listening
+ * socket to make sure if the parent crashes a reset is sent
+ * to the clients. */
+ serverLog(LL_WARNING,"Redis forked for debugging eval");
+ closeListeningSockets(0);
+ } else {
+ /* Parent */
+ listAddNodeTail(ldb.children,(void*)(unsigned long)cp);
+ freeClientAsync(c); /* Close the client in the parent side. */
+ return 0;
+ }
+ } else {
+ serverLog(LL_WARNING,
+ "Redis synchronous debugging eval session started");
+ }
+
+ /* Setup our debugging session. */
+ anetBlock(NULL,ldb.fd);
+ anetSendTimeout(NULL,ldb.fd,5000);
+ ldb.active = 1;
+
+ /* First argument of EVAL is the script itself. We split it into different
+ * lines since this is the way the debugger accesses the source code. */
+ sds srcstring = sdsdup(c->argv[1]->ptr);
+ size_t srclen = sdslen(srcstring);
+ while(srclen && (srcstring[srclen-1] == '\n' ||
+ srcstring[srclen-1] == '\r'))
+ {
+ srcstring[--srclen] = '\0';
+ }
+ sdssetlen(srcstring,srclen);
+ ldb.src = sdssplitlen(srcstring,sdslen(srcstring),"\n",1,&ldb.lines);
+ sdsfree(srcstring);
+ return 1;
+}
+
+/* End a debugging session after the EVAL call with debugging enabled
+ * returned. */
+void ldbEndSession(client *c) {
+ /* Emit the remaining logs and an <endsession> mark. */
+ ldbLog(sdsnew("<endsession>"));
+ ldbSendLogs();
+
+ /* If it's a fork()ed session, we just exit. */
+ if (ldb.forked) {
+ writeToClient(c->fd, c, 0);
+ serverLog(LL_WARNING,"Lua debugging session child exiting");
+ exitFromChild(0);
+ } else {
+ serverLog(LL_WARNING,
+ "Redis synchronous debugging eval session ended");
+ }
+
+ /* Otherwise let's restore client's state. */
+ anetNonBlock(NULL,ldb.fd);
+ anetSendTimeout(NULL,ldb.fd,0);
+
+ /* Close the client connection after sending the final EVAL reply
+ * in order to signal the end of the debugging session. */
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+
+ /* Cleanup. */
+ sdsfreesplitres(ldb.src,ldb.lines);
+ ldb.lines = 0;
+ ldb.active = 0;
+}
+
+/* If the specified pid is among the list of children spawned for
+ * forked debugging sessions, it is removed from the children list.
+ * If the pid was found non-zero is returned. */
+int ldbRemoveChild(pid_t pid) {
+ listNode *ln = listSearchKey(ldb.children,(void*)(unsigned long)pid);
+ if (ln) {
+ listDelNode(ldb.children,ln);
+ return 1;
+ }
+ return 0;
+}
+
+/* Return the number of children for which we have not yet received a
+ * termination acknowledgment via wait() in the parent process. */
+int ldbPendingChildren(void) {
+ return listLength(ldb.children);
+}
+
+/* Kill all the forked sessions. */
+void ldbKillForkedSessions(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(ldb.children,&li);
+ while((ln = listNext(&li))) {
+ pid_t pid = (unsigned long) ln->value;
+ serverLog(LL_WARNING,"Killing debugging session %ld",(long)pid);
+ kill(pid,SIGKILL);
+ }
+ listRelease(ldb.children);
+ ldb.children = listCreate();
+}
+
+/* Wrapper for EVAL / EVALSHA that enables debugging, and makes sure
+ * that when EVAL returns, whatever happened, the session is ended. */
+void evalGenericCommandWithDebugging(client *c, int evalsha) {
+ if (ldbStartSession(c)) {
+ evalGenericCommand(c,evalsha);
+ ldbEndSession(c);
+ } else {
+ ldbDisable(c);
+ }
+}
+
+/* Return a pointer to ldb.src source code line, considering line to be
+ * one-based, and returning a special string for out of range lines. */
+char *ldbGetSourceLine(int line) {
+ int idx = line-1;
+ if (idx < 0 || idx >= ldb.lines) return "<out of range source code line>";
+ return ldb.src[idx];
+}
+
+/* Return true if there is a breakpoint in the specified line. */
+int ldbIsBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++)
+ if (ldb.bp[j] == line) return 1;
+ return 0;
+}
+
+/* Add the specified breakpoint. Ignore it if we already reached the max.
+ * Returns 1 if the breakpoint was added (or was already set). 0 if there is
+ * no space for the breakpoint or if the line is invalid. */
+int ldbAddBreakpoint(int line) {
+ if (line <= 0 || line > ldb.lines) return 0;
+ if (!ldbIsBreakpoint(line) && ldb.bpcount != LDB_BREAKPOINTS_MAX) {
+ ldb.bp[ldb.bpcount++] = line;
+ return 1;
+ }
+ return 0;
+}
+
+/* Remove the specified breakpoint, returning 1 if the operation was
+ * performed or 0 if there was no such breakpoint. */
+int ldbDelBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++) {
+ if (ldb.bp[j] == line) {
+ ldb.bpcount--;
+ memmove(ldb.bp+j,ldb.bp+j+1,ldb.bpcount-j);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Expect a valid multi-bulk command in the debugging client query buffer.
+ * On success the command is parsed and returned as an array of SDS strings,
+ * otherwise NULL is returned and more data must be read into the buffer. */
+sds *ldbReplParseCommand(int *argcp) {
+ sds *argv = NULL;
+ int argc = 0;
+ if (sdslen(ldb.cbuf) == 0) return NULL;
+
+ /* Working on a copy is simpler in this case. We can modify it freely
+ * for the sake of simpler parsing. */
+ sds copy = sdsdup(ldb.cbuf);
+ char *p = copy;
+
+ /* This Redis protocol parser is a joke... just the simplest thing that
+ * works in this context. It is also very forgiving regarding broken
+ * protocol. */
+
+ /* Seek and parse *<count>\r\n. */
+ p = strchr(p,'*'); if (!p) goto protoerr;
+ char *plen = p+1; /* Multi bulk len pointer. */
+ p = strstr(p,"\r\n"); if (!p) goto protoerr;
+ *p = '\0'; p += 2;
+ *argcp = atoi(plen);
+ if (*argcp <= 0 || *argcp > 1024) goto protoerr;
+
+ /* Parse each argument. */
+ argv = zmalloc(sizeof(sds)*(*argcp));
+ argc = 0;
+ while(argc < *argcp) {
+ if (*p != '$') goto protoerr;
+ plen = p+1; /* Bulk string len pointer. */
+ p = strstr(p,"\r\n"); if (!p) goto protoerr;
+ *p = '\0'; p += 2;
+ int slen = atoi(plen); /* Length of this arg. */
+ if (slen <= 0 || slen > 1024) goto protoerr;
+ argv[argc++] = sdsnewlen(p,slen);
+ p += slen; /* Skip the already parsed argument. */
+ if (p[0] != '\r' || p[1] != '\n') goto protoerr;
+ p += 2; /* Skip \r\n. */
+ }
+ sdsfree(copy);
+ return argv;
+
+protoerr:
+ sdsfreesplitres(argv,argc);
+ sdsfree(copy);
+ return NULL;
+}
+
+/* Log the specified line in the Lua debugger output. */
+void ldbLogSourceLine(int lnum) {
+ char *line = ldbGetSourceLine(lnum);
+ char *prefix;
+ int bp = ldbIsBreakpoint(lnum);
+ int current = ldb.currentline == lnum;
+
+ if (current && bp)
+ prefix = "->#";
+ else if (current)
+ prefix = "-> ";
+ else if (bp)
+ prefix = " #";
+ else
+ prefix = " ";
+ sds thisline = sdscatprintf(sdsempty(),"%s%-3d %s", prefix, lnum, line);
+ ldbLog(thisline);
+}
+
+/* Implement the "list" command of the Lua debugger. If around is 0
+ * the whole file is listed, otherwise only a small portion of the file
+ * around the specified line is shown. When a line number is specified
+ * the amount of context (lines before/after) is specified via the
+ * 'context' argument. */
+void ldbList(int around, int context) {
+ int j;
+
+ for (j = 1; j <= ldb.lines; j++) {
+ if (around != 0 && abs(around-j) > context) continue;
+ ldbLogSourceLine(j);
+ }
+}
+
+/* Append a human-readable representation of the Lua value at position 'idx'
+ * on the stack of the 'lua' state, to the SDS string passed as argument.
+ * The new SDS string with the represented value attached is returned.
+ * Used in order to implement ldbLogStackValue().
+ *
+ * The element is not automatically removed from the stack, nor it is
+ * converted to a different type. */
+#define LDB_MAX_VALUES_DEPTH (LUA_MINSTACK/2)
+sds ldbCatStackValueRec(sds s, lua_State *lua, int idx, int level) {
+ int t = lua_type(lua,idx);
+
+ if (level++ == LDB_MAX_VALUES_DEPTH)
+ return sdscat(s,"<max recursion level reached! Nested table?>");
+
+ switch(t) {
+ case LUA_TSTRING:
+ {
+ size_t strl;
+ char *strp = (char*)lua_tolstring(lua,idx,&strl);
+ s = sdscatrepr(s,strp,strl);
+ }
+ break;
+ case LUA_TBOOLEAN:
+ s = sdscat(s,lua_toboolean(lua,idx) ? "true" : "false");
+ break;
+ case LUA_TNUMBER:
+ s = sdscatprintf(s,"%g",(double)lua_tonumber(lua,idx));
+ break;
+ case LUA_TNIL:
+ s = sdscatlen(s,"nil",3);
+ break;
+ case LUA_TTABLE:
+ {
+ int expected_index = 1; /* First index we expect in an array. */
+ int is_array = 1; /* Will be set to zero if the check fails. */
+ /* Note: we create two representations at the same time, one
+ * assuming the table is an array, one assuming it is not. At the
+ * end we know what is true and select the right one. */
+ sds repr1 = sdsempty();
+ sds repr2 = sdsempty();
+ lua_pushnil(lua); /* The first key to start the iteration is nil. */
+ while (lua_next(lua,idx-1)) {
+ /* Test if so far the table looks like an array. */
+ if (is_array &&
+ (lua_type(lua,-2) != LUA_TNUMBER ||
+ lua_tonumber(lua,-2) != expected_index)) is_array = 0;
+ /* Stack now: table, key, value */
+ /* Array repr. */
+ repr1 = ldbCatStackValueRec(repr1,lua,-1,level);
+ repr1 = sdscatlen(repr1,"; ",2);
+ /* Full repr. */
+ repr2 = sdscatlen(repr2,"[",1);
+ repr2 = ldbCatStackValueRec(repr2,lua,-2,level);
+ repr2 = sdscatlen(repr2,"]=",2);
+ repr2 = ldbCatStackValueRec(repr2,lua,-1,level);
+ repr2 = sdscatlen(repr2,"; ",2);
+ lua_pop(lua,1); /* Stack: table, key. Ready for next iteration. */
+ expected_index++;
+ }
+ /* Strip the last " ;" from both the representations. */
+ if (sdslen(repr1)) sdsrange(repr1,0,-3);
+ if (sdslen(repr2)) sdsrange(repr2,0,-3);
+ /* Select the right one and discard the other. */
+ s = sdscatlen(s,"{",1);
+ s = sdscatsds(s,is_array ? repr1 : repr2);
+ s = sdscatlen(s,"}",1);
+ sdsfree(repr1);
+ sdsfree(repr2);
+ }
+ break;
+ case LUA_TFUNCTION:
+ case LUA_TUSERDATA:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ {
+ const void *p = lua_topointer(lua,idx);
+ char *typename = "unknown";
+ if (t == LUA_TFUNCTION) typename = "function";
+ else if (t == LUA_TUSERDATA) typename = "userdata";
+ else if (t == LUA_TTHREAD) typename = "thread";
+ else if (t == LUA_TLIGHTUSERDATA) typename = "light-userdata";
+ s = sdscatprintf(s,"\"%s@%p\"",typename,p);
+ }
+ break;
+ default:
+ s = sdscat(s,"\"<unknown-lua-type>\"");
+ break;
+ }
+ return s;
+}
+
+/* Higher level wrapper for ldbCatStackValueRec() that just uses an initial
+ * recursion level of '0'. */
+sds ldbCatStackValue(sds s, lua_State *lua, int idx) {
+ return ldbCatStackValueRec(s,lua,idx,0);
+}
+
+/* Produce a debugger log entry representing the value of the Lua object
+ * currently on the top of the stack. The element is not popped nor modified.
+ * Check ldbCatStackValue() for the actual implementation. */
+void ldbLogStackValue(lua_State *lua, char *prefix) {
+ sds s = sdsnew(prefix);
+ s = ldbCatStackValue(s,lua,-1);
+ ldbLogWithMaxLen(s);
+}
+
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply);
+
+/* Get Redis protocol from 'reply' and appends it in human readable form to
+ * the passed SDS string 'o'.
+ *
+ * Note that the SDS string is passed by reference (pointer of pointer to
+ * char*) so that we can return a modified pointer, as for SDS semantics. */
+char *ldbRedisProtocolToHuman(sds *o, char *reply) {
+ char *p = reply;
+ switch(*p) {
+ case ':': p = ldbRedisProtocolToHuman_Int(o,reply); break;
+ case '$': p = ldbRedisProtocolToHuman_Bulk(o,reply); break;
+ case '+': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '-': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '*': p = ldbRedisProtocolToHuman_MultiBulk(o,reply); break;
+ }
+ return p;
+}
+
+/* The following functions are helpers for ldbRedisProtocolToHuman(), each
+ * take care of a given Redis return type. */
+
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ *o = sdscatlen(*o,reply+1,p-reply-1);
+ return p+2;
+}
+
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long bulklen;
+
+ string2ll(reply+1,p-reply-1,&bulklen);
+ if (bulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p+2;
+ } else {
+ *o = sdscatrepr(*o,p+2,bulklen);
+ return p+2+bulklen+2;
+ }
+}
+
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+
+ *o = sdscatrepr(*o,reply,p-reply);
+ return p+2;
+}
+
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long mbulklen;
+ int j = 0;
+
+ string2ll(reply+1,p-reply-1,&mbulklen);
+ p += 2;
+ if (mbulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p;
+ }
+ *o = sdscatlen(*o,"[",1);
+ for (j = 0; j < mbulklen; j++) {
+ p = ldbRedisProtocolToHuman(o,p);
+ if (j != mbulklen-1) *o = sdscatlen(*o,",",1);
+ }
+ *o = sdscatlen(*o,"]",1);
+ return p;
+}
+
+/* Log a Redis reply as debugger output, in a human-readable format.
+ * If the resulting string is longer than 'len' plus a few more chars
+ * used as prefix, it gets truncated. */
+void ldbLogRedisReply(char *reply) {
+ sds log = sdsnew("<reply> ");
+ ldbRedisProtocolToHuman(&log,reply);
+ ldbLogWithMaxLen(log);
+}
+
+/* Implements the "print <var>" command of the Lua debugger. It scans for Lua
+ * var "varname" starting from the current stack frame up to the top stack
+ * frame. The first matching variable is printed. */
+void ldbPrint(lua_State *lua, char *varname) {
+ lua_Debug ar;
+
+ int l = 0; /* Stack level. */
+ while (lua_getstack(lua,l,&ar) != 0) {
+ l++;
+ const char *name;
+ int i = 1; /* Variable index. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ if (strcmp(varname,name) == 0) {
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ return;
+ } else {
+ lua_pop(lua,1); /* Discard the var name on the stack. */
+ }
+ }
+ }
+
+ /* Let's try with global vars in two selected cases */
+ if (!strcmp(varname,"ARGV") || !strcmp(varname,"KEYS")) {
+ lua_getglobal(lua, varname);
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ } else {
+ ldbLog(sdsnew("No such variable."));
+ }
+}
+
+/* Implements the "print" command (without arguments) of the Lua debugger.
+ * Prints all the variables in the current stack frame. */
+void ldbPrintAll(lua_State *lua) {
+ lua_Debug ar;
+ int vars = 0;
+
+ if (lua_getstack(lua,0,&ar) != 0) {
+ const char *name;
+ int i = 1; /* Variable index. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ if (!strstr(name,"(*temporary)")) {
+ sds prefix = sdscatprintf(sdsempty(),"<value> %s = ",name);
+ ldbLogStackValue(lua,prefix);
+ sdsfree(prefix);
+ vars++;
+ }
+ lua_pop(lua,1);
+ }
+ }
+
+ if (vars == 0) {
+ ldbLog(sdsnew("No local variables in the current context."));
+ }
+}
+
+/* Implements the break command to list, add and remove breakpoints. */
+void ldbBreak(sds *argv, int argc) {
+ if (argc == 1) {
+ if (ldb.bpcount == 0) {
+ ldbLog(sdsnew("No breakpoints set. Use 'b <line>' to add one."));
+ return;
+ } else {
+ ldbLog(sdscatfmt(sdsempty(),"%i breakpoints set:",ldb.bpcount));
+ int j;
+ for (j = 0; j < ldb.bpcount; j++)
+ ldbLogSourceLine(ldb.bp[j]);
+ }
+ } else {
+ int j;
+ for (j = 1; j < argc; j++) {
+ char *arg = argv[j];
+ long line;
+ if (!string2l(arg,sdslen(arg),&line)) {
+ ldbLog(sdscatfmt(sdsempty(),"Invalid argument:'%s'",arg));
+ } else {
+ if (line == 0) {
+ ldb.bpcount = 0;
+ ldbLog(sdsnew("All breakpoints removed."));
+ } else if (line > 0) {
+ if (ldb.bpcount == LDB_BREAKPOINTS_MAX) {
+ ldbLog(sdsnew("Too many breakpoints set."));
+ } else if (ldbAddBreakpoint(line)) {
+ ldbList(line,1);
+ } else {
+ ldbLog(sdsnew("Wrong line number."));
+ }
+ } else if (line < 0) {
+ if (ldbDelBreakpoint(-line))
+ ldbLog(sdsnew("Breakpoint removed."));
+ else
+ ldbLog(sdsnew("No breakpoint in the specified line."));
+ }
+ }
+ }
+ }
+}
+
+/* Implements the Lua debugger "eval" command. It just compiles the user
+ * passed fragment of code and executes it, showing the result left on
+ * the stack. */
+void ldbEval(lua_State *lua, sds *argv, int argc) {
+ /* Glue the script together if it is composed of multiple arguments. */
+ sds code = sdsjoinsds(argv+1,argc-1," ",1);
+ sds expr = sdscatsds(sdsnew("return "),code);
+
+ /* Try to compile it as an expression, prepending "return ". */
+ if (luaL_loadbuffer(lua,expr,sdslen(expr),"@ldb_eval")) {
+ lua_pop(lua,1);
+ /* Failed? Try as a statement. */
+ if (luaL_loadbuffer(lua,code,sdslen(code),"@ldb_eval")) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1);
+ sdsfree(code);
+ return;
+ }
+ }
+
+ /* Call it. */
+ sdsfree(code);
+ sdsfree(expr);
+ if (lua_pcall(lua,0,1,0)) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1);
+ return;
+ }
+ ldbLogStackValue(lua,"<retval> ");
+ lua_pop(lua,1);
+}
+
+/* Implement the debugger "redis" command. We use a trick in order to make
+ * the implementation very simple: we just call the Lua redis.call() command
+ * implementation, with ldb.step enabled, so as a side effect the Redis command
+ * and its reply are logged. */
+void ldbRedis(lua_State *lua, sds *argv, int argc) {
+ int j, saved_rc = server.lua_replicate_commands;
+
+ lua_getglobal(lua,"redis");
+ lua_pushstring(lua,"call");
+ lua_gettable(lua,-2); /* Stack: redis, redis.call */
+ for (j = 1; j < argc; j++)
+ lua_pushlstring(lua,argv[j],sdslen(argv[j]));
+ ldb.step = 1; /* Force redis.call() to log. */
+ server.lua_replicate_commands = 1;
+ lua_pcall(lua,argc-1,1,0); /* Stack: redis, result */
+ ldb.step = 0; /* Disable logging. */
+ server.lua_replicate_commands = saved_rc;
+ lua_pop(lua,2); /* Discard the result and clean the stack. */
+}
+
+/* Implements "trace" command of the Lua debugger. It just prints a backtrace
+ * querying Lua starting from the current callframe back to the outer one. */
+void ldbTrace(lua_State *lua) {
+ lua_Debug ar;
+ int level = 0;
+
+ while(lua_getstack(lua,level,&ar)) {
+ lua_getinfo(lua,"Snl",&ar);
+ if(strstr(ar.short_src,"user_script") != NULL) {
+ ldbLog(sdscatprintf(sdsempty(),"%s %s:",
+ (level == 0) ? "In" : "From",
+ ar.name ? ar.name : "top level"));
+ ldbLogSourceLine(ar.currentline);
+ }
+ level++;
+ }
+ if (level == 0) {
+ ldbLog(sdsnew("<error> Can't retrieve Lua stack."));
+ }
+}
+
+/* Implements the debugger "maxlen" command. It just queries or sets the
+ * ldb.maxlen variable. */
+void ldbMaxlen(sds *argv, int argc) {
+ if (argc == 2) {
+ int newval = atoi(argv[1]);
+ ldb.maxlen_hint_sent = 1; /* User knows about this command. */
+ if (newval != 0 && newval <= 60) newval = 60;
+ ldb.maxlen = newval;
+ }
+ if (ldb.maxlen) {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are truncated at %d bytes.",(int)ldb.maxlen));
+ } else {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are unlimited."));
+ }
+}
+
+/* Read debugging commands from client.
+ * Return C_OK if the debugging session is continuing, otherwise
+ * C_ERR if the client closed the connection or is timing out. */
+int ldbRepl(lua_State *lua) {
+ sds *argv;
+ int argc;
+
+ /* We continue processing commands until a command that should return
+ * to the Lua interpreter is found. */
+ while(1) {
+ while((argv = ldbReplParseCommand(&argc)) == NULL) {
+ char buf[1024];
+ int nread = read(ldb.fd,buf,sizeof(buf));
+ if (nread <= 0) {
+ /* Make sure the script runs without user input since the
+ * client is no longer connected. */
+ ldb.step = 0;
+ ldb.bpcount = 0;
+ return C_ERR;
+ }
+ ldb.cbuf = sdscatlen(ldb.cbuf,buf,nread);
+ }
+
+ /* Flush the old buffer. */
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+
+ /* Execute the command. */
+ if (!strcasecmp(argv[0],"h") || !strcasecmp(argv[0],"help")) {
+ldbLog(sdsnew("Redis Lua debugger help:"));
+ldbLog(sdsnew("[h]elp Show this help."));
+ldbLog(sdsnew("[s]tep Run current line and stop again."));
+ldbLog(sdsnew("[n]ext Alias for step."));
+ldbLog(sdsnew("[c]continue Run till next breakpoint."));
+ldbLog(sdsnew("[l]list List source code around current line."));
+ldbLog(sdsnew("[l]list [line] List source code around [line]."));
+ldbLog(sdsnew(" line = 0 means: current position."));
+ldbLog(sdsnew("[l]list [line] [ctx] In this form [ctx] specifies how many lines"));
+ldbLog(sdsnew(" to show before/after [line]."));
+ldbLog(sdsnew("[w]hole List all source code. Alias for 'list 1 1000000'."));
+ldbLog(sdsnew("[p]rint Show all the local variables."));
+ldbLog(sdsnew("[p]rint <var> Show the value of the specified variable."));
+ldbLog(sdsnew(" Can also show global vars KEYS and ARGV."));
+ldbLog(sdsnew("[b]reak Show all breakpoints."));
+ldbLog(sdsnew("[b]reak <line> Add a breakpoint to the specified line."));
+ldbLog(sdsnew("[b]reak -<line> Remove breakpoint from the specified line."));
+ldbLog(sdsnew("[b]reak 0 Remove all breakpoints."));
+ldbLog(sdsnew("[t]race Show a backtrace."));
+ldbLog(sdsnew("[e]eval <code> Execute some Lua code (in a different callframe)."));
+ldbLog(sdsnew("[r]edis <cmd> Execute a Redis command."));
+ldbLog(sdsnew("[m]axlen [len] Trim logged Redis replies and Lua var dumps to len."));
+ldbLog(sdsnew(" Specifying zero as <len> means unlimited."));
+ldbLog(sdsnew("[a]bort Stop the execution of the script. In sync"));
+ldbLog(sdsnew(" mode dataset changes will be retained."));
+ldbLog(sdsnew(""));
+ldbLog(sdsnew("Debugger functions you can call from Lua scripts:"));
+ldbLog(sdsnew("redis.debug() Produce logs in the debugger console."));
+ldbLog(sdsnew("redis.breakpoint() Stop execution like if there was a breakpoing."));
+ldbLog(sdsnew(" in the next line of code."));
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"s") || !strcasecmp(argv[0],"step") ||
+ !strcasecmp(argv[0],"n") || !strcasecmp(argv[0],"next")) {
+ ldb.step = 1;
+ break;
+ } else if (!strcasecmp(argv[0],"c") || !strcasecmp(argv[0],"continue")){
+ break;
+ } else if (!strcasecmp(argv[0],"t") || !strcasecmp(argv[0],"trace")) {
+ ldbTrace(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"m") || !strcasecmp(argv[0],"maxlen")) {
+ ldbMaxlen(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"b") || !strcasecmp(argv[0],"break")) {
+ ldbBreak(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"e") || !strcasecmp(argv[0],"eval")) {
+ ldbEval(lua,argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"a") || !strcasecmp(argv[0],"abort")) {
+ lua_pushstring(lua, "script aborted for user request");
+ lua_error(lua);
+ } else if (argc > 1 &&
+ (!strcasecmp(argv[0],"r") || !strcasecmp(argv[0],"redis"))) {
+ ldbRedis(lua,argv,argc);
+ ldbSendLogs();
+ } else if ((!strcasecmp(argv[0],"p") || !strcasecmp(argv[0],"print"))) {
+ if (argc == 2)
+ ldbPrint(lua,argv[1]);
+ else
+ ldbPrintAll(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"l") || !strcasecmp(argv[0],"list")){
+ int around = ldb.currentline, ctx = 5;
+ if (argc > 1) {
+ int num = atoi(argv[1]);
+ if (num > 0) around = num;
+ }
+ if (argc > 2) ctx = atoi(argv[2]);
+ ldbList(around,ctx);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"w") || !strcasecmp(argv[0],"whole")){
+ ldbList(1,1000000);
+ ldbSendLogs();
+ } else {
+ ldbLog(sdsnew("<error> Unknown Redis Lua debugger command or "
+ "wrong number of arguments."));
+ ldbSendLogs();
+ }
+
+ /* Free the command vector. */
+ sdsfreesplitres(argv,argc);
+ }
+
+ /* Free the current command argv if we break inside the while loop. */
+ sdsfreesplitres(argv,argc);
+ return C_OK;
+}
+
+/* This is the core of our Lua debugger, called each time Lua is about
+ * to start executing a new line. */
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar) {
+ lua_getstack(lua,0,ar);
+ lua_getinfo(lua,"Sl",ar);
+ ldb.currentline = ar->currentline;
+
+ int bp = ldbIsBreakpoint(ldb.currentline) || ldb.luabp;
+ int timeout = 0;
+
+ /* Events outside our script are not interesting. */
+ if(strstr(ar->short_src,"user_script") == NULL) return;
+
+ /* Check if a timeout occurred. */
+ if (ar->event == LUA_HOOKCOUNT && ldb.step == 0 && bp == 0) {
+ mstime_t elapsed = mstime() - server.lua_time_start;
+ mstime_t timelimit = server.lua_time_limit ?
+ server.lua_time_limit : 5000;
+ if (elapsed >= timelimit) {
+ timeout = 1;
+ ldb.step = 1;
+ } else {
+ return; /* No timeout, ignore the COUNT event. */
+ }
+ }
+
+ if (ldb.step || bp) {
+ char *reason = "step over";
+ if (bp) reason = ldb.luabp ? "redis.breakpoint() called" :
+ "break point";
+ else if (timeout) reason = "timeout reached, infinite loop?";
+ ldb.step = 0;
+ ldb.luabp = 0;
+ ldbLog(sdscatprintf(sdsempty(),
+ "* Stopped at %d, stop reason = %s",
+ ldb.currentline, reason));
+ ldbLogSourceLine(ldb.currentline);
+ ldbSendLogs();
+ if (ldbRepl(lua) == C_ERR && timeout) {
+ /* If the client closed the connection and we have a timeout
+ * connection, let's kill the script otherwise the process
+ * will remain blocked indefinitely. */
+ lua_pushstring(lua, "timeout during Lua debugging with client closing connection");
+ lua_error(lua);
+ }
+ server.lua_time_start = mstime();
+ }
+}
+
diff --git a/src/sds.c b/src/sds.c
index dc07d0d3a..eafa13c29 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,8 +35,39 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#include <limits.h>
#include "sds.h"
-#include "zmalloc.h"
+#include "sdsalloc.h"
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 1<<5)
+ return SDS_TYPE_5;
+ if (string_size < 1<<8)
+ return SDS_TYPE_8;
+ if (string_size < 1<<16)
+ return SDS_TYPE_16;
+#if (LONG_MAX == LLONG_MAX)
+ if (string_size < 1ll<<32)
+ return SDS_TYPE_32;
+#endif
+ return SDS_TYPE_64;
+}
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
@@ -43,26 +76,65 @@
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
*
- * mystring = sdsnewlen("abc",3");
+ * mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
- struct sdshdr *sh;
-
- if (init) {
- sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
- } else {
- sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
- }
+ void *sh;
+ sds s;
+ char type = sdsReqType(initlen);
+ /* Empty strings are usually created in order to append. Use type 8
+ * since type 5 is not good at this. */
+ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+ int hdrlen = sdsHdrSize(type);
+ unsigned char *fp; /* flags pointer. */
+
+ sh = s_malloc(hdrlen+initlen+1);
+ if (!init)
+ memset(sh, 0, hdrlen+initlen+1);
if (sh == NULL) return NULL;
- sh->len = initlen;
- sh->free = 0;
+ s = (char*)sh+hdrlen;
+ fp = ((unsigned char*)s)-1;
+ switch(type) {
+ case SDS_TYPE_5: {
+ *fp = type | (initlen << SDS_TYPE_BITS);
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ }
if (initlen && init)
- memcpy(sh->buf, init, initlen);
- sh->buf[initlen] = '\0';
- return (char*)sh->buf;
+ memcpy(s, init, initlen);
+ s[initlen] = '\0';
+ return s;
}
/* Create an empty (zero length) sds string. Even in this case the string
@@ -71,7 +143,7 @@ sds sdsempty(void) {
return sdsnewlen("",0);
}
-/* Create a new sds string starting from a null termined C string. */
+/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
@@ -85,7 +157,7 @@ sds sdsdup(const sds s) {
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
- zfree(s-sizeof(struct sdshdr));
+ s_free((char*)s-sdsHdrSize(s[-1]));
}
/* Set the sds string length to the length as obtained with strlen(), so
@@ -103,21 +175,17 @@ void sdsfree(sds s) {
* the output will be "6" as the string was modified but the logical length
* remains 6 bytes. */
void sdsupdatelen(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
int reallen = strlen(s);
- sh->free += (sh->len-reallen);
- sh->len = reallen;
+ sdssetlen(s, reallen);
}
-/* Modify an sds string on-place to make it empty (zero length).
+/* Modify an sds string in-place to make it empty (zero length).
* However all the existing buffer is not discarded but set as free space
* so that next append operations will not require allocations up to the
* number of bytes previously available. */
void sdsclear(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- sh->free += sh->len;
- sh->len = 0;
- sh->buf[0] = '\0';
+ sdssetlen(s, 0);
+ s[0] = '\0';
}
/* Enlarge the free space at the end of the sds string so that the caller
@@ -127,23 +195,48 @@ void sdsclear(sds s) {
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
- struct sdshdr *sh, *newsh;
- size_t free = sdsavail(s);
+ void *sh, *newsh;
+ size_t avail = sdsavail(s);
size_t len, newlen;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+
+ /* Return ASAP if there is enough space left. */
+ if (avail >= addlen) return s;
- if (free >= addlen) return s;
len = sdslen(s);
- sh = (void*) (s-(sizeof(struct sdshdr)));
+ sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
- newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
- if (newsh == NULL) return NULL;
- newsh->free = newlen - len;
- return newsh->buf;
+ type = sdsReqType(newlen);
+
+ /* Don't use type 5: the user is appending to the string and type 5 is
+ * not able to remember empty space, so sdsMakeRoomFor() must be called
+ * at every appending operation. */
+ if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ /* Since the header size changes, need to move the string forward,
+ * and can't use realloc */
+ newsh = s_malloc(hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, newlen);
+ return s;
}
/* Reallocate the sds string so that it has no free space at the end. The
@@ -153,12 +246,29 @@ sds sdsMakeRoomFor(sds s, size_t addlen) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
- struct sdshdr *sh;
-
- sh = (void*) (s-(sizeof(struct sdshdr)));
- sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1);
- sh->free = 0;
- return sh->buf;
+ void *sh, *newsh;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+ size_t len = sdslen(s);
+ sh = (char*)s-sdsHdrSize(oldtype);
+
+ type = sdsReqType(len);
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ newsh = s_malloc(hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, len);
+ return s;
}
/* Return the total size of the allocation of the specifed sds string,
@@ -169,9 +279,14 @@ sds sdsRemoveFreeSpace(sds s) {
* 4) The implicit null term.
*/
size_t sdsAllocSize(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
- return sizeof(*sh)+sh->len+sh->free+1;
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
}
/* Increment the sds length and decrements the left free space at the
@@ -198,13 +313,44 @@ size_t sdsAllocSize(sds s) {
* sdsIncrLen(s, nread);
*/
void sdsIncrLen(sds s, int incr) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
-
- assert(sh->free >= incr);
- sh->len += incr;
- sh->free -= incr;
- assert(sh->free >= 0);
- s[sh->len] = '\0';
+ unsigned char flags = s[-1];
+ size_t len;
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+ assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+ *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+ len = oldlen+incr;
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ default: len = 0; /* Just to avoid compilation warnings. */
+ }
+ s[len] = '\0';
}
/* Grow the sds to have the specified length. Bytes that were not part of
@@ -213,19 +359,15 @@ void sdsIncrLen(sds s, int incr) {
* if the specified length is smaller than the current length, no operation
* is performed. */
sds sdsgrowzero(sds s, size_t len) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- size_t totlen, curlen = sh->len;
+ size_t curlen = sdslen(s);
if (len <= curlen) return s;
s = sdsMakeRoomFor(s,len-curlen);
if (s == NULL) return NULL;
/* Make sure added region doesn't contain garbage */
- sh = (void*)(s-(sizeof(struct sdshdr)));
memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
- totlen = sh->len+sh->free;
- sh->len = len;
- sh->free = totlen-sh->len;
+ sdssetlen(s, len);
return s;
}
@@ -235,15 +377,12 @@ sds sdsgrowzero(sds s, size_t len) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
- struct sdshdr *sh;
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
memcpy(s+curlen, t, len);
- sh->len = curlen+len;
- sh->free = sh->free-len;
+ sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
@@ -267,19 +406,13 @@ sds sdscatsds(sds s, const sds t) {
/* Destructively modify the sds string 's' to hold the specified binary
* safe string pointed by 't' of length 'len' bytes. */
sds sdscpylen(sds s, const char *t, size_t len) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t totlen = sh->free+sh->len;
-
- if (totlen < len) {
- s = sdsMakeRoomFor(s,len-sh->len);
+ if (sdsalloc(s) < len) {
+ s = sdsMakeRoomFor(s,len-sdslen(s));
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
- totlen = sh->free+sh->len;
}
memcpy(s, t, len);
s[len] = '\0';
- sh->len = len;
- sh->free = totlen-len;
+ sdssetlen(s, len);
return s;
}
@@ -293,7 +426,7 @@ sds sdscpy(sds s, const char *t) {
* conversion. 's' must point to a string with room for at least
* SDS_LLSTR_SIZE bytes.
*
- * The function returns the lenght of the null-terminated string
+ * The function returns the length of the null-terminated string
* representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) {
@@ -367,7 +500,7 @@ sds sdsfromlonglong(long long value) {
return sdsnewlen(buf,len);
}
-/* Like sdscatpritf() but gets va_list instead of being variadic. */
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char staticbuf[1024], *buf = staticbuf, *t;
@@ -376,7 +509,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* We try to start using a static buffer for speed.
* If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) {
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
} else {
buflen = sizeof(staticbuf);
@@ -388,10 +521,11 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
buf[buflen-2] = '\0';
va_copy(cpy,ap);
vsnprintf(buf, buflen, fmt, cpy);
+ va_end(cpy);
if (buf[buflen-2] != '\0') {
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
buflen *= 2;
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
continue;
}
@@ -400,7 +534,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* Finally concat the obtained string to the SDS string and return it. */
t = sdscat(s, buf);
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
return t;
}
@@ -412,7 +546,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
*
* Example:
*
- * s = sdsempty("Sum is: ");
+ * s = sdsnew("Sum is: ");
* s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
*
* Often you need to create a string from scratch with the printf-alike
@@ -446,7 +580,6 @@ sds sdscatprintf(sds s, const char *fmt, ...) {
* %% - Verbatim "%" character.
*/
sds sdscatfmt(sds s, char const *fmt, ...) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t initlen = sdslen(s);
const char *f = fmt;
int i;
@@ -457,14 +590,13 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
i = initlen; /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
- int l;
+ size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
- if (sh->free == 0) {
+ if (sdsavail(s)==0) {
s = sdsMakeRoomFor(s,1);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
@@ -476,13 +608,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
break;
case 'i':
@@ -494,13 +624,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
@@ -513,27 +641,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
break;
default:
s[i++] = *f;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
f++;
@@ -554,25 +678,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
- * s = sdstrim(s,"A. :");
+ * s = sdstrim(s,"Aa. :");
* printf("%s\n", s);
*
* Output will be just "Hello World".
*/
sds sdstrim(sds s, const char *cset) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
char *start, *end, *sp, *ep;
size_t len;
sp = start = s;
ep = end = s+sdslen(s)-1;
while(sp <= end && strchr(cset, *sp)) sp++;
- while(ep > start && strchr(cset, *ep)) ep--;
+ while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
- if (sh->buf != sp) memmove(sh->buf, sp, len);
- sh->buf[len] = '\0';
- sh->free = sh->free+(sh->len-len);
- sh->len = len;
+ if (s != sp) memmove(s, sp, len);
+ s[len] = '\0';
+ sdssetlen(s,len);
return s;
}
@@ -593,7 +715,6 @@ sds sdstrim(sds s, const char *cset) {
* sdsrange(s,1,-1); => "ello World"
*/
void sdsrange(sds s, int start, int end) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
if (len == 0) return;
@@ -616,10 +737,9 @@ void sdsrange(sds s, int start, int end) {
} else {
start = 0;
}
- if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
- sh->buf[newlen] = 0;
- sh->free = sh->free+(sh->len-newlen);
- sh->len = newlen;
+ if (start && newlen) memmove(s, s+start, newlen);
+ s[newlen] = 0;
+ sdssetlen(s,newlen);
}
/* Apply tolower() to every character of the sds string 's'. */
@@ -640,8 +760,8 @@ void sdstoupper(sds s) {
*
* Return value:
*
- * 1 if s1 > s2.
- * -1 if s1 < s2.
+ * positive if s1 > s2.
+ * negative if s1 < s2.
* 0 if s1 and s2 are exactly the same binary string.
*
* If two strings share exactly the same prefix, but one of the two has
@@ -681,7 +801,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
if (seplen < 1 || len < 0) return NULL;
- tokens = zmalloc(sizeof(sds)*slots);
+ tokens = s_malloc(sizeof(sds)*slots);
if (tokens == NULL) return NULL;
if (len == 0) {
@@ -694,7 +814,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
sds *newtokens;
slots *= 2;
- newtokens = zrealloc(tokens,sizeof(sds)*slots);
+ newtokens = s_realloc(tokens,sizeof(sds)*slots);
if (newtokens == NULL) goto cleanup;
tokens = newtokens;
}
@@ -718,7 +838,7 @@ cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
- zfree(tokens);
+ s_free(tokens);
*count = 0;
return NULL;
}
@@ -729,7 +849,7 @@ void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
- zfree(tokens);
+ s_free(tokens);
}
/* Append to the sds string "s" an escaped string representation where
@@ -903,13 +1023,13 @@ sds *sdssplitargs(const char *line, int *argc) {
if (*p) p++;
}
/* add the token to the vector */
- vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
+ vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
/* Even on empty input string return something not NULL. */
- if (vector == NULL) vector = zmalloc(sizeof(void*));
+ if (vector == NULL) vector = s_malloc(sizeof(void*));
return vector;
}
}
@@ -917,7 +1037,7 @@ sds *sdssplitargs(const char *line, int *argc) {
err:
while((*argc)--)
sdsfree(vector[*argc]);
- zfree(vector);
+ s_free(vector);
if (current) sdsfree(current);
*argc = 0;
return NULL;
@@ -959,14 +1079,35 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#ifdef SDS_TEST_MAIN
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscatsds(join, argv[j]);
+ if (j != argc-1) join = sdscatlen(join,sep,seplen);
+ }
+ return join;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined into sdsalloc.h in order to avoid to pay
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, if they want to touch the SDS internals
+ * even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
-int main(void) {
+#define UNUSED(x) (void)(x)
+int sdsTest(void) {
{
- struct sdshdr *sh;
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
@@ -1002,6 +1143,7 @@ int main(void) {
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
"9223372036854775807--",60) == 0)
+ printf("[%s]\n",x);
sdsfree(x);
x = sdsnew("--");
@@ -1011,6 +1153,18 @@ int main(void) {
memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," x");
+ test_cond("sdstrim() works when all chars match",
+ sdslen(x) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," ");
+ test_cond("sdstrim() works when a single char remains",
+ sdslen(x) == 1 && x[0] == 'x')
+
+ sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
@@ -1077,24 +1231,47 @@ int main(void) {
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
- int oldfree;
+ unsigned int oldfree;
+ char *p;
+ int step = 10, j, i;
sdsfree(x);
+ sdsfree(y);
x = sdsnew("0");
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
- x = sdsMakeRoomFor(x,1);
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
- oldfree = sh->free;
- x[1] = '1';
- sdsIncrLen(x,1);
- test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
- test_cond("sdsIncrLen() -- len", sh->len == 2);
- test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
+ test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+ /* Run the test a few times in order to hit the first two
+ * SDS header types. */
+ for (i = 0; i < 10; i++) {
+ int oldlen = sdslen(x);
+ x = sdsMakeRoomFor(x,step);
+ int type = x[-1]&SDS_TYPE_MASK;
+
+ test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+ if (type != SDS_TYPE_5) {
+ test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+ oldfree = sdsavail(x);
+ }
+ p = x+oldlen;
+ for (j = 0; j < step; j++) {
+ p[j] = 'A'+j;
+ }
+ sdsIncrLen(x,step);
+ }
+ test_cond("sdsMakeRoomFor() content",
+ memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+ test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+ sdsfree(x);
}
}
test_report()
return 0;
}
#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+ return sdsTest();
+}
+#endif
diff --git a/src/sds.h b/src/sds.h
index 9a604021c..394f8b52e 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,32 +37,188 @@
#include <sys/types.h>
#include <stdarg.h>
+#include <stdint.h>
typedef char *sds;
-struct sdshdr {
- int len;
- int free;
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+ unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+ uint8_t len; /* used */
+ uint8_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+ uint16_t len; /* used */
+ uint16_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+ uint32_t len; /* used */
+ uint32_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+ uint64_t len; /* used */
+ uint64_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
+#define SDS_TYPE_5 0
+#define SDS_TYPE_8 1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
static inline size_t sdslen(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->len;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->len;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->len;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->len;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->len;
+ }
+ return 0;
}
static inline size_t sdsavail(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->free;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ return 0;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ return sh->alloc - sh->len;
+ }
+ }
+ return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len = newlen;
+ break;
+ }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len += inc;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len += inc;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len += inc;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len += inc;
+ break;
+ }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->alloc;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->alloc;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->alloc;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->alloc;
+ }
+ return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ /* Nothing to do, this type has no total allocation info. */
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->alloc = newlen;
+ break;
+ }
}
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
-size_t sdslen(const sds s);
sds sdsdup(const sds s);
void sdsfree(sds s);
-size_t sdsavail(const sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
@@ -91,11 +249,25 @@ sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to, may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
#endif
diff --git a/src/sdsalloc.h b/src/sdsalloc.h
new file mode 100644
index 000000000..531d41929
--- /dev/null
+++ b/src/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#include "zmalloc.h"
+#define s_malloc zmalloc
+#define s_realloc zrealloc
+#define s_free zfree
diff --git a/src/sentinel.c b/src/sentinel.c
index 48e1de8dd..6c6a3a0cd 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "hiredis.h"
#include "async.h"
@@ -54,19 +54,18 @@ typedef struct sentinelAddr {
#define SRI_MASTER (1<<0)
#define SRI_SLAVE (1<<1)
#define SRI_SENTINEL (1<<2)
-#define SRI_DISCONNECTED (1<<3)
-#define SRI_S_DOWN (1<<4) /* Subjectively down (no quorum). */
-#define SRI_O_DOWN (1<<5) /* Objectively down (confirmed by others). */
-#define SRI_MASTER_DOWN (1<<6) /* A Sentinel with this flag set thinks that
+#define SRI_S_DOWN (1<<3) /* Subjectively down (no quorum). */
+#define SRI_O_DOWN (1<<4) /* Objectively down (confirmed by others). */
+#define SRI_MASTER_DOWN (1<<5) /* A Sentinel with this flag set thinks that
its master is down. */
-#define SRI_FAILOVER_IN_PROGRESS (1<<7) /* Failover is in progress for
+#define SRI_FAILOVER_IN_PROGRESS (1<<6) /* Failover is in progress for
this master. */
-#define SRI_PROMOTED (1<<8) /* Slave selected for promotion. */
-#define SRI_RECONF_SENT (1<<9) /* SLAVEOF <newmaster> sent. */
-#define SRI_RECONF_INPROG (1<<10) /* Slave synchronization in progress. */
-#define SRI_RECONF_DONE (1<<11) /* Slave synchronized with new master. */
-#define SRI_FORCE_FAILOVER (1<<12) /* Force failover with master up. */
-#define SRI_SCRIPT_KILL_SENT (1<<13) /* SCRIPT KILL already sent on -BUSY */
+#define SRI_PROMOTED (1<<7) /* Slave selected for promotion. */
+#define SRI_RECONF_SENT (1<<8) /* SLAVEOF <newmaster> sent. */
+#define SRI_RECONF_INPROG (1<<9) /* Slave synchronization in progress. */
+#define SRI_RECONF_DONE (1<<10) /* Slave synchronized with new master. */
+#define SRI_FORCE_FAILOVER (1<<11) /* Force failover with master up. */
+#define SRI_SCRIPT_KILL_SENT (1<<12) /* SCRIPT KILL already sent on -BUSY */
/* Note: times are in milliseconds. */
#define SENTINEL_INFO_PERIOD 10000
@@ -115,27 +114,59 @@ typedef struct sentinelAddr {
#define SENTINEL_SCRIPT_MAX_RETRY 10
#define SENTINEL_SCRIPT_RETRY_DELAY 30000 /* 30 seconds between retries. */
-typedef struct sentinelRedisInstance {
- int flags; /* See SRI_... defines */
- char *name; /* Master name from the point of view of this sentinel. */
- char *runid; /* run ID of this instance. */
- uint64_t config_epoch; /* Configuration epoch. */
- sentinelAddr *addr; /* Master host. */
+/* SENTINEL SIMULATE-FAILURE command flags. */
+#define SENTINEL_SIMFAILURE_NONE 0
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1<<0)
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1<<1)
+
+/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
+ * monitoring many masters, we have different instances representing the
+ * same Sentinels, one per master, and we need to share the hiredis connections
+ * among them. Otherwise if 5 Sentinels are monitoring 100 masters we create
+ * 500 outgoing connections instead of 5.
+ *
+ * So this structure represents a reference counted link in terms of the two
+ * hiredis connections for commands and Pub/Sub, and the fields needed for
+ * failure detection, since the ping/pong time are now local to the link: if
+ * the link is available, the instance is available. This way we don't just
+ * have 5 connections instead of 500, we also send 5 pings instead of 500.
+ *
+ * Links are shared only for Sentinels: master and slave instances have
+ * a link with refcount = 1, always. */
+typedef struct instanceLink {
+ int refcount; /* Number of sentinelRedisInstance owners. */
+ int disconnected; /* Non-zero if we need to reconnect cc or pc. */
+ int pending_commands; /* Number of commands sent waiting for a reply. */
redisAsyncContext *cc; /* Hiredis context for commands. */
redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
- int pending_commands; /* Number of commands sent waiting for a reply. */
mstime_t cc_conn_time; /* cc connection time. */
mstime_t pc_conn_time; /* pc connection time. */
mstime_t pc_last_activity; /* Last time we received any message. */
mstime_t last_avail_time; /* Last time the instance replied to ping with
a reply we consider valid. */
- mstime_t last_ping_time; /* Last time a pending ping was sent in the
- context of the current command connection
- with the instance. 0 if still not sent or
- if pong already received. */
+ mstime_t act_ping_time; /* Time at which the last pending ping (no pong
+ received after it) was sent. This field is
+ set to 0 when a pong is received, and set again
+ to the current time if the value is 0 and a new
+ ping is sent. */
+ mstime_t last_ping_time; /* Time at which we sent the last ping. This is
+ only used to avoid sending too many pings
+ during failure. Idle time is computed using
+ the act_ping_time field. */
mstime_t last_pong_time; /* Last time the instance replied to ping,
whatever the reply was. That's used to check
if the link is idle and must be reconnected. */
+ mstime_t last_reconn_time; /* Last reconnection attempt performed when
+ the link was down. */
+} instanceLink;
+
+typedef struct sentinelRedisInstance {
+ int flags; /* See SRI_... defines */
+ char *name; /* Master name from the point of view of this sentinel. */
+ char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
+ uint64_t config_epoch; /* Configuration epoch. */
+ sentinelAddr *addr; /* Master host. */
+ instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
we received a hello from this Sentinel
@@ -159,7 +190,7 @@ typedef struct sentinelRedisInstance {
/* Master specific. */
dict *sentinels; /* Other sentinels monitoring the same master. */
dict *slaves; /* Slaves for this master instance. */
- int quorum; /* Number of sentinels that need to agree on failure. */
+ unsigned int quorum;/* Number of sentinels that need to agree on failure. */
int parallel_syncs; /* How many slaves to reconfigure at same time. */
char *auth_pass; /* Password to use for AUTH against master & slaves. */
@@ -190,19 +221,26 @@ typedef struct sentinelRedisInstance {
* are set to NULL no script is executed. */
char *notification_script;
char *client_reconfig_script;
+ sds info; /* cached INFO output */
} sentinelRedisInstance;
/* Main state. */
struct sentinelState {
- uint64_t current_epoch; /* Current epoch. */
+ char myid[CONFIG_RUN_ID_SIZE+1]; /* This sentinel ID. */
+ uint64_t current_epoch; /* Current epoch. */
dict *masters; /* Dictionary of master sentinelRedisInstances.
Key is the instance name, value is the
sentinelRedisInstance structure pointer. */
int tilt; /* Are we in TILT mode? */
int running_scripts; /* Number of scripts in execution right now. */
- mstime_t tilt_start_time; /* When TITL started. */
- mstime_t previous_time; /* Last time we ran the time handler. */
- list *scripts_queue; /* Queue of user scripts to execute. */
+ mstime_t tilt_start_time; /* When TILT started. */
+ mstime_t previous_time; /* Last time we ran the time handler. */
+ list *scripts_queue; /* Queue of user scripts to execute. */
+ char *announce_ip; /* IP addr that is gossiped to other sentinels if
+ not NULL. */
+ int announce_port; /* Port that is gossiped to other sentinels if
+ non zero. */
+ unsigned long simfailure_flags; /* Failures simulation. */
} sentinel;
/* A script execution job. */
@@ -293,7 +331,7 @@ static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
/* Nothing should be attached when something is already attached */
if (ac->ev.data != NULL)
- return REDIS_ERR;
+ return C_ERR;
/* Create container for context and r/w events */
e = (redisAeEvents*)zmalloc(sizeof(*e));
@@ -310,7 +348,7 @@ static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
ac->ev.cleanup = redisAeCleanup;
ac->ev.data = e;
- return REDIS_OK;
+ return C_OK;
}
/* ============================= Prototypes ================================= */
@@ -322,8 +360,7 @@ sentinelRedisInstance *sentinelGetMasterByName(char *name);
char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
int yesnotoi(char *s);
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
+void instanceLinkConnectionError(const redisAsyncContext *c);
const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
void sentinelAbortFailover(sentinelRedisInstance *ri);
void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
@@ -337,14 +374,17 @@ void sentinelFlushConfig(void);
void sentinelGenerateInitialMonitorEvents(void);
int sentinelSendPing(sentinelRedisInstance *ri);
int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master);
+sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
+void sentinelSimFailureCrash(void);
/* ========================= Dictionary types =============================== */
-unsigned int dictSdsHash(const void *key);
+uint64_t dictSdsHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
void dictInstancesValDestructor (void *privdata, void *obj) {
+ UNUSED(privdata);
releaseSentinelRedisInstance(obj);
}
@@ -376,11 +416,11 @@ dictType leaderVotesDictType = {
/* =========================== Initialization =============================== */
-void sentinelCommand(redisClient *c);
-void sentinelInfoCommand(redisClient *c);
-void sentinelSetCommand(redisClient *c);
-void sentinelPublishCommand(redisClient *c);
-void sentinelRoleCommand(redisClient *c);
+void sentinelCommand(client *c);
+void sentinelInfoCommand(client *c);
+void sentinelSetCommand(client *c);
+void sentinelPublishCommand(client *c);
+void sentinelRoleCommand(client *c);
struct redisCommand sentinelcmds[] = {
{"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
@@ -392,6 +432,7 @@ struct redisCommand sentinelcmds[] = {
{"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0},
{"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0},
{"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}
};
@@ -403,7 +444,7 @@ void initSentinelConfig(void) {
/* Perform the Sentinel mode initialization. */
void initSentinel(void) {
- int j;
+ unsigned int j;
/* Remove usual Redis commands from the command table, then just add
* the SENTINEL command. */
@@ -413,7 +454,7 @@ void initSentinel(void) {
struct redisCommand *cmd = sentinelcmds+j;
retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
/* Initialize various data structures. */
@@ -424,24 +465,43 @@ void initSentinel(void) {
sentinel.previous_time = mstime();
sentinel.running_scripts = 0;
sentinel.scripts_queue = listCreate();
+ sentinel.announce_ip = NULL;
+ sentinel.announce_port = 0;
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ memset(sentinel.myid,0,sizeof(sentinel.myid));
}
/* This function gets called when the server is in Sentinel mode, started,
* loaded the configuration, and is ready for normal operations. */
void sentinelIsRunning(void) {
- redisLog(REDIS_WARNING,"Sentinel runid is %s", server.runid);
+ int j;
if (server.configfile == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel started without a config file. Exiting...");
exit(1);
} else if (access(server.configfile,W_OK) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel config file %s is not writable: %s. Exiting...",
server.configfile,strerror(errno));
exit(1);
}
+ /* If this Sentinel has yet no ID set in the configuration file, we
+ * pick a random one and persist the config on disk. From now on this
+ * will be this Sentinel ID across restarts. */
+ for (j = 0; j < CONFIG_RUN_ID_SIZE; j++)
+ if (sentinel.myid[j] != 0) break;
+
+ if (j == CONFIG_RUN_ID_SIZE) {
+ /* Pick ID and persist the config. */
+ getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE);
+ sentinelFlushConfig();
+ }
+
+ /* Log its ID to make debugging of issues simpler. */
+ serverLog(LL_WARNING,"Sentinel ID is %s", sentinel.myid);
+
/* We want to generate a +monitor event for every configured master
* at startup. */
sentinelGenerateInitialMonitorEvents();
@@ -455,19 +515,19 @@ void sentinelIsRunning(void) {
* EINVAL: Invalid port number.
*/
sentinelAddr *createSentinelAddr(char *hostname, int port) {
- char buf[32];
+ char ip[NET_IP_STR_LEN];
sentinelAddr *sa;
- if (port <= 0 || port > 65535) {
+ if (port < 0 || port > 65535) {
errno = EINVAL;
return NULL;
}
- if (anetResolve(NULL,hostname,buf,sizeof(buf)) == ANET_ERR) {
+ if (anetResolve(NULL,hostname,ip,sizeof(ip)) == ANET_ERR) {
errno = ENOENT;
return NULL;
}
sa = zmalloc(sizeof(*sa));
- sa->ip = sdsnew(buf);
+ sa->ip = sdsnew(ip);
sa->port = port;
return sa;
}
@@ -497,7 +557,7 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
/* Send an event to log, pub/sub, user notification script.
*
- * 'level' is the log level for logging. Only REDIS_WARNING events will trigger
+ * 'level' is the log level for logging. Only LL_WARNING events will trigger
* the execution of the user notification script.
*
* 'type' is the message type, also used as a pub/sub channel name.
@@ -522,7 +582,7 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
const char *fmt, ...) {
va_list ap;
- char msg[REDIS_MAX_LOGMSG_LEN];
+ char msg[LOG_MAX_LEN];
robj *channel, *payload;
/* Handle %@ */
@@ -554,10 +614,10 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
/* Log the message if the log level allows it to be logged. */
if (level >= server.verbosity)
- redisLog(level,"%s %s",type,msg);
+ serverLog(level,"%s %s",type,msg);
/* Publish the message via Pub/Sub if it's not a debugging one. */
- if (level != REDIS_DEBUG) {
+ if (level != LL_DEBUG) {
channel = createStringObject(type,strlen(type));
payload = createStringObject(msg,strlen(msg));
pubsubPublishMessage(channel,payload);
@@ -566,10 +626,10 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
}
/* Call the notification script if applicable. */
- if (level == REDIS_WARNING && ri != NULL) {
+ if (level == LL_WARNING && ri != NULL) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
ri : ri->master;
- if (master->notification_script) {
+ if (master && master->notification_script) {
sentinelScheduleScriptExecution(master->notification_script,
type,msg,NULL);
}
@@ -587,7 +647,7 @@ void sentinelGenerateInitialMonitorEvents(void) {
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
- sentinelEvent(REDIS_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
}
dictReleaseIterator(di);
}
@@ -645,7 +705,7 @@ void sentinelScheduleScriptExecution(char *path, ...) {
sentinelReleaseScriptJob(sj);
break;
}
- redisAssert(listLength(sentinel.scripts_queue) <=
+ serverAssert(listLength(sentinel.scripts_queue) <=
SENTINEL_SCRIPT_MAX_QUEUE);
}
}
@@ -697,7 +757,7 @@ void sentinelRunPendingScripts(void) {
/* Parent (fork error).
* We report fork errors as signal 99, in order to unify the
* reporting with other kind of errors. */
- sentinelEvent(REDIS_WARNING,"-script-error",NULL,
+ sentinelEvent(LL_WARNING,"-script-error",NULL,
"%s %d %d", sj->argv[0], 99, 0);
sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
sj->pid = 0;
@@ -709,7 +769,7 @@ void sentinelRunPendingScripts(void) {
} else {
sentinel.running_scripts++;
sj->pid = pid;
- sentinelEvent(REDIS_DEBUG,"+script-child",NULL,"%ld",(long)pid);
+ sentinelEvent(LL_DEBUG,"+script-child",NULL,"%ld",(long)pid);
}
}
}
@@ -743,12 +803,12 @@ void sentinelCollectTerminatedScripts(void) {
sentinelScriptJob *sj;
if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
- sentinelEvent(REDIS_DEBUG,"-script-child",NULL,"%ld %d %d",
+ sentinelEvent(LL_DEBUG,"-script-child",NULL,"%ld %d %d",
(long)pid, exitcode, bysignal);
ln = sentinelGetScriptListNodeByPid(pid);
if (ln == NULL) {
- redisLog(REDIS_WARNING,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
+ serverLog(LL_WARNING,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
continue;
}
sj = ln->value;
@@ -767,7 +827,7 @@ void sentinelCollectTerminatedScripts(void) {
/* Otherwise let's remove the script, but log the event if the
* execution did not terminated in the best of the ways. */
if (bysignal || exitcode != 0) {
- sentinelEvent(REDIS_WARNING,"-script-error",NULL,
+ sentinelEvent(LL_WARNING,"-script-error",NULL,
"%s %d %d", sj->argv[0], bysignal, exitcode);
}
listDelNode(sentinel.scripts_queue,ln);
@@ -791,7 +851,7 @@ void sentinelKillTimedoutScripts(void) {
if (sj->flags & SENTINEL_SCRIPT_RUNNING &&
(now - sj->start_time) > SENTINEL_SCRIPT_MAX_RUNTIME)
{
- sentinelEvent(REDIS_WARNING,"-script-timeout",NULL,"%s %ld",
+ sentinelEvent(LL_WARNING,"-script-timeout",NULL,"%s %ld",
sj->argv[0], (long)sj->pid);
kill(sj->pid,SIGKILL);
}
@@ -799,7 +859,7 @@ void sentinelKillTimedoutScripts(void) {
}
/* Implements SENTINEL PENDING-SCRIPTS command. */
-void sentinelPendingScriptsCommand(redisClient *c) {
+void sentinelPendingScriptsCommand(client *c) {
listNode *ln;
listIter li;
@@ -863,6 +923,201 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
state, from->ip, fromport, to->ip, toport, NULL);
}
+/* =============================== instanceLink ============================= */
+
+/* Create a not yet connected link object. */
+instanceLink *createInstanceLink(void) {
+ instanceLink *link = zmalloc(sizeof(*link));
+
+ link->refcount = 1;
+ link->disconnected = 1;
+ link->pending_commands = 0;
+ link->cc = NULL;
+ link->pc = NULL;
+ link->cc_conn_time = 0;
+ link->pc_conn_time = 0;
+ link->last_reconn_time = 0;
+ link->pc_last_activity = 0;
+ /* We set the act_ping_time to "now" even if we actually don't have yet
+ * a connection with the node, nor we sent a ping.
+ * This is useful to detect a timeout in case we'll not be able to connect
+ * with the node at all. */
+ link->act_ping_time = mstime();
+ link->last_ping_time = 0;
+ link->last_avail_time = mstime();
+ link->last_pong_time = mstime();
+ return link;
+}
+
+/* Disconnect an hiredis connection in the context of an instance link. */
+void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
+ if (c == NULL) return;
+
+ if (link->cc == c) {
+ link->cc = NULL;
+ link->pending_commands = 0;
+ }
+ if (link->pc == c) link->pc = NULL;
+ c->data = NULL;
+ link->disconnected = 1;
+ redisAsyncFree(c);
+}
+
+/* Decrement the refcount of a link object, if it drops to zero, actually
+ * free it and return NULL. Otherwise don't do anything and return the pointer
+ * to the object.
+ *
+ * If we are not going to free the link and ri is not NULL, we rebind all the
+ * pending requests in link->cc (hiredis connection for commands) to a
+ * callback that will just ignore them. This is useful to avoid processing
+ * replies for an instance that no longer exists. */
+instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
+{
+ serverAssert(link->refcount > 0);
+ link->refcount--;
+ if (link->refcount != 0) {
+ if (ri && ri->link->cc) {
+ /* This instance may have pending callbacks in the hiredis async
+ * context, having as 'privdata' the instance that we are going to
+ * free. Let's rewrite the callback list, directly exploiting
+ * hiredis internal data structures, in order to bind them with
+ * a callback that will ignore the reply at all. */
+ redisCallback *cb;
+ redisCallbackList *callbacks = &link->cc->replies;
+
+ cb = callbacks->head;
+ while(cb) {
+ if (cb->privdata == ri) {
+ cb->fn = sentinelDiscardReplyCallback;
+ cb->privdata = NULL; /* Not strictly needed. */
+ }
+ cb = cb->next;
+ }
+ }
+ return link; /* Other active users. */
+ }
+
+ instanceLinkCloseConnection(link,link->cc);
+ instanceLinkCloseConnection(link,link->pc);
+ zfree(link);
+ return NULL;
+}
+
+/* This function will attempt to share the instance link we already have
+ * for the same Sentinel in the context of a different master, with the
+ * instance we are passing as argument.
+ *
+ * This way multiple Sentinel objects that refer all to the same physical
+ * Sentinel instance but in the context of different masters will use
+ * a single connection, will send a single PING per second for failure
+ * detection and so forth.
+ *
+ * Return C_OK if a matching Sentinel was found in the context of a
+ * different master and sharing was performed. Otherwise C_ERR
+ * is returned. */
+int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
+ serverAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+
+ if (ri->runid == NULL) return C_ERR; /* No way to identify it. */
+ if (ri->link->refcount > 1) return C_ERR; /* Already shared. */
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ /* We want to share with the same physical Sentinel referenced
+ * in other masters, so skip our master. */
+ if (master == ri->master) continue;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ if (match == NULL) continue; /* No match. */
+ if (match == ri) continue; /* Should never happen but... safer. */
+
+ /* We identified a matching Sentinel, great! Let's free our link
+ * and use the one of the matching Sentinel. */
+ releaseInstanceLink(ri->link,NULL);
+ ri->link = match->link;
+ match->link->refcount++;
+ return C_OK;
+ }
+ dictReleaseIterator(di);
+ return C_ERR;
+}
+
+/* When we detect a Sentinel to switch address (reporting a different IP/port
+ * pair in Hello messages), let's update all the matching Sentinels in the
+ * context of other masters as well and disconnect the links, so that everybody
+ * will be updated.
+ *
+ * Return the number of updated Sentinel addresses. */
+int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) {
+ serverAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+ int reconfigured = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ /* If there is no match, this master does not know about this
+ * Sentinel, try with the next one. */
+ if (match == NULL) continue;
+
+ /* Disconnect the old links if connected. */
+ if (match->link->cc != NULL)
+ instanceLinkCloseConnection(match->link,match->link->cc);
+ if (match->link->pc != NULL)
+ instanceLinkCloseConnection(match->link,match->link->pc);
+
+ if (match == ri) continue; /* Address already updated for it. */
+
+ /* Update the address of the matching Sentinel by copying the address
+ * of the Sentinel object that received the address update. */
+ releaseSentinelAddr(match->addr);
+ match->addr = dupSentinelAddr(ri->addr);
+ reconfigured++;
+ }
+ dictReleaseIterator(di);
+ if (reconfigured)
+ sentinelEvent(LL_NOTICE,"+sentinel-address-update", ri,
+ "%@ %d additional matching instances", reconfigured);
+ return reconfigured;
+}
+
+/* This function is called when an hiredis connection reported an error.
+ * We set it to NULL and mark the link as disconnected so that it will be
+ * reconnected again.
+ *
+ * Note: we don't free the hiredis context as hiredis will do it for us
+ * for async connections. */
+void instanceLinkConnectionError(const redisAsyncContext *c) {
+ instanceLink *link = c->data;
+ int pubsub;
+
+ if (!link) return;
+
+ pubsub = (link->pc == c);
+ if (pubsub)
+ link->pc = NULL;
+ else
+ link->cc = NULL;
+ link->disconnected = 1;
+}
+
+/* Hiredis connection established / disconnected callbacks. We need them
+ * just to cleanup our link state. */
+void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
+ if (status != C_OK) instanceLinkConnectionError(c);
+}
+
+void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
+ UNUSED(status);
+ instanceLinkConnectionError(c);
+}
+
/* ========================== sentinelRedisInstance ========================= */
/* Create a redis instance, the following fields must be populated by the
@@ -884,25 +1139,25 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
* createSentinelAddr() function.
*
* The function may also fail and return NULL with errno set to EBUSY if
- * a master or slave with the same name already exists. */
+ * a master with the same name, a slave with the same address, or a sentinel
+ * with the same ID already exists. */
+
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
sentinelRedisInstance *ri;
sentinelAddr *addr;
dict *table = NULL;
- char slavename[128], *sdsname;
+ char slavename[NET_PEER_ID_LEN], *sdsname;
- redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
- redisAssert((flags & SRI_MASTER) || master != NULL);
+ serverAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
+ serverAssert((flags & SRI_MASTER) || master != NULL);
/* Check address validity. */
addr = createSentinelAddr(hostname,port);
if (addr == NULL) return NULL;
- /* For slaves and sentinel we use ip:port as name. */
- if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
- snprintf(slavename,sizeof(slavename),
- strchr(hostname,':') ? "[%s]:%d" : "%s:%d",
- hostname,port);
+ /* For slaves use ip:port as name. */
+ if (flags & SRI_SLAVE) {
+ anetFormatAddr(slavename, sizeof(slavename), hostname, port);
name = slavename;
}
@@ -915,6 +1170,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
else if (flags & SRI_SENTINEL) table = master->sentinels;
sdsname = sdsnew(name);
if (dictFind(table,sdsname)) {
+ releaseSentinelAddr(addr);
sdsfree(sdsname);
errno = EBUSY;
return NULL;
@@ -924,24 +1180,12 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri = zmalloc(sizeof(*ri));
/* Note that all the instances are started in the disconnected state,
* the event loop will take care of connecting them. */
- ri->flags = flags | SRI_DISCONNECTED;
+ ri->flags = flags;
ri->name = sdsname;
ri->runid = NULL;
ri->config_epoch = 0;
ri->addr = addr;
- ri->cc = NULL;
- ri->pc = NULL;
- ri->pending_commands = 0;
- ri->cc_conn_time = 0;
- ri->pc_conn_time = 0;
- ri->pc_last_activity = 0;
- /* We set the last_ping_time to "now" even if we actually don't have yet
- * a connection with the node, nor we sent a ping.
- * This is useful to detect a timeout in case we'll not be able to connect
- * with the node at all. */
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link = createInstanceLink();
ri->last_pub_time = mstime();
ri->last_hello_time = mstime();
ri->last_master_down_reply_time = mstime();
@@ -976,6 +1220,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->promoted_slave = NULL;
ri->notification_script = NULL;
ri->client_reconfig_script = NULL;
+ ri->info = NULL;
/* Role */
ri->role_reported = ri->flags & (SRI_MASTER|SRI_SLAVE);
@@ -996,9 +1241,8 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
dictRelease(ri->sentinels);
dictRelease(ri->slaves);
- /* Release hiredis connections. */
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
+ /* Disconnect the instance. */
+ releaseInstanceLink(ri->link,ri);
/* Free other resources. */
sdsfree(ri->name);
@@ -1008,6 +1252,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
sdsfree(ri->slave_master_host);
sdsfree(ri->leader);
sdsfree(ri->auth_pass);
+ sdsfree(ri->info);
releaseSentinelAddr(ri->addr);
/* Clear state into the master if needed. */
@@ -1023,11 +1268,11 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
{
sds key;
sentinelRedisInstance *slave;
+ char buf[NET_PEER_ID_LEN];
- redisAssert(ri->flags & SRI_MASTER);
- key = sdscatprintf(sdsempty(),
- strchr(ip,':') ? "[%s]:%d" : "%s:%d",
- ip,port);
+ serverAssert(ri->flags & SRI_MASTER);
+ anetFormatAddr(buf,sizeof(buf),ip,port);
+ key = sdsnew(buf);
slave = dictFetchValue(ri->slaves,key);
sdsfree(key);
return slave;
@@ -1041,35 +1286,29 @@ const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
else return "unknown";
}
-/* This function removes all the instances found in the dictionary of
- * sentinels in the specified 'master', having either:
+/* This function removes the Sentinel with the specified ID from the
+ * specified master.
*
- * 1) The same ip/port as specified.
- * 2) The same runid.
+ * If "runid" is NULL the function returns ASAP.
*
- * "1" and "2" don't need to verify at the same time, just one is enough.
- * If "runid" is NULL it is not checked.
- * Similarly if "ip" is NULL it is not checked.
+ * This function is useful because on Sentinels address switch, we want to
+ * remove our old entry and add a new one for the same ID but with the new
+ * address.
*
- * This function is useful because every time we add a new Sentinel into
- * a master's Sentinels dictionary, we want to be very sure about not
- * having duplicated instances for any reason. This is important because
- * other sentinels are needed to reach ODOWN quorum, and later to get
- * voted for a given configuration epoch in order to perform the failover.
- *
- * The function returns the number of Sentinels removed. */
-int removeMatchingSentinelsFromMaster(sentinelRedisInstance *master, char *ip, int port, char *runid) {
+ * The function returns 1 if the matching Sentinel was removed, otherwise
+ * 0 if there was no Sentinel with this ID. */
+int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) {
dictIterator *di;
dictEntry *de;
int removed = 0;
+ if (runid == NULL) return 0;
+
di = dictGetSafeIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
- if ((ri->runid && runid && strcmp(ri->runid,runid) == 0) ||
- (ip && strcmp(ri->addr->ip,ip) == 0 && port == ri->addr->port))
- {
+ if (ri->runid && strcmp(ri->runid,runid) == 0) {
dictDelete(master->sentinels,ri->name);
removed++;
}
@@ -1089,7 +1328,7 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
dictEntry *de;
sentinelRedisInstance *instance = NULL;
- redisAssert(ip || runid); /* User must pass at least one search param. */
+ serverAssert(ip || runid); /* User must pass at least one search param. */
di = dictGetIterator(instances);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
@@ -1148,42 +1387,45 @@ void sentinelDelFlagsToDictOfRedisInstances(dict *instances, int flags) {
* 1) Remove all slaves.
* 2) Remove all sentinels.
* 3) Remove most of the flags resulting from runtime operations.
- * 4) Reset timers to their default value.
+ * 4) Reset timers to their default value. For example after a reset it will be
+ * possible to failover again the same master ASAP, without waiting the
+ * failover timeout delay.
* 5) In the process of doing this undo the failover if in progress.
* 6) Disconnect the connections with the master (will reconnect automatically).
*/
#define SENTINEL_RESET_NO_SENTINELS (1<<0)
void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
- redisAssert(ri->flags & SRI_MASTER);
+ serverAssert(ri->flags & SRI_MASTER);
dictRelease(ri->slaves);
ri->slaves = dictCreate(&instancesDictType,NULL);
if (!(flags & SENTINEL_RESET_NO_SENTINELS)) {
dictRelease(ri->sentinels);
ri->sentinels = dictCreate(&instancesDictType,NULL);
}
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
- ri->flags &= SRI_MASTER|SRI_DISCONNECTED;
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
+ ri->flags &= SRI_MASTER;
if (ri->leader) {
sdsfree(ri->leader);
ri->leader = NULL;
}
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
ri->failover_state_change_time = 0;
- ri->failover_start_time = 0;
+ ri->failover_start_time = 0; /* We can failover again ASAP. */
ri->promoted_slave = NULL;
sdsfree(ri->runid);
sdsfree(ri->slave_master_host);
ri->runid = NULL;
ri->slave_master_host = NULL;
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link->act_ping_time = mstime();
+ ri->link->last_ping_time = 0;
+ ri->link->last_avail_time = mstime();
+ ri->link->last_pong_time = mstime();
ri->role_reported_time = mstime();
ri->role_reported = SRI_MASTER;
if (flags & SENTINEL_GENERATE_EVENT)
- sentinelEvent(REDIS_WARNING,"+reset-master",ri,"%@");
+ sentinelEvent(LL_WARNING,"+reset-master",ri,"%@");
}
/* Call sentinelResetMaster() on every master with a name matching the specified
@@ -1213,8 +1455,8 @@ int sentinelResetMastersByPattern(char *pattern, int flags) {
*
* This is used to handle the +switch-master event.
*
- * The function returns REDIS_ERR if the address can't be resolved for some
- * reason. Otherwise REDIS_OK is returned. */
+ * The function returns C_ERR if the address can't be resolved for some
+ * reason. Otherwise C_OK is returned. */
int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, int port) {
sentinelAddr *oldaddr, *newaddr;
sentinelAddr **slaves = NULL;
@@ -1223,7 +1465,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
dictEntry *de;
newaddr = createSentinelAddr(ip,port);
- if (newaddr == NULL) return REDIS_ERR;
+ if (newaddr == NULL) return C_ERR;
/* Make a list of slaves to add back after the reset.
* Don't include the one having the address we are switching to. */
@@ -1261,10 +1503,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->ip,
slaves[j]->port, master->quorum, master);
releaseSentinelAddr(slaves[j]);
- if (slave) {
- sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
- sentinelFlushConfig();
- }
+ if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
}
zfree(slaves);
@@ -1272,7 +1511,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
* gets the master->addr->ip and master->addr->port as arguments. */
releaseSentinelAddr(oldaddr);
sentinelFlushConfig();
- return REDIS_OK;
+ return C_OK;
}
/* Return non-zero if there was no SDOWN or ODOWN error associated to this
@@ -1322,6 +1561,13 @@ void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *master) {
}
}
+char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
+ if (ri->flags & SRI_MASTER) return "master";
+ else if (ri->flags & SRI_SLAVE) return "slave";
+ else if (ri->flags & SRI_SENTINEL) return "sentinel";
+ else return "unknown";
+}
+
/* ============================ Config handling ============================= */
char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
@@ -1385,6 +1631,10 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
unsigned long long current_epoch = strtoull(argv[1],NULL,10);
if (current_epoch > sentinel.current_epoch)
sentinel.current_epoch = current_epoch;
+ } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
+ if (strlen(argv[1]) != CONFIG_RUN_ID_SIZE)
+ return "Malformed Sentinel id in myid option.";
+ memcpy(sentinel.myid,argv[1],CONFIG_RUN_ID_SIZE);
} else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
/* config-epoch <name> <epoch> */
ri = sentinelGetMasterByName(argv[1]);
@@ -1415,15 +1665,25 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
(argc == 4 || argc == 5)) {
sentinelRedisInstance *si;
- /* known-sentinel <name> <ip> <port> [runid] */
- ri = sentinelGetMasterByName(argv[1]);
- if (!ri) return "No such master with specified name.";
- if ((si = createSentinelRedisInstance(NULL,SRI_SENTINEL,argv[2],
- atoi(argv[3]), ri->quorum, ri)) == NULL)
- {
- return "Wrong hostname or port for sentinel.";
+ if (argc == 5) { /* Ignore the old form without runid. */
+ /* known-sentinel <name> <ip> <port> [runid] */
+ ri = sentinelGetMasterByName(argv[1]);
+ if (!ri) return "No such master with specified name.";
+ if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2],
+ atoi(argv[3]), ri->quorum, ri)) == NULL)
+ {
+ return "Wrong hostname or port for sentinel.";
+ }
+ si->runid = sdsnew(argv[4]);
+ sentinelTryConnectionSharing(si);
}
- if (argc == 5) si->runid = sdsnew(argv[4]);
+ } else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
+ /* announce-ip <ip-address> */
+ if (strlen(argv[1]))
+ sentinel.announce_ip = sdsnew(argv[1]);
+ } else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
+ /* announce-port <port> */
+ sentinel.announce_port = atoi(argv[1]);
} else {
return "Unrecognized sentinel configuration statement.";
}
@@ -1440,6 +1700,10 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
dictEntry *de;
sds line;
+ /* sentinel unique ID. */
+ line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -1531,7 +1795,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
slave_addr = master->addr;
line = sdscatprintf(sdsempty(),
"sentinel known-slave %s %s %d",
- master->name, ri->addr->ip, ri->addr->port);
+ master->name, slave_addr->ip, slave_addr->port);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
@@ -1540,11 +1804,10 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
di2 = dictGetIterator(master->sentinels);
while((de = dictNext(di2)) != NULL) {
ri = dictGetVal(de);
+ if (ri->runid == NULL) continue;
line = sdscatprintf(sdsempty(),
- "sentinel known-sentinel %s %s %d%s%s",
- master->name, ri->addr->ip, ri->addr->port,
- ri->runid ? " " : "",
- ri->runid ? ri->runid : "");
+ "sentinel known-sentinel %s %s %d %s",
+ master->name, ri->addr->ip, ri->addr->port, ri->runid);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
@@ -1555,6 +1818,20 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
"sentinel current-epoch %llu", (unsigned long long) sentinel.current_epoch);
rewriteConfigRewriteLine(state,"sentinel",line,1);
+ /* sentinel announce-ip. */
+ if (sentinel.announce_ip) {
+ line = sdsnew("sentinel announce-ip ");
+ line = sdscatrepr(line, sentinel.announce_ip, sdslen(sentinel.announce_ip));
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+ }
+
+ /* sentinel announce-port. */
+ if (sentinel.announce_port) {
+ line = sdscatprintf(sdsempty(),"sentinel announce-port %d",
+ sentinel.announce_port);
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+ }
+
dictReleaseIterator(di);
}
@@ -1570,7 +1847,7 @@ void sentinelFlushConfig(void) {
int saved_hz = server.hz;
int rewrite_status;
- server.hz = REDIS_DEFAULT_HZ;
+ server.hz = CONFIG_DEFAULT_HZ;
rewrite_status = rewriteConfig(server.configfile);
server.hz = saved_hz;
@@ -1582,61 +1859,11 @@ void sentinelFlushConfig(void) {
werr:
if (fd != -1) close(fd);
- redisLog(REDIS_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
+ serverLog(LL_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
}
/* ====================== hiredis connection handling ======================= */
-/* Completely disconnect a hiredis link from an instance. */
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c) {
- if (ri->cc == c) {
- ri->cc = NULL;
- ri->pending_commands = 0;
- }
- if (ri->pc == c) ri->pc = NULL;
- c->data = NULL;
- ri->flags |= SRI_DISCONNECTED;
- redisAsyncFree(c);
-}
-
-/* This function takes a hiredis context that is in an error condition
- * and make sure to mark the instance as disconnected performing the
- * cleanup needed.
- *
- * Note: we don't free the hiredis context as hiredis will do it for us
- * for async connections. */
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c) {
- sentinelRedisInstance *ri = c->data;
- int pubsub;
-
- if (ri == NULL) return; /* The instance no longer exists. */
-
- pubsub = (ri->pc == c);
- sentinelEvent(REDIS_DEBUG, pubsub ? "-pubsub-link" : "-cmd-link", ri,
- "%@ #%s", c->errstr);
- if (pubsub)
- ri->pc = NULL;
- else
- ri->cc = NULL;
- ri->flags |= SRI_DISCONNECTED;
-}
-
-void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
- if (status != REDIS_OK) {
- sentinelDisconnectInstanceFromContext(c);
- } else {
- sentinelRedisInstance *ri = c->data;
- int pubsub = (ri->pc == c);
-
- sentinelEvent(REDIS_DEBUG, pubsub ? "+pubsub-link" : "+cmd-link", ri,
- "%@");
- }
-}
-
-void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
- sentinelDisconnectInstanceFromContext(c);
-}
-
/* Send the AUTH command with the specified master password if needed.
* Note that for slaves the password set for the master is used.
*
@@ -1648,8 +1875,8 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
ri->master->auth_pass;
if (auth_pass) {
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL, "AUTH %s",
- auth_pass) == REDIS_OK) ri->pending_commands++;
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "AUTH %s",
+ auth_pass) == C_OK) ri->link->pending_commands++;
}
}
@@ -1662,77 +1889,84 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
char name[64];
- snprintf(name,sizeof(name),"sentinel-%.8s-%s",server.runid,type);
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL,
- "CLIENT SETNAME %s", name) == REDIS_OK)
+ snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
+ "CLIENT SETNAME %s", name) == C_OK)
{
- ri->pending_commands++;
+ ri->link->pending_commands++;
}
}
-/* Create the async connections for the specified instance if the instance
- * is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
+/* Create the async connections for the instance link if the link
+ * is disconnected. Note that link->disconnected is true even if just
* one of the two links (commands and pub/sub) is missing. */
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
- if (!(ri->flags & SRI_DISCONNECTED)) return;
+ if (ri->link->disconnected == 0) return;
+ if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
+ instanceLink *link = ri->link;
+ mstime_t now = mstime();
+
+ if (now - ri->link->last_reconn_time < SENTINEL_PING_PERIOD) return;
+ ri->link->last_reconn_time = now;
/* Commands connection. */
- if (ri->cc == NULL) {
- ri->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->cc->err) {
- sentinelEvent(REDIS_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
- ri->cc->errstr);
- sentinelKillLink(ri,ri->cc);
+ if (link->cc == NULL) {
+ link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (link->cc->err) {
+ sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
+ link->cc->errstr);
+ instanceLinkCloseConnection(link,link->cc);
} else {
- ri->cc_conn_time = mstime();
- ri->cc->data = ri;
- redisAeAttach(server.el,ri->cc);
- redisAsyncSetConnectCallback(ri->cc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->cc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->cc);
- sentinelSetClientName(ri,ri->cc,"cmd");
+ link->pending_commands = 0;
+ link->cc_conn_time = mstime();
+ link->cc->data = link;
+ redisAeAttach(server.el,link->cc);
+ redisAsyncSetConnectCallback(link->cc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->cc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->cc);
+ sentinelSetClientName(ri,link->cc,"cmd");
/* Send a PING ASAP when reconnecting. */
sentinelSendPing(ri);
}
}
/* Pub / Sub */
- if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && ri->pc == NULL) {
- ri->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->pc->err) {
- sentinelEvent(REDIS_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
- ri->pc->errstr);
- sentinelKillLink(ri,ri->pc);
+ if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
+ link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (link->pc->err) {
+ sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
+ link->pc->errstr);
+ instanceLinkCloseConnection(link,link->pc);
} else {
int retval;
- ri->pc_conn_time = mstime();
- ri->pc->data = ri;
- redisAeAttach(server.el,ri->pc);
- redisAsyncSetConnectCallback(ri->pc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->pc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->pc);
- sentinelSetClientName(ri,ri->pc,"pubsub");
+ link->pc_conn_time = mstime();
+ link->pc->data = link;
+ redisAeAttach(server.el,link->pc);
+ redisAsyncSetConnectCallback(link->pc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->pc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->pc);
+ sentinelSetClientName(ri,link->pc,"pubsub");
/* Now we subscribe to the Sentinels "Hello" channel. */
- retval = redisAsyncCommand(ri->pc,
- sentinelReceiveHelloMessages, NULL, "SUBSCRIBE %s",
+ retval = redisAsyncCommand(link->pc,
+ sentinelReceiveHelloMessages, ri, "SUBSCRIBE %s",
SENTINEL_HELLO_CHANNEL);
- if (retval != REDIS_OK) {
+ if (retval != C_OK) {
/* If we can't subscribe, the Pub/Sub connection is useless
* and we can simply disconnect it and try again. */
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(link,link->pc);
return;
}
}
}
- /* Clear the DISCONNECTED flags only if we have both the connections
+ /* Clear the disconnected status only if we have both the connections
* (or just the commands connection if this is a sentinel instance). */
- if (ri->cc && (ri->flags & SRI_SENTINEL || ri->pc))
- ri->flags &= ~SRI_DISCONNECTED;
+ if (link->cc && (ri->flags & SRI_SENTINEL || link->pc))
+ link->disconnected = 0;
}
/* ======================== Redis instances pinging ======================== */
@@ -1756,6 +1990,10 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int numlines, j;
int role = 0;
+ /* cache full INFO output for instance */
+ sdsfree(ri->info);
+ ri->info = sdsnew(info);
+
/* The following fields must be reset to a given value in the case they
* are not found at all in the INFO output. */
ri->master_link_down_time = 0;
@@ -1772,7 +2010,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->runid = sdsnewlen(l+7,40);
} else {
if (strncmp(ri->runid,l+7,40) != 0) {
- sentinelEvent(REDIS_NOTICE,"+reboot",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+reboot",ri,"%@");
sdsfree(ri->runid);
ri->runid = sdsnewlen(l+7,40);
}
@@ -1813,7 +2051,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
atoi(port), ri->quorum, ri)) != NULL)
{
- sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
+ sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
+ sentinelFlushConfig();
}
}
}
@@ -1882,7 +2121,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
if (role == SRI_SLAVE) ri->slave_conf_change_time = mstime();
/* Log the event with +role-change if the new role is coherent or
* with -role-change if there is a mismatch with the current config. */
- sentinelEvent(REDIS_VERBOSE,
+ sentinelEvent(LL_VERBOSE,
((ri->flags & (SRI_MASTER|SRI_SLAVE)) == role) ?
"+role-change" : "-role-change",
ri, "%@ new reported role is %s",
@@ -1919,8 +2158,11 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES;
ri->master->failover_state_change_time = mstime();
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+promoted-slave",ri,"%@");
- sentinelEvent(REDIS_WARNING,"+failover-state-reconf-slaves",
+ sentinelEvent(LL_WARNING,"+promoted-slave",ri,"%@");
+ if (sentinel.simfailure_flags &
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION)
+ sentinelSimFailureCrash();
+ sentinelEvent(LL_WARNING,"+failover-state-reconf-slaves",
ri->master,"%@");
sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
"start",ri->master->addr,ri->addr);
@@ -1939,8 +2181,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int retval = sentinelSendSlaveOf(ri,
ri->master->addr->ip,
ri->master->addr->port);
- if (retval == REDIS_OK)
- sentinelEvent(REDIS_NOTICE,"+convert-to-slave",ri,"%@");
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+convert-to-slave",ri,"%@");
}
}
}
@@ -1962,8 +2204,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int retval = sentinelSendSlaveOf(ri,
ri->master->addr->ip,
ri->master->addr->port);
- if (retval == REDIS_OK)
- sentinelEvent(REDIS_NOTICE,"+fix-slave-config",ri,"%@");
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+fix-slave-config",ri,"%@");
}
}
@@ -1981,7 +2223,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
{
ri->flags &= ~SRI_RECONF_SENT;
ri->flags |= SRI_RECONF_INPROG;
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-inprog",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-inprog",ri,"%@");
}
/* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
@@ -1990,38 +2232,41 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
{
ri->flags &= ~SRI_RECONF_INPROG;
ri->flags |= SRI_RECONF_DONE;
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-done",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-done",ri,"%@");
}
}
}
void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
- if (r->type == REDIS_REPLY_STRING) {
+ if (r->type == REDIS_REPLY_STRING)
sentinelRefreshInstanceInfo(ri,r->str);
- }
}
/* Just discard the reply. We use this when we are not monitoring the return
* value of the command but its effects directly. */
void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ instanceLink *link = c->data;
+ UNUSED(reply);
+ UNUSED(privdata);
- if (ri) ri->pending_commands--;
+ if (link) link->pending_commands--;
}
void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
if (r->type == REDIS_REPLY_STATUS ||
@@ -2032,8 +2277,8 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
strncmp(r->str,"LOADING",7) == 0 ||
strncmp(r->str,"MASTERDOWN",10) == 0)
{
- ri->last_avail_time = mstime();
- ri->last_ping_time = 0; /* Flag the pong as received. */
+ link->last_avail_time = mstime();
+ link->act_ping_time = 0; /* Flag the pong as received. */
} else {
/* Send a SCRIPT KILL command if the instance appears to be
* down because of a busy script. */
@@ -2041,25 +2286,26 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
(ri->flags & SRI_S_DOWN) &&
!(ri->flags & SRI_SCRIPT_KILL_SENT))
{
- if (redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL,
- "SCRIPT KILL") == REDIS_OK)
- ri->pending_commands++;
+ if (redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri,
+ "SCRIPT KILL") == C_OK)
+ ri->link->pending_commands++;
ri->flags |= SRI_SCRIPT_KILL_SENT;
}
}
}
- ri->last_pong_time = mstime();
+ link->last_pong_time = mstime();
}
/* This is called when we get the reply about the PUBLISH command we send
* to the master to advertise this sentinel. */
void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Only update pub_time if we actually published our message. Otherwise
@@ -2072,7 +2318,7 @@ void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privd
* or sent directly to this sentinel via the (fake) PUBLISH command of Sentinel.
*
* If the master name specified in the message is not known, the message is
- * discareded. */
+ * discarded. */
void sentinelProcessHelloMessage(char *hello, int hello_len) {
/* Format is composed of 8 tokens:
* 0=ip,1=port,2=runid,3=current_epoch,4=master_name,
@@ -2097,25 +2343,39 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (!si) {
/* If not, remove all the sentinels that have the same runid
- * OR the same ip/port, because it's either a restart or a
- * network topology change. */
- removed = removeMatchingSentinelsFromMaster(master,token[0],port,
- token[2]);
+ * because there was an address change, and add the same Sentinel
+ * with the new address back. */
+ removed = removeMatchingSentinelFromMaster(master,token[2]);
if (removed) {
- sentinelEvent(REDIS_NOTICE,"-dup-sentinel",master,
- "%@ #duplicate of %s:%d or %s",
- token[0],port,token[2]);
+ sentinelEvent(LL_NOTICE,"+sentinel-address-switch",master,
+ "%@ ip %s port %d for %s", token[0],port,token[2]);
+ } else {
+ /* Check if there is another Sentinel with the same address this
+ * new one is reporting. What we do if this happens is to set its
+ * port to 0, to signal the address is invalid. We'll update it
+ * later if we get an HELLO message. */
+ sentinelRedisInstance *other =
+ getSentinelRedisInstanceByAddrAndRunID(
+ master->sentinels, token[0],port,NULL);
+ if (other) {
+ sentinelEvent(LL_NOTICE,"+sentinel-invalid-addr",other,"%@");
+ other->addr->port = 0; /* It means: invalid address. */
+ sentinelUpdateSentinelAddressInAllMasters(other);
+ }
}
/* Add the new sentinel. */
- si = createSentinelRedisInstance(NULL,SRI_SENTINEL,
+ si = createSentinelRedisInstance(token[2],SRI_SENTINEL,
token[0],port,master->quorum,master);
+
if (si) {
- sentinelEvent(REDIS_NOTICE,"+sentinel",si,"%@");
+ if (!removed) sentinelEvent(LL_NOTICE,"+sentinel",si,"%@");
/* The runid is NULL after a new instance creation and
* for Sentinels we don't have a later chance to fill it,
* so do it now. */
si->runid = sdsnew(token[2]);
+ sentinelTryConnectionSharing(si);
+ if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
sentinelFlushConfig();
}
}
@@ -2124,20 +2384,20 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (current_epoch > sentinel.current_epoch) {
sentinel.current_epoch = current_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
}
/* Update master info if received configuration is newer. */
- if (master->config_epoch < master_config_epoch) {
+ if (si && master->config_epoch < master_config_epoch) {
master->config_epoch = master_config_epoch;
if (master_port != master->addr->port ||
strcmp(master->addr->ip, token[5]))
{
sentinelAddr *old_addr;
- sentinelEvent(REDIS_WARNING,"+config-update-from",si,"%@");
- sentinelEvent(REDIS_WARNING,"+switch-master",
+ sentinelEvent(LL_WARNING,"+config-update-from",si,"%@");
+ sentinelEvent(LL_WARNING,"+switch-master",
master,"%s %s %d %s %d",
master->name,
master->addr->ip, master->addr->port,
@@ -2164,8 +2424,9 @@ cleanup:
/* This is our Pub/Sub callback for the Hello channel. It's useful in order
* to discover other sentinels attached at the same master. */
void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
redisReply *r;
+ UNUSED(c);
if (!reply || !ri) return;
r = reply;
@@ -2173,7 +2434,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
/* Update the last activity in the pubsub channel. Note that since we
* receive our messages as well this timestamp can be used to detect
* if the link is probably disconnected even if it seems otherwise. */
- ri->pc_last_activity = mstime();
+ ri->link->pc_last_activity = mstime();
/* Sanity check in the reply we expect, so that the code that follows
* can avoid to check for details. */
@@ -2185,7 +2446,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
strcmp(r->element[0]->str,"message") != 0) return;
/* We are not interested in meeting ourselves */
- if (strstr(r->element[2]->str,server.runid) != NULL) return;
+ if (strstr(r->element[2]->str,sentinel.myid) != NULL) return;
sentinelProcessHelloMessage(r->element[2]->str, r->element[2]->len);
}
@@ -2199,34 +2460,46 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
* sentinel_ip,sentinel_port,sentinel_runid,current_epoch,
* master_name,master_ip,master_port,master_config_epoch.
*
- * Returns REDIS_OK if the PUBLISH was queued correctly, otherwise
- * REDIS_ERR is returned. */
+ * Returns C_OK if the PUBLISH was queued correctly, otherwise
+ * C_ERR is returned. */
int sentinelSendHello(sentinelRedisInstance *ri) {
- char ip[REDIS_IP_STR_LEN];
- char payload[REDIS_IP_STR_LEN+1024];
+ char ip[NET_IP_STR_LEN];
+ char payload[NET_IP_STR_LEN+1024];
int retval;
+ char *announce_ip;
+ int announce_port;
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ? ri : ri->master;
sentinelAddr *master_addr = sentinelGetCurrentMasterAddress(master);
- /* Try to obtain our own IP address. */
- if (anetSockName(ri->cc->c.fd,ip,sizeof(ip),NULL) == -1) return REDIS_ERR;
- if (ri->flags & SRI_DISCONNECTED) return REDIS_ERR;
+ if (ri->link->disconnected) return C_ERR;
+
+ /* Use the specified announce address if specified, otherwise try to
+ * obtain our own IP address. */
+ if (sentinel.announce_ip) {
+ announce_ip = sentinel.announce_ip;
+ } else {
+ if (anetSockName(ri->link->cc->c.fd,ip,sizeof(ip),NULL) == -1)
+ return C_ERR;
+ announce_ip = ip;
+ }
+ announce_port = sentinel.announce_port ?
+ sentinel.announce_port : server.port;
/* Format and send the Hello message. */
snprintf(payload,sizeof(payload),
"%s,%d,%s,%llu," /* Info about this sentinel. */
"%s,%s,%d,%llu", /* Info about current master. */
- ip, server.port, server.runid,
+ announce_ip, announce_port, sentinel.myid,
(unsigned long long) sentinel.current_epoch,
/* --- */
master->name,master_addr->ip,master_addr->port,
(unsigned long long) master->config_epoch);
- retval = redisAsyncCommand(ri->cc,
- sentinelPublishReplyCallback, NULL, "PUBLISH %s %s",
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelPublishReplyCallback, ri, "PUBLISH %s %s",
SENTINEL_HELLO_CHANNEL,payload);
- if (retval != REDIS_OK) return REDIS_ERR;
- ri->pending_commands++;
- return REDIS_OK;
+ if (retval != C_OK) return C_ERR;
+ ri->link->pending_commands++;
+ return C_OK;
}
/* Reset last_pub_time in all the instances in the specified dictionary
@@ -2253,28 +2526,30 @@ void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
* Sentinel upgrades a configuration it is a good idea to deliever an update
* to the other Sentinels ASAP. */
int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
- if (!(master->flags & SRI_MASTER)) return REDIS_ERR;
+ if (!(master->flags & SRI_MASTER)) return C_ERR;
if (master->last_pub_time >= (SENTINEL_PUBLISH_PERIOD+1))
master->last_pub_time -= (SENTINEL_PUBLISH_PERIOD+1);
sentinelForceHelloUpdateDictOfRedisInstances(master->sentinels);
sentinelForceHelloUpdateDictOfRedisInstances(master->slaves);
- return REDIS_OK;
+ return C_OK;
}
-/* Send a PING to the specified instance and refresh the last_ping_time
+/* Send a PING to the specified instance and refresh the act_ping_time
* if it is zero (that is, if we received a pong for the previous ping).
*
* On error zero is returned, and we can't consider the PING command
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
- int retval = redisAsyncCommand(ri->cc,
- sentinelPingReplyCallback, NULL, "PING");
- if (retval == REDIS_OK) {
- ri->pending_commands++;
- /* We update the ping time only if we received the pong for
- * the previous ping, otherwise we are technically waiting
- * since the first ping that did not received a reply. */
- if (ri->last_ping_time == 0) ri->last_ping_time = mstime();
+ int retval = redisAsyncCommand(ri->link->cc,
+ sentinelPingReplyCallback, ri, "PING");
+ if (retval == C_OK) {
+ ri->link->pending_commands++;
+ ri->link->last_ping_time = mstime();
+ /* We update the active ping time only if we received the pong for
+ * the previous ping, otherwise we are technically waiting since the
+ * first ping that did not received a reply. */
+ if (ri->link->act_ping_time == 0)
+ ri->link->act_ping_time = ri->link->last_ping_time;
return 1;
} else {
return 0;
@@ -2290,7 +2565,7 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
/* Return ASAP if we have already a PING or INFO already pending, or
* in the case the instance is not properly connected. */
- if (ri->flags & SRI_DISCONNECTED) return;
+ if (ri->link->disconnected) return;
/* For INFO, PING, PUBLISH that are not critical commands to send we
* also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
@@ -2298,14 +2573,21 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
* properly (note that anyway there is a redundant protection about this,
* that is, the link will be disconnected and reconnected if a long
* timeout condition is detected. */
- if (ri->pending_commands >= SENTINEL_MAX_PENDING_COMMANDS) return;
+ if (ri->link->pending_commands >=
+ SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;
/* If this is a slave of a master in O_DOWN condition we start sending
* it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
* period. In this state we want to closely monitor slaves in case they
- * are turned into masters by another Sentinel, or by the sysadmin. */
+ * are turned into masters by another Sentinel, or by the sysadmin.
+ *
+ * Similarly we monitor the INFO output more often if the slave reports
+ * to be disconnected from the master, so that we can have a fresh
+ * disconnection time figure. */
if ((ri->flags & SRI_SLAVE) &&
- (ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))) {
+ ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
+ (ri->master_link_down_time != 0)))
+ {
info_period = 1000;
} else {
info_period = SENTINEL_INFO_PERIOD;
@@ -2322,10 +2604,11 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
(now - ri->info_refresh) > info_period))
{
/* Send INFO to masters and slaves, not sentinels. */
- retval = redisAsyncCommand(ri->cc,
- sentinelInfoReplyCallback, NULL, "INFO");
- if (retval == REDIS_OK) ri->pending_commands++;
- } else if ((now - ri->last_pong_time) > ping_period) {
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelInfoReplyCallback, ri, "INFO");
+ if (retval == C_OK) ri->link->pending_commands++;
+ } else if ((now - ri->link->last_pong_time) > ping_period &&
+ (now - ri->link->last_ping_time) > ping_period/2) {
/* Send PING to all the three kinds of instances. */
sentinelSendPing(ri);
} else if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
@@ -2350,7 +2633,7 @@ const char *sentinelFailoverStateStr(int state) {
}
/* Redis instance to Redis protocol representation. */
-void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
+void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
char *flags = sdsempty();
void *mbl;
int fields = 0;
@@ -2379,7 +2662,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
if (ri->flags & SRI_MASTER) flags = sdscat(flags,"master,");
if (ri->flags & SRI_SLAVE) flags = sdscat(flags,"slave,");
if (ri->flags & SRI_SENTINEL) flags = sdscat(flags,"sentinel,");
- if (ri->flags & SRI_DISCONNECTED) flags = sdscat(flags,"disconnected,");
+ if (ri->link->disconnected) flags = sdscat(flags,"disconnected,");
if (ri->flags & SRI_MASTER_DOWN) flags = sdscat(flags,"master_down,");
if (ri->flags & SRI_FAILOVER_IN_PROGRESS)
flags = sdscat(flags,"failover_in_progress,");
@@ -2393,8 +2676,12 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
sdsfree(flags);
fields++;
- addReplyBulkCString(c,"pending-commands");
- addReplyBulkLongLong(c,ri->pending_commands);
+ addReplyBulkCString(c,"link-pending-commands");
+ addReplyBulkLongLong(c,ri->link->pending_commands);
+ fields++;
+
+ addReplyBulkCString(c,"link-refcount");
+ addReplyBulkLongLong(c,ri->link->refcount);
fields++;
if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
@@ -2405,15 +2692,15 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
addReplyBulkCString(c,"last-ping-sent");
addReplyBulkLongLong(c,
- ri->last_ping_time ? (mstime() - ri->last_ping_time) : 0);
+ ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
fields++;
addReplyBulkCString(c,"last-ok-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_avail_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_avail_time);
fields++;
addReplyBulkCString(c,"last-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_pong_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_pong_time);
fields++;
if (ri->flags & SRI_S_DOWN) {
@@ -2537,7 +2824,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
/* Output a number of instances contained inside a dictionary as
* Redis protocol. */
-void addReplyDictOfRedisInstances(redisClient *c, dict *instances) {
+void addReplyDictOfRedisInstances(client *c, dict *instances) {
dictIterator *di;
dictEntry *de;
@@ -2554,12 +2841,12 @@ void addReplyDictOfRedisInstances(redisClient *c, dict *instances) {
/* Lookup the named master into sentinel.masters.
* If the master is not found reply to the client with an error and returns
* NULL. */
-sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
+sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(client *c,
robj *name)
{
sentinelRedisInstance *ri;
- ri = dictFetchValue(sentinel.masters,c->argv[2]->ptr);
+ ri = dictFetchValue(sentinel.masters,name->ptr);
if (!ri) {
addReplyError(c,"No such master with that name");
return NULL;
@@ -2567,7 +2854,32 @@ sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
return ri;
}
-void sentinelCommand(redisClient *c) {
+#define SENTINEL_ISQR_OK 0
+#define SENTINEL_ISQR_NOQUORUM (1<<0)
+#define SENTINEL_ISQR_NOAUTH (1<<1)
+int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
+ dictIterator *di;
+ dictEntry *de;
+ int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
+ int result = SENTINEL_ISQR_OK;
+ int voters = dictSize(master->sentinels)+1; /* Known Sentinels + myself. */
+
+ di = dictGetIterator(master->sentinels);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+
+ if (ri->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ usable++;
+ }
+ dictReleaseIterator(di);
+
+ if (usable < (int)master->quorum) result |= SENTINEL_ISQR_NOQUORUM;
+ if (usable < voters/2+1) result |= SENTINEL_ISQR_NOAUTH;
+ if (usableptr) *usableptr = usable;
+ return result;
+}
+
+void sentinelCommand(client *c) {
if (!strcasecmp(c->argv[1]->ptr,"masters")) {
/* SENTINEL MASTERS */
if (c->argc != 2) goto numargserr;
@@ -2597,7 +2909,23 @@ void sentinelCommand(redisClient *c) {
return;
addReplyDictOfRedisInstances(c,ri->sentinels);
} else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
- /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>*/
+ /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
+ *
+ * Arguments:
+ *
+ * ip and port are the ip and port of the master we want to be
+ * checked by Sentinel. Note that the command will not check by
+ * name but just by master, in theory different Sentinels may monitor
+ * different masters with the same name.
+ *
+ * current-epoch is needed in order to understand if we are allowed
+ * to vote for a failover leader or not. Each Sentinel can vote just
+ * one time per epoch.
+ *
+ * runid is "*" if we are not seeking a vote from the Sentinel
+ * in order to elect the failover leader. Otherwise it is set to the
+ * runid we want the Sentinel to vote for, if it has not already voted.
+ */
sentinelRedisInstance *ri;
long long req_epoch;
uint64_t leader_epoch = 0;
@@ -2606,9 +2934,9 @@ void sentinelCommand(redisClient *c) {
int isdown = 0;
if (c->argc != 6) goto numargserr;
- if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != REDIS_OK ||
+ if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != C_OK ||
getLongLongFromObjectOrReply(c,c->argv[4],&req_epoch,NULL)
- != REDIS_OK)
+ != C_OK)
return;
ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
c->argv[2]->ptr,port,NULL);
@@ -2668,7 +2996,7 @@ void sentinelCommand(redisClient *c) {
addReplySds(c,sdsnew("-NOGOODSLAVE No suitable slave to promote\r\n"));
return;
}
- redisLog(REDIS_WARNING,"Executing user requested FAILOVER of '%s'",
+ serverLog(LL_WARNING,"Executing user requested FAILOVER of '%s'",
ri->name);
sentinelStartFailover(ri);
ri->flags |= SRI_FORCE_FAILOVER;
@@ -2682,17 +3010,23 @@ void sentinelCommand(redisClient *c) {
/* SENTINEL MONITOR <name> <ip> <port> <quorum> */
sentinelRedisInstance *ri;
long quorum, port;
- char buf[32];
+ char ip[NET_IP_STR_LEN];
if (c->argc != 6) goto numargserr;
if (getLongFromObjectOrReply(c,c->argv[5],&quorum,"Invalid quorum")
- != REDIS_OK) return;
+ != C_OK) return;
if (getLongFromObjectOrReply(c,c->argv[4],&port,"Invalid port")
- != REDIS_OK) return;
+ != C_OK) return;
+
+ if (quorum <= 0) {
+ addReplyError(c, "Quorum must be 1 or greater.");
+ return;
+ }
+
/* Make sure the IP field is actually a valid IP before passing it
* to createSentinelRedisInstance(), otherwise we may trigger a
* DNS lookup at runtime. */
- if (anetResolveIP(NULL,c->argv[3]->ptr,buf,sizeof(buf)) == ANET_ERR) {
+ if (anetResolveIP(NULL,c->argv[3]->ptr,ip,sizeof(ip)) == ANET_ERR) {
addReplyError(c,"Invalid IP address specified");
return;
}
@@ -2714,22 +3048,144 @@ void sentinelCommand(redisClient *c) {
}
} else {
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
addReply(c,shared.ok);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"flushconfig")) {
+ if (c->argc != 2) goto numargserr;
+ sentinelFlushConfig();
+ addReply(c,shared.ok);
+ return;
} else if (!strcasecmp(c->argv[1]->ptr,"remove")) {
/* SENTINEL REMOVE <name> */
sentinelRedisInstance *ri;
+ if (c->argc != 3) goto numargserr;
if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
== NULL) return;
- sentinelEvent(REDIS_WARNING,"-monitor",ri,"%@");
+ sentinelEvent(LL_WARNING,"-monitor",ri,"%@");
dictDelete(sentinel.masters,c->argv[2]->ptr);
sentinelFlushConfig();
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"ckquorum")) {
+ /* SENTINEL CKQUORUM <name> */
+ sentinelRedisInstance *ri;
+ int usable;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ int result = sentinelIsQuorumReachable(ri,&usable);
+ if (result == SENTINEL_ISQR_OK) {
+ addReplySds(c, sdscatfmt(sdsempty(),
+ "+OK %i usable Sentinels. Quorum and failover authorization "
+ "can be reached\r\n",usable));
+ } else {
+ sds e = sdscatfmt(sdsempty(),
+ "-NOQUORUM %i usable Sentinels. ",usable);
+ if (result & SENTINEL_ISQR_NOQUORUM)
+ e = sdscat(e,"Not enough available Sentinels to reach the"
+ " specified quorum for this master");
+ if (result & SENTINEL_ISQR_NOAUTH) {
+ if (result & SENTINEL_ISQR_NOQUORUM) e = sdscat(e,". ");
+ e = sdscat(e, "Not enough available Sentinels to reach the"
+ " majority and authorize a failover");
+ }
+ e = sdscat(e,"\r\n");
+ addReplySds(c,e);
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
sentinelSetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
+ /* SENTINEL INFO-CACHE <name> */
+ if (c->argc < 2) goto numargserr;
+ mstime_t now = mstime();
+
+ /* Create an ad-hoc dictionary type so that we can iterate
+ * a dictionary composed of just the master groups the user
+ * requested. */
+ dictType copy_keeper = instancesDictType;
+ copy_keeper.valDestructor = NULL;
+ dict *masters_local = sentinel.masters;
+ if (c->argc > 2) {
+ masters_local = dictCreate(&copy_keeper, NULL);
+
+ for (int i = 2; i < c->argc; i++) {
+ sentinelRedisInstance *ri;
+ ri = sentinelGetMasterByName(c->argv[i]->ptr);
+ if (!ri) continue; /* ignore non-existing names */
+ dictAdd(masters_local, ri->name, ri);
+ }
+ }
+
+ /* Reply format:
+ * 1.) master name
+ * 2.) 1.) info from master
+ * 2.) info from replica
+ * ...
+ * 3.) other master name
+ * ...
+ */
+ addReplyMultiBulkLen(c,dictSize(masters_local) * 2);
+
+ dictIterator *di;
+ dictEntry *de;
+ di = dictGetIterator(masters_local);
+ while ((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+ addReplyBulkCBuffer(c,ri->name,strlen(ri->name));
+ addReplyMultiBulkLen(c,dictSize(ri->slaves) + 1); /* +1 for self */
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - ri->info_refresh);
+ if (ri->info)
+ addReplyBulkCBuffer(c,ri->info,sdslen(ri->info));
+ else
+ addReply(c,shared.nullbulk);
+
+ dictIterator *sdi;
+ dictEntry *sde;
+ sdi = dictGetIterator(ri->slaves);
+ while ((sde = dictNext(sdi)) != NULL) {
+ sentinelRedisInstance *sri = dictGetVal(sde);
+ addReplyMultiBulkLen(c,2);
+ addReplyLongLong(c, now - sri->info_refresh);
+ if (sri->info)
+ addReplyBulkCBuffer(c,sri->info,sdslen(sri->info));
+ else
+ addReply(c,shared.nullbulk);
+ }
+ dictReleaseIterator(sdi);
+ }
+ dictReleaseIterator(di);
+ if (masters_local != sentinel.masters) dictRelease(masters_local);
+ } else if (!strcasecmp(c->argv[1]->ptr,"simulate-failure")) {
+ /* SENTINEL SIMULATE-FAILURE <flag> <flag> ... <flag> */
+ int j;
+
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ for (j = 2; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"crash-after-election")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after being successfully elected as failover "
+ "leader");
+ } else if (!strcasecmp(c->argv[j]->ptr,"crash-after-promotion")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after promoting the selected slave to master");
+ } else if (!strcasecmp(c->argv[j]->ptr,"help")) {
+ addReplyMultiBulkLen(c,2);
+ addReplyBulkCString(c,"crash-after-election");
+ addReplyBulkCString(c,"crash-after-promotion");
+ } else {
+ addReplyError(c,"Unknown failure simulation specified");
+ return;
+ }
+ }
+ addReply(c,shared.ok);
} else {
addReplyErrorFormat(c,"Unknown sentinel subcommand '%s'",
(char*)c->argv[1]->ptr);
@@ -2741,26 +3197,41 @@ numargserr:
(char*)c->argv[1]->ptr);
}
-/* SENTINEL INFO [section] */
-void sentinelInfoCommand(redisClient *c) {
- char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
- sds info = sdsempty();
- int defsections = !strcasecmp(section,"default");
- int sections = 0;
+#define info_section_from_redis(section_name) do { \
+ if (defsections || allsections || !strcasecmp(section,section_name)) { \
+ sds redissection; \
+ if (sections++) info = sdscat(info,"\r\n"); \
+ redissection = genRedisInfoString(section_name); \
+ info = sdscatlen(info,redissection,sdslen(redissection)); \
+ sdsfree(redissection); \
+ } \
+} while(0)
+/* SENTINEL INFO [section] */
+void sentinelInfoCommand(client *c) {
if (c->argc > 2) {
addReply(c,shared.syntaxerr);
return;
}
- if (!strcasecmp(section,"server") || defsections) {
- if (sections++) info = sdscat(info,"\r\n");
- sds serversection = genRedisInfoString("server");
- info = sdscatlen(info,serversection,sdslen(serversection));
- sdsfree(serversection);
+ int defsections = 0, allsections = 0;
+ char *section = c->argc == 2 ? c->argv[1]->ptr : NULL;
+ if (section) {
+ allsections = !strcasecmp(section,"all");
+ defsections = !strcasecmp(section,"default");
+ } else {
+ defsections = 1;
}
- if (!strcasecmp(section,"sentinel") || defsections) {
+ int sections = 0;
+ sds info = sdsempty();
+
+ info_section_from_redis("server");
+ info_section_from_redis("clients");
+ info_section_from_redis("cpu");
+ info_section_from_redis("stats");
+
+ if (defsections || allsections || !strcasecmp(section,"sentinel")) {
dictIterator *di;
dictEntry *de;
int master_id = 0;
@@ -2771,11 +3242,13 @@ void sentinelInfoCommand(redisClient *c) {
"sentinel_masters:%lu\r\n"
"sentinel_tilt:%d\r\n"
"sentinel_running_scripts:%d\r\n"
- "sentinel_scripts_queue_length:%ld\r\n",
+ "sentinel_scripts_queue_length:%ld\r\n"
+ "sentinel_simulate_failure_flags:%lu\r\n",
dictSize(sentinel.masters),
sentinel.tilt,
sentinel.running_scripts,
- listLength(sentinel.scripts_queue));
+ listLength(sentinel.scripts_queue),
+ sentinel.simfailure_flags);
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -2795,15 +3268,12 @@ void sentinelInfoCommand(redisClient *c) {
dictReleaseIterator(di);
}
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, info);
}
/* Implements Sentinel verison of the ROLE command. The output is
* "sentinel" and the list of currently monitored master names. */
-void sentinelRoleCommand(redisClient *c) {
+void sentinelRoleCommand(client *c) {
dictIterator *di;
dictEntry *de;
@@ -2821,7 +3291,7 @@ void sentinelRoleCommand(redisClient *c) {
}
/* SENTINEL SET <mastername> [<option> <value> ...] */
-void sentinelSetCommand(redisClient *c) {
+void sentinelSetCommand(client *c) {
sentinelRedisInstance *ri;
int j, changes = 0;
char *option, *value;
@@ -2838,20 +3308,20 @@ void sentinelSetCommand(redisClient *c) {
if (!strcasecmp(option,"down-after-milliseconds")) {
/* down-after-millisecodns <milliseconds> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
goto badfmt;
ri->down_after_period = ll;
sentinelPropagateDownAfterPeriod(ri);
changes++;
} else if (!strcasecmp(option,"failover-timeout")) {
/* failover-timeout <milliseconds> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
goto badfmt;
ri->failover_timeout = ll;
changes++;
} else if (!strcasecmp(option,"parallel-syncs")) {
/* parallel-syncs <milliseconds> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
goto badfmt;
ri->parallel_syncs = ll;
changes++;
@@ -2885,7 +3355,7 @@ void sentinelSetCommand(redisClient *c) {
changes++;
} else if (!strcasecmp(option,"quorum")) {
/* quorum <count> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0)
goto badfmt;
ri->quorum = ll;
changes++;
@@ -2895,7 +3365,7 @@ void sentinelSetCommand(redisClient *c) {
if (changes) sentinelFlushConfig();
return;
}
- sentinelEvent(REDIS_WARNING,"+set",ri,"%@ %s %s",option,value);
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",option,value);
}
if (changes) sentinelFlushConfig();
@@ -2914,7 +3384,7 @@ badfmt: /* Bad format errors */
*
* Because we have a Sentinel PUBLISH, the code to send hello messages is the same
* for all the three kind of instances: masters, slaves, sentinels. */
-void sentinelPublishCommand(redisClient *c) {
+void sentinelPublishCommand(client *c) {
if (strcmp(c->argv[1]->ptr,SENTINEL_HELLO_CHANNEL)) {
addReplyError(c, "Only HELLO messages are accepted by Sentinel instances.");
return;
@@ -2929,8 +3399,10 @@ void sentinelPublishCommand(redisClient *c) {
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = 0;
- if (ri->last_ping_time)
- elapsed = mstime() - ri->last_ping_time;
+ if (ri->link->act_ping_time)
+ elapsed = mstime() - ri->link->act_ping_time;
+ else if (ri->link->disconnected)
+ elapsed = mstime() - ri->link->last_avail_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
@@ -2938,15 +3410,16 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* 1) Check if the command link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
* pending ping for more than half the timeout. */
- if (ri->cc &&
- (mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- ri->last_ping_time != 0 && /* Ther is a pending ping... */
+ if (ri->link->cc &&
+ (mstime() - ri->link->cc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ ri->link->act_ping_time != 0 && /* There is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
- (mstime() - ri->last_ping_time) > (ri->down_after_period/2) &&
- (mstime() - ri->last_pong_time) > (ri->down_after_period/2))
+ (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
+ (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
{
- sentinelKillLink(ri,ri->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
}
/* 2) Check if the pubsub link seems connected, was connected not less
@@ -2954,11 +3427,12 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* activity in the Pub/Sub channel for more than
* SENTINEL_PUBLISH_PERIOD * 3.
*/
- if (ri->pc &&
- (mstime() - ri->pc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- (mstime() - ri->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
+ if (ri->link->pc &&
+ (mstime() - ri->link->pc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
{
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
}
/* Update the SDOWN flag. We believe the instance is SDOWN if:
@@ -2975,14 +3449,14 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
{
/* Is subjectively down */
if ((ri->flags & SRI_S_DOWN) == 0) {
- sentinelEvent(REDIS_WARNING,"+sdown",ri,"%@");
+ sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
ri->s_down_since_time = mstime();
ri->flags |= SRI_S_DOWN;
}
} else {
/* Is subjectively up */
if (ri->flags & SRI_S_DOWN) {
- sentinelEvent(REDIS_WARNING,"-sdown",ri,"%@");
+ sentinelEvent(LL_WARNING,"-sdown",ri,"%@");
ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
}
}
@@ -2997,7 +3471,7 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
dictIterator *di;
dictEntry *de;
- int quorum = 0, odown = 0;
+ unsigned int quorum = 0, odown = 0;
if (master->flags & SRI_S_DOWN) {
/* Is down for enough sentinels? */
@@ -3016,14 +3490,14 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
/* Set the flag accordingly to the outcome. */
if (odown) {
if ((master->flags & SRI_O_DOWN) == 0) {
- sentinelEvent(REDIS_WARNING,"+odown",master,"%@ #quorum %d/%d",
+ sentinelEvent(LL_WARNING,"+odown",master,"%@ #quorum %d/%d",
quorum, master->quorum);
master->flags |= SRI_O_DOWN;
master->o_down_since_time = mstime();
}
} else {
if (master->flags & SRI_O_DOWN) {
- sentinelEvent(REDIS_WARNING,"-odown",master,"%@");
+ sentinelEvent(LL_WARNING,"-odown",master,"%@");
master->flags &= ~SRI_O_DOWN;
}
}
@@ -3032,11 +3506,12 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
/* Receive the SENTINEL is-master-down-by-addr reply, see the
* sentinelAskMasterStateToOtherSentinels() function for more information. */
void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Ignore every error or unexpected reply.
@@ -3057,8 +3532,8 @@ void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *p
/* If the runid in the reply is not "*" the Sentinel actually
* replied with a vote. */
sdsfree(ri->leader);
- if (ri->leader_epoch != r->element[2]->integer)
- redisLog(REDIS_WARNING,
+ if ((long long)ri->leader_epoch != r->element[2]->integer)
+ serverLog(LL_WARNING,
"%s voted for %s %llu", ri->name,
r->element[1]->str,
(unsigned long long) r->element[2]->integer);
@@ -3097,27 +3572,34 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
* 2) Sentinel is connected.
* 3) We did not received the info within SENTINEL_ASK_PERIOD ms. */
if ((master->flags & SRI_S_DOWN) == 0) continue;
- if (ri->flags & SRI_DISCONNECTED) continue;
+ if (ri->link->disconnected) continue;
if (!(flags & SENTINEL_ASK_FORCED) &&
mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
continue;
/* Ask */
ll2string(port,sizeof(port),master->addr->port);
- retval = redisAsyncCommand(ri->cc,
- sentinelReceiveIsMasterDownReply, NULL,
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelReceiveIsMasterDownReply, ri,
"SENTINEL is-master-down-by-addr %s %s %llu %s",
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
- server.runid : "*");
- if (retval == REDIS_OK) ri->pending_commands++;
+ sentinel.myid : "*");
+ if (retval == C_OK) ri->link->pending_commands++;
}
dictReleaseIterator(di);
}
/* =============================== FAILOVER ================================= */
+/* Crash because of user request via SENTINEL simulate-failure command. */
+void sentinelSimFailureCrash(void) {
+ serverLog(LL_WARNING,
+ "Sentinel CRASH because of SENTINEL simulate-failure");
+ exit(99);
+}
+
/* Vote for the sentinel with 'req_runid' or return the old vote if already
* voted for the specifed 'req_epoch' or one greater.
*
@@ -3127,7 +3609,7 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
if (req_epoch > sentinel.current_epoch) {
sentinel.current_epoch = req_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
}
@@ -3137,12 +3619,12 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
master->leader = sdsnew(req_runid);
master->leader_epoch = sentinel.current_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+vote-for-leader",master,"%s %llu",
+ sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
master->leader, (unsigned long long) master->leader_epoch);
/* If we did not voted for ourselves, set the master failover start
* time to now, in order to force a delay before we can start a
* failover for the same master. */
- if (strcasecmp(master->leader,server.runid))
+ if (strcasecmp(master->leader,sentinel.myid))
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
}
@@ -3158,16 +3640,16 @@ struct sentinelLeader {
/* Helper function for sentinelGetLeader, increment the counter
* relative to the specified runid. */
int sentinelLeaderIncr(dict *counters, char *runid) {
- dictEntry *de = dictFind(counters,runid);
+ dictEntry *existing, *de;
uint64_t oldval;
- if (de) {
- oldval = dictGetUnsignedIntegerVal(de);
- dictSetUnsignedIntegerVal(de,oldval+1);
+ de = dictAddRaw(counters,runid,&existing);
+ if (existing) {
+ oldval = dictGetUnsignedIntegerVal(existing);
+ dictSetUnsignedIntegerVal(existing,oldval+1);
return oldval+1;
} else {
- de = dictAddRaw(counters,runid);
- redisAssert(de != NULL);
+ serverAssert(de != NULL);
dictSetUnsignedIntegerVal(de,1);
return 1;
}
@@ -3176,9 +3658,9 @@ int sentinelLeaderIncr(dict *counters, char *runid) {
/* Scan all the Sentinels attached to this master to check if there
* is a leader for the specified epoch.
*
- * To be a leader for a given epoch, we should have the majorify of
- * the Sentinels we know that reported the same instance as
- * leader for the same epoch. */
+ * To be a leader for a given epoch, we should have the majority of
+ * the Sentinels we know (ever seen since the last SENTINEL RESET) that
+ * reported the same instance as leader for the same epoch. */
char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
dict *counters;
dictIterator *di;
@@ -3189,16 +3671,17 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
uint64_t leader_epoch;
uint64_t max_votes = 0;
- redisAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
+ serverAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
counters = dictCreate(&leaderVotesDictType,NULL);
+ voters = dictSize(master->sentinels)+1; /* All the other sentinels and me.*/
+
/* Count other sentinels votes */
di = dictGetIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
if (ri->leader != NULL && ri->leader_epoch == sentinel.current_epoch)
sentinelLeaderIncr(counters,ri->leader);
- voters++;
}
dictReleaseIterator(di);
@@ -3222,7 +3705,7 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
if (winner)
myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
else
- myvote = sentinelVoteLeader(master,epoch,server.runid,&leader_epoch);
+ myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);
if (myvote && leader_epoch == epoch) {
uint64_t votes = sentinelLeaderIncr(counters,myvote);
@@ -3232,7 +3715,6 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
winner = myvote;
}
}
- voters++; /* Anyway, count me as one of the voters. */
voters_quorum = voters/2+1;
if (winner && (max_votes < voters_quorum || max_votes < master->quorum))
@@ -3251,8 +3733,8 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
*
* If Host is NULL the function sends "SLAVEOF NO ONE".
*
- * The command returns REDIS_OK if the SLAVEOF command was accepted for
- * (later) delivery otherwise REDIS_ERR. The command replies are just
+ * The command returns C_OK if the SLAVEOF command was accepted for
+ * (later) delivery otherwise C_ERR. The command replies are just
* discarded. */
int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
char portstr[32];
@@ -3277,49 +3759,49 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
*
* Note that we don't check the replies returned by commands, since we
* will observe instead the effects in the next INFO output. */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "MULTI");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "MULTI");
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "SLAVEOF %s %s", host, portstr);
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "CONFIG REWRITE");
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
/* CLIENT KILL TYPE <type> is only supported starting from Redis 2.8.12,
* however sending it to an instance not understanding this command is not
* an issue because CLIENT is variadic command, so Redis will not
* recognized as a syntax error, and the transaction will not fail (but
* only the unsupported command will fail). */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CLIENT KILL TYPE normal");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "CLIENT KILL TYPE normal");
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "EXEC");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "EXEC");
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
- return REDIS_OK;
+ return C_OK;
}
/* Setup the master state to start a failover. */
void sentinelStartFailover(sentinelRedisInstance *master) {
- redisAssert(master->flags & SRI_MASTER);
+ serverAssert(master->flags & SRI_MASTER);
master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
master->flags |= SRI_FAILOVER_IN_PROGRESS;
master->failover_epoch = ++sentinel.current_epoch;
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
- sentinelEvent(REDIS_WARNING,"+try-failover",master,"%@");
+ sentinelEvent(LL_WARNING,"+try-failover",master,"%@");
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
master->failover_state_change_time = mstime();
}
@@ -3354,7 +3836,7 @@ int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
ctime_r(&clock,ctimebuf);
ctimebuf[24] = '\0'; /* Remove newline. */
master->failover_delay_logged = master->failover_start_time;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Next failover delay: I will not start a failover before %s",
ctimebuf);
}
@@ -3406,11 +3888,11 @@ int compareSlavesForPromotion(const void *a, const void *b) {
return (*sa)->slave_priority - (*sb)->slave_priority;
/* If priority is the same, select the slave with greater replication
- * offset (processed more data frmo the master). */
+ * offset (processed more data from the master). */
if ((*sa)->slave_repl_offset > (*sb)->slave_repl_offset) {
return -1; /* a < b */
} else if ((*sa)->slave_repl_offset < (*sb)->slave_repl_offset) {
- return 1; /* b > a */
+ return 1; /* a > b */
}
/* If the replication offset is the same select the slave with that has
@@ -3443,8 +3925,9 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
mstime_t info_validity_time;
- if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN|SRI_DISCONNECTED)) continue;
- if (mstime() - slave->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
+ if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ if (slave->link->disconnected) continue;
+ if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
if (slave->slave_priority == 0) continue;
/* If the master is in SDOWN state we get INFO for slaves every second.
@@ -3475,7 +3958,7 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
/* Check if we are the leader for the failover epoch. */
leader = sentinelGetLeader(ri, ri->failover_epoch);
- isleader = leader && strcasecmp(leader,server.runid) == 0;
+ isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
sdsfree(leader);
/* If I'm not the leader, and it is not a forced failover via
@@ -3489,15 +3972,17 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
election_timeout = ri->failover_timeout;
/* Abort the failover if I'm not the leader after some time. */
if (mstime() - ri->failover_start_time > election_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-not-elected",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-not-elected",ri,"%@");
sentinelAbortFailover(ri);
}
return;
}
- sentinelEvent(REDIS_WARNING,"+elected-leader",ri,"%@");
+ sentinelEvent(LL_WARNING,"+elected-leader",ri,"%@");
+ if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION)
+ sentinelSimFailureCrash();
ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
ri->failover_state_change_time = mstime();
- sentinelEvent(REDIS_WARNING,"+failover-state-select-slave",ri,"%@");
+ sentinelEvent(LL_WARNING,"+failover-state-select-slave",ri,"%@");
}
void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
@@ -3506,15 +3991,15 @@ void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
/* We don't handle the timeout in this state as the function aborts
* the failover or go forward in the next state. */
if (slave == NULL) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-no-good-slave",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-no-good-slave",ri,"%@");
sentinelAbortFailover(ri);
} else {
- sentinelEvent(REDIS_WARNING,"+selected-slave",slave,"%@");
+ sentinelEvent(LL_WARNING,"+selected-slave",slave,"%@");
slave->flags |= SRI_PROMOTED;
ri->promoted_slave = slave;
ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
ri->failover_state_change_time = mstime();
- sentinelEvent(REDIS_NOTICE,"+failover-state-send-slaveof-noone",
+ sentinelEvent(LL_NOTICE,"+failover-state-send-slaveof-noone",
slave, "%@");
}
}
@@ -3525,9 +4010,9 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
/* We can't send the command to the promoted slave if it is now
* disconnected. Retry again and again with this state until the timeout
* is reached, then abort the failover. */
- if (ri->promoted_slave->flags & SRI_DISCONNECTED) {
+ if (ri->promoted_slave->link->disconnected) {
if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
sentinelAbortFailover(ri);
}
return;
@@ -3538,8 +4023,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
* really care about the reply. We check if it worked indirectly observing
* if INFO returns a different role (master instead of slave). */
retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
- if (retval != REDIS_OK) return;
- sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
+ if (retval != C_OK) return;
+ sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
ri->promoted_slave,"%@");
ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
ri->failover_state_change_time = mstime();
@@ -3551,7 +4036,7 @@ void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
/* Just handle the timeout. Switching to the next state is handled
* by the function parsing the INFO command of the promoted slave. */
if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
sentinelAbortFailover(ri);
}
}
@@ -3583,11 +4068,11 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (elapsed > master->failover_timeout) {
not_reconfigured = 0;
timeout = 1;
- sentinelEvent(REDIS_WARNING,"+failover-end-for-timeout",master,"%@");
+ sentinelEvent(LL_WARNING,"+failover-end-for-timeout",master,"%@");
}
if (not_reconfigured == 0) {
- sentinelEvent(REDIS_WARNING,"+failover-end",master,"%@");
+ sentinelEvent(LL_WARNING,"+failover-end",master,"%@");
master->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG;
master->failover_state_change_time = mstime();
}
@@ -3604,14 +4089,14 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
int retval;
- if (slave->flags &
- (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
+ if (slave->flags & (SRI_RECONF_DONE|SRI_RECONF_SENT)) continue;
+ if (slave->link->disconnected) continue;
retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->ip,
master->promoted_slave->addr->port);
- if (retval == REDIS_OK) {
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
+ if (retval == C_OK) {
+ sentinelEvent(LL_NOTICE,"+slave-reconf-sent-be",slave,"%@");
slave->flags |= SRI_RECONF_SENT;
}
}
@@ -3653,24 +4138,24 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
(mstime() - slave->slave_reconf_sent_time) >
SENTINEL_SLAVE_RECONF_TIMEOUT)
{
- sentinelEvent(REDIS_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
+ sentinelEvent(LL_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
slave->flags &= ~SRI_RECONF_SENT;
slave->flags |= SRI_RECONF_DONE;
}
/* Nothing to do for instances that are disconnected or already
* in RECONF_SENT state. */
- if (slave->flags & (SRI_DISCONNECTED|SRI_RECONF_SENT|SRI_RECONF_INPROG))
- continue;
+ if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) continue;
+ if (slave->link->disconnected) continue;
/* Send SLAVEOF <new master>. */
retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->ip,
master->promoted_slave->addr->port);
- if (retval == REDIS_OK) {
+ if (retval == C_OK) {
slave->flags |= SRI_RECONF_SENT;
slave->slave_reconf_sent_time = mstime();
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-sent",slave,"%@");
in_progress++;
}
}
@@ -3687,7 +4172,7 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
sentinelRedisInstance *ref = master->promoted_slave ?
master->promoted_slave : master;
- sentinelEvent(REDIS_WARNING,"+switch-master",master,"%s %s %d %s %d",
+ sentinelEvent(LL_WARNING,"+switch-master",master,"%s %s %d %s %d",
master->name, master->addr->ip, master->addr->port,
ref->addr->ip, ref->addr->port);
@@ -3695,7 +4180,7 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
}
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
- redisAssert(ri->flags & SRI_MASTER);
+ serverAssert(ri->flags & SRI_MASTER);
if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;
@@ -3724,8 +4209,8 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
* the slave -> master switch. Otherwise the failover can't be aborted and
* will reach its end (possibly by timeout). */
void sentinelAbortFailover(sentinelRedisInstance *ri) {
- redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
- redisAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
+ serverAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
+ serverAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_FORCE_FAILOVER);
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
@@ -3755,7 +4240,7 @@ void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
if (sentinel.tilt) {
if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
sentinel.tilt = 0;
- sentinelEvent(REDIS_WARNING,"-tilt",NULL,"#tilt mode exited");
+ sentinelEvent(LL_WARNING,"-tilt",NULL,"#tilt mode exited");
}
/* Every kind of instance */
@@ -3828,7 +4313,7 @@ void sentinelCheckTiltCondition(void) {
if (delta < 0 || delta > SENTINEL_TILT_TRIGGER) {
sentinel.tilt = 1;
sentinel.tilt_start_time = mstime();
- sentinelEvent(REDIS_WARNING,"+tilt",NULL,"#tilt mode entered");
+ sentinelEvent(LL_WARNING,"+tilt",NULL,"#tilt mode entered");
}
sentinel.previous_time = mstime();
}
@@ -3846,6 +4331,6 @@ void sentinelTimer(void) {
* exactly continue to stay synchronized asking to be voted at the
* same time again and again (resulting in nobody likely winning the
* election because of split brain voting). */
- server.hz = REDIS_DEFAULT_HZ + rand() % REDIS_DEFAULT_HZ;
+ server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ;
}
diff --git a/src/redis.c b/src/server.c
index 5ddbc9250..2da6fb544 100644
--- a/src/redis.c
+++ b/src/server.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,10 +27,12 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include "slowlog.h"
#include "bio.h"
+#include "latency.h"
+#include "atomicvar.h"
#include <time.h>
#include <signal.h>
@@ -45,12 +47,14 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
+#include <sys/un.h>
#include <limits.h>
#include <float.h>
#include <math.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#include <locale.h>
+#include <sys/socket.h>
/* Our shared "common" objects */
@@ -65,8 +69,8 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan;
/*================================= Globals ================================= */
/* Global vars */
-struct redisServer server; /* server global state */
-struct redisCommand *commandTable;
+struct redisServer server; /* Server global state */
+volatile unsigned long lru_clock; /* Server global current LRU time. */
/* Our command table.
*
@@ -121,6 +125,7 @@ struct redisCommand *commandTable;
* are not fast commands.
*/
struct redisCommand redisCommandTable[] = {
+ {"module",moduleCommand,-2,"as",0,NULL,1,1,1,0,0},
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
{"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
{"setnx",setnxCommand,3,"wmF",0,NULL,1,1,1,0,0},
@@ -129,23 +134,25 @@ struct redisCommand redisCommandTable[] = {
{"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
{"strlen",strlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"del",delCommand,-2,"w",0,NULL,1,-1,1,0,0},
- {"exists",existsCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"unlink",unlinkCommand,-2,"wF",0,NULL,1,-1,1,0,0},
+ {"exists",existsCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getbit",getbitCommand,3,"rF",0,NULL,1,1,1,0,0},
+ {"bitfield",bitfieldCommand,-2,"wm",0,NULL,1,1,1,0,0},
{"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"incr",incrCommand,2,"wmF",0,NULL,1,1,1,0,0},
{"decr",decrCommand,2,"wmF",0,NULL,1,1,1,0,0},
- {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
+ {"mget",mgetCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"rpush",rpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"lpush",lpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
- {"rpushx",rpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
- {"lpushx",lpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
+ {"rpushx",rpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
+ {"lpushx",lpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
{"rpop",rpopCommand,2,"wF",0,NULL,1,1,1,0,0},
{"lpop",lpopCommand,2,"wF",0,NULL,1,1,1,0,0},
- {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
+ {"brpop",brpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
{"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"llen",llenCommand,2,"rF",0,NULL,1,1,1,0,0},
@@ -160,7 +167,7 @@ struct redisCommand redisCommandTable[] = {
{"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
{"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"spop",spopCommand,2,"wRsF",0,NULL,1,1,1,0,0},
+ {"spop",spopCommand,-2,"wRF",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
@@ -191,15 +198,16 @@ struct redisCommand redisCommandTable[] = {
{"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
- {"hset",hsetCommand,4,"wmF",0,NULL,1,1,1,0,0},
+ {"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
{"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0},
- {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
- {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
+ {"hmset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
+ {"hmget",hmgetCommand,-3,"rF",0,NULL,1,1,1,0,0},
{"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"hlen",hlenCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"hstrlen",hstrlenCommand,3,"rF",0,NULL,1,1,1,0,0},
{"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
@@ -212,7 +220,8 @@ struct redisCommand redisCommandTable[] = {
{"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
- {"select",selectCommand,2,"rlF",0,NULL,0,0,0,0,0},
+ {"select",selectCommand,2,"lF",0,NULL,0,0,0,0,0},
+ {"swapdb",swapdbCommand,3,"wF",0,NULL,0,0,0,0,0},
{"move",moveCommand,3,"wF",0,NULL,1,1,1,0,0},
{"rename",renameCommand,3,"w",0,NULL,1,2,1,0,0},
{"renamenx",renamenxCommand,3,"wF",0,NULL,1,2,1,0,0},
@@ -223,81 +232,91 @@ struct redisCommand redisCommandTable[] = {
{"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
{"scan",scanCommand,-2,"rR",0,NULL,0,0,0,0,0},
{"dbsize",dbsizeCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"auth",authCommand,2,"rsltF",0,NULL,0,0,0,0,0},
- {"ping",pingCommand,-1,"rtF",0,NULL,0,0,0,0,0},
- {"echo",echoCommand,2,"rF",0,NULL,0,0,0,0,0},
- {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
- {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
- {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
- {"shutdown",shutdownCommand,-1,"arlt",0,NULL,0,0,0,0,0},
- {"lastsave",lastsaveCommand,1,"rRF",0,NULL,0,0,0,0,0},
+ {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0},
+ {"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
+ {"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
+ {"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
+ {"bgsave",bgsaveCommand,-1,"a",0,NULL,0,0,0,0,0},
+ {"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0},
+ {"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0},
+ {"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
{"type",typeCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"multi",multiCommand,1,"rsF",0,NULL,0,0,0,0,0},
+ {"multi",multiCommand,1,"sF",0,NULL,0,0,0,0,0},
{"exec",execCommand,1,"sM",0,NULL,0,0,0,0,0},
- {"discard",discardCommand,1,"rsF",0,NULL,0,0,0,0,0},
+ {"discard",discardCommand,1,"sF",0,NULL,0,0,0,0,0},
{"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
{"psync",syncCommand,3,"ars",0,NULL,0,0,0,0,0},
- {"replconf",replconfCommand,-1,"arslt",0,NULL,0,0,0,0,0},
- {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
- {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
+ {"replconf",replconfCommand,-1,"aslt",0,NULL,0,0,0,0,0},
+ {"flushdb",flushdbCommand,-1,"w",0,NULL,0,0,0,0,0},
+ {"flushall",flushallCommand,-1,"w",0,NULL,0,0,0,0,0},
{"sort",sortCommand,-2,"wm",0,sortGetKeys,1,1,1,0,0},
- {"info",infoCommand,-1,"rlt",0,NULL,0,0,0,0,0},
- {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
+ {"info",infoCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"monitor",monitorCommand,1,"as",0,NULL,0,0,0,0,0},
{"ttl",ttlCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"touch",touchCommand,-2,"rF",0,NULL,1,1,1,0,0},
{"pttl",pttlCommand,2,"rF",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
{"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
- {"role",roleCommand,1,"last",0,NULL,0,0,0,0,0},
- {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
- {"config",configCommand,-2,"art",0,NULL,0,0,0,0,0},
- {"subscribe",subscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
- {"unsubscribe",unsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
- {"psubscribe",psubscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
- {"punsubscribe",punsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
- {"publish",publishCommand,3,"pltrF",0,NULL,0,0,0,0,0},
- {"pubsub",pubsubCommand,-2,"pltrR",0,NULL,0,0,0,0,0},
- {"watch",watchCommand,-2,"rsF",0,NULL,1,-1,1,0,0},
- {"unwatch",unwatchCommand,1,"rsF",0,NULL,0,0,0,0,0},
- {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
- {"restore",restoreCommand,-4,"awm",0,NULL,1,1,1,0,0},
- {"restore-asking",restoreCommand,-4,"awmk",0,NULL,1,1,1,0,0},
- {"migrate",migrateCommand,-6,"aw",0,NULL,0,0,0,0,0},
- {"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
- {"readonly",readonlyCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"readwrite",readwriteCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0},
+ {"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
+ {"debug",debugCommand,-1,"as",0,NULL,0,0,0,0,0},
+ {"config",configCommand,-2,"lat",0,NULL,0,0,0,0,0},
+ {"subscribe",subscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
+ {"unsubscribe",unsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
+ {"psubscribe",psubscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
+ {"punsubscribe",punsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
+ {"publish",publishCommand,3,"pltF",0,NULL,0,0,0,0,0},
+ {"pubsub",pubsubCommand,-2,"pltR",0,NULL,0,0,0,0,0},
+ {"watch",watchCommand,-2,"sF",0,NULL,1,-1,1,0,0},
+ {"unwatch",unwatchCommand,1,"sF",0,NULL,0,0,0,0,0},
+ {"cluster",clusterCommand,-2,"a",0,NULL,0,0,0,0,0},
+ {"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
+ {"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
+ {"migrate",migrateCommand,-6,"w",0,migrateGetKeys,0,0,0,0,0},
+ {"asking",askingCommand,1,"F",0,NULL,0,0,0,0,0},
+ {"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0},
+ {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0},
+ {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0},
{"object",objectCommand,3,"r",0,NULL,2,2,2,0,0},
- {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0},
+ {"memory",memoryCommand,-2,"r",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0},
{"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
- {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
- {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0},
- {"time",timeCommand,1,"rRF",0,NULL,0,0,0,0,0},
+ {"slowlog",slowlogCommand,-2,"a",0,NULL,0,0,0,0,0},
+ {"script",scriptCommand,-2,"s",0,NULL,0,0,0,0,0},
+ {"time",timeCommand,1,"RF",0,NULL,0,0,0,0,0},
{"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
{"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
{"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
- {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0},
- {"command",commandCommand,0,"rlt",0,NULL,0,0,0,0,0},
- {"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0},
+ {"wait",waitCommand,3,"s",0,NULL,0,0,0,0,0},
+ {"command",commandCommand,0,"lt",0,NULL,0,0,0,0,0},
+ {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0},
+ {"georadius",georadiusCommand,-6,"w",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadius_ro",georadiusroCommand,-6,"r",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadiusbymember",georadiusbymemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadiusbymember_ro",georadiusbymemberroCommand,-5,"r",0,georadiusGetKeys,1,1,1,0,0},
+ {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0},
+ {"pfselftest",pfselftestCommand,1,"a",0,NULL,0,0,0,0,0},
{"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
- {"pfcount",pfcountCommand,-2,"w",0,NULL,1,1,1,0,0},
+ {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0},
{"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
{"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
- {"latency",latencyCommand,-2,"arslt",0,NULL,0,0,0,0,0}
+ {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0}
};
-struct evictionPoolEntry *evictionPoolAlloc(void);
-
/*============================ Utility functions ============================ */
/* Low level logging. To use only for very big messages, otherwise
- * redisLog() is to prefer. */
-void redisLogRaw(int level, const char *msg) {
+ * serverLog() is to prefer. */
+void serverLogRaw(int level, const char *msg) {
const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
const char *c = ".-*#";
FILE *fp;
char buf[64];
- int rawmode = (level & REDIS_LOG_RAW);
+ int rawmode = (level & LL_RAW);
int log_to_stdout = server.logfile[0] == '\0';
level &= 0xff; /* clear flags */
@@ -333,12 +352,12 @@ void redisLogRaw(int level, const char *msg) {
if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
}
-/* Like redisLogRaw() but with printf-alike support. This is the function that
+/* Like serverLogRaw() but with printf-alike support. This is the function that
* is used across the code. The raw version is only used in order to dump
* the INFO output on crash. */
-void redisLog(int level, const char *fmt, ...) {
+void serverLog(int level, const char *fmt, ...) {
va_list ap;
- char msg[REDIS_MAX_LOGMSG_LEN];
+ char msg[LOG_MAX_LEN];
if ((level&0xff) < server.verbosity) return;
@@ -346,7 +365,7 @@ void redisLog(int level, const char *fmt, ...) {
vsnprintf(msg, sizeof(msg), fmt, ap);
va_end(ap);
- redisLogRaw(level,msg);
+ serverLogRaw(level,msg);
}
/* Log a fixed message without printf-alike capabilities, in a way that is
@@ -354,8 +373,8 @@ void redisLog(int level, const char *fmt, ...) {
*
* We actually use this only for signals that are not fatal from the point
* of view of Redis. Signals that are going to kill the server anyway and
- * where we need printf-alike features are served by redisLog(). */
-void redisLogFromHandler(int level, const char *msg) {
+ * where we need printf-alike features are served by serverLog(). */
+void serverLogFromHandler(int level, const char *msg) {
int fd;
int log_to_stdout = server.logfile[0] == '\0';
char buf[64];
@@ -389,7 +408,7 @@ long long ustime(void) {
}
/* Return the UNIX time in milliseconds */
-long long mstime(void) {
+mstime_t mstime(void) {
return ustime()/1000;
}
@@ -408,7 +427,7 @@ void exitFromChild(int retcode) {
/*====================== Hash table type implementation ==================== */
/* This is a hash table type that uses the SDS dynamic strings library as
- * keys and radis objects as values (objects can hold SDS strings,
+ * keys and redis objects as values (objects can hold SDS strings,
* lists, sets). */
void dictVanillaFree(void *privdata, void *val)
@@ -445,11 +464,11 @@ int dictSdsKeyCaseCompare(void *privdata, const void *key1,
return strcasecmp(key1, key2) == 0;
}
-void dictRedisObjectDestructor(void *privdata, void *val)
+void dictObjectDestructor(void *privdata, void *val)
{
DICT_NOTUSED(privdata);
- if (val == NULL) return; /* Values of swapped out keys as set to NULL */
+ if (val == NULL) return; /* Lazy freeing will set value to NULL. */
decrRefCount(val);
}
@@ -467,16 +486,16 @@ int dictObjKeyCompare(void *privdata, const void *key1,
return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
}
-unsigned int dictObjHash(const void *key) {
+uint64_t dictObjHash(const void *key) {
const robj *o = key;
return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
}
-unsigned int dictSdsHash(const void *key) {
+uint64_t dictSdsHash(const void *key) {
return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
-unsigned int dictSdsCaseHash(const void *key) {
+uint64_t dictSdsCaseHash(const void *key) {
return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key));
}
@@ -486,8 +505,8 @@ int dictEncObjKeyCompare(void *privdata, const void *key1,
robj *o1 = (robj*) key1, *o2 = (robj*) key2;
int cmp;
- if (o1->encoding == REDIS_ENCODING_INT &&
- o2->encoding == REDIS_ENCODING_INT)
+ if (o1->encoding == OBJ_ENCODING_INT &&
+ o2->encoding == OBJ_ENCODING_INT)
return o1->ptr == o2->ptr;
o1 = getDecodedObject(o1);
@@ -498,20 +517,20 @@ int dictEncObjKeyCompare(void *privdata, const void *key1,
return cmp;
}
-unsigned int dictEncObjHash(const void *key) {
+uint64_t dictEncObjHash(const void *key) {
robj *o = (robj*) key;
if (sdsEncodedObject(o)) {
return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
} else {
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
char buf[32];
int len;
len = ll2string(buf,32,(long)o->ptr);
return dictGenHashFunction((unsigned char*)buf, len);
} else {
- unsigned int hash;
+ uint64_t hash;
o = getDecodedObject(o);
hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
@@ -521,23 +540,34 @@ unsigned int dictEncObjHash(const void *key) {
}
}
-/* Sets type hash table */
-dictType setDictType = {
+/* Generic hash table type where keys are Redis Objects, Values
+ * dummy pointers. */
+dictType objectKeyPointerValueDictType = {
dictEncObjHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictObjectDestructor, /* key destructor */
+ NULL /* val destructor */
+};
+
+/* Set dictionary type. Keys are SDS strings, values are ot used. */
+dictType setDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
NULL /* val destructor */
};
/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
dictType zsetDictType = {
- dictEncObjHash, /* hash function */
+ dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* Note: SDS string shared & freed by skiplist */
NULL /* val destructor */
};
@@ -548,7 +578,7 @@ dictType dbDictType = {
NULL, /* val dup */
dictSdsKeyCompare, /* key compare */
dictSdsDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictObjectDestructor /* val destructor */
};
/* server.lua_scripts sha (as sds string) -> scripts (as robj) cache. */
@@ -558,37 +588,37 @@ dictType shaScriptObjectDictType = {
NULL, /* val dup */
dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictObjectDestructor /* val destructor */
};
/* Db->expires */
dictType keyptrDictType = {
- dictSdsHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCompare, /* key compare */
- NULL, /* key destructor */
- NULL /* val destructor */
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
};
/* Command table. sds string -> command struct pointer. */
dictType commandTableDictType = {
- dictSdsCaseHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCaseCompare, /* key compare */
- dictSdsDestructor, /* key destructor */
- NULL /* val destructor */
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
};
/* Hash type hash table (note that small hashes are represented with ziplists) */
dictType hashDictType = {
- dictEncObjHash, /* hash function */
+ dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictSdsDestructor /* val destructor */
};
/* Keylist hash table type has unencoded redis objects as keys and
@@ -599,7 +629,7 @@ dictType keylistDictType = {
NULL, /* key dup */
NULL, /* val dup */
dictObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictObjectDestructor, /* key destructor */
dictListDestructor /* val destructor */
};
@@ -626,6 +656,18 @@ dictType clusterNodesBlackListDictType = {
NULL /* val destructor */
};
+/* Cluster re-addition blacklist. This maps node IDs to the time
+ * we can re-add this node. The goal is to avoid readding a removed
+ * node for some time. */
+dictType modulesDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
+};
+
/* Migrate cache dict type. */
dictType migrateCacheDictType = {
dictSdsHash, /* hash function */
@@ -653,11 +695,11 @@ int htNeedsResize(dict *dict) {
size = dictSlots(dict);
used = dictSize(dict);
- return (size && used && size > DICT_HT_INITIAL_SIZE &&
- (used*100/size < REDIS_HT_MINFILL));
+ return (size > DICT_HT_INITIAL_SIZE &&
+ (used*100/size < HASHTABLE_MIN_FILL));
}
-/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
+/* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL
* we resize the hash table to save memory */
void tryResizeHashTables(int dbid) {
if (htNeedsResize(server.db[dbid].dict))
@@ -702,226 +744,64 @@ void updateDictResizePolicy(void) {
/* ======================= Cron: called every 100 ms ======================== */
-/* Helper function for the activeExpireCycle() function.
- * This function will try to expire the key that is stored in the hash table
- * entry 'de' of the 'expires' hash table of a Redis database.
- *
- * If the key is found to be expired, it is removed from the database and
- * 1 is returned. Otherwise no operation is performed and 0 is returned.
- *
- * When a key is expired, server.stat_expiredkeys is incremented.
- *
- * The parameter 'now' is the current time in milliseconds as is passed
- * to the function to avoid too many gettimeofday() syscalls. */
-int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
- long long t = dictGetSignedIntegerVal(de);
- if (now > t) {
- sds key = dictGetKey(de);
- robj *keyobj = createStringObject(key,sdslen(key));
-
- propagateExpire(db,keyobj);
- dbDelete(db,keyobj);
- notifyKeyspaceEvent(REDIS_NOTIFY_EXPIRED,
- "expired",keyobj,db->id);
- decrRefCount(keyobj);
- server.stat_expiredkeys++;
- return 1;
- } else {
- return 0;
- }
-}
-
-/* Try to expire a few timed out keys. The algorithm used is adaptive and
- * will use few CPU cycles if there are few expiring keys, otherwise
- * it will get more aggressive to avoid that too much memory is used by
- * keys that can be removed from the keyspace.
- *
- * No more than REDIS_DBCRON_DBS_PER_CALL databases are tested at every
- * iteration.
- *
- * This kind of call is used when Redis detects that timelimit_exit is
- * true, so there is more work to do, and we do it more incrementally from
- * the beforeSleep() function of the event loop.
- *
- * Expire cycle type:
- *
- * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
- * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
- * microseconds, and is not repeated again before the same amount of time.
- *
- * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
- * executed, where the time limit is a percentage of the REDIS_HZ period
- * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. */
-
-void activeExpireCycle(int type) {
- /* This function has some global state in order to continue the work
- * incrementally across calls. */
- static unsigned int current_db = 0; /* Last DB tested. */
- static int timelimit_exit = 0; /* Time limit hit in previous call? */
- static long long last_fast_cycle = 0; /* When last fast cycle ran. */
-
- unsigned int j, iteration = 0;
- unsigned int dbs_per_call = REDIS_DBCRON_DBS_PER_CALL;
- long long start = ustime(), timelimit;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
- /* Don't start a fast cycle if the previous cycle did not exited
- * for time limt. Also don't repeat a fast cycle for the same period
- * as the fast cycle total duration itself. */
- if (!timelimit_exit) return;
- if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
- last_fast_cycle = start;
- }
-
- /* We usually should test REDIS_DBCRON_DBS_PER_CALL per iteration, with
- * two exceptions:
- *
- * 1) Don't test more DBs than we have.
- * 2) If last time we hit the time limit, we want to scan all DBs
- * in this iteration, as there is work to do in some DB and we don't want
- * expired keys to use memory for too much time. */
- if (dbs_per_call > server.dbnum || timelimit_exit)
- dbs_per_call = server.dbnum;
-
- /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
- * per iteration. Since this function gets called with a frequency of
- * server.hz times per second, the following is the max amount of
- * microseconds we can spend in this function. */
- timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
- timelimit_exit = 0;
- if (timelimit <= 0) timelimit = 1;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST)
- timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
-
- for (j = 0; j < dbs_per_call; j++) {
- int expired;
- redisDb *db = server.db+(current_db % server.dbnum);
-
- /* Increment the DB now so we are sure if we run out of time
- * in the current DB we'll restart from the next. This allows to
- * distribute the time evenly across DBs. */
- current_db++;
-
- /* Continue to expire if at the end of the cycle more than 25%
- * of the keys were expired. */
- do {
- unsigned long num, slots;
- long long now, ttl_sum;
- int ttl_samples;
-
- /* If there is nothing to expire try next DB ASAP. */
- if ((num = dictSize(db->expires)) == 0) {
- db->avg_ttl = 0;
- break;
- }
- slots = dictSlots(db->expires);
- now = mstime();
-
- /* When there are less than 1% filled slots getting random
- * keys is expensive, so stop here waiting for better times...
- * The dictionary will be resized asap. */
- if (num && slots > DICT_HT_INITIAL_SIZE &&
- (num*100/slots < 1)) break;
-
- /* The main collection cycle. Sample random keys among keys
- * with an expire set, checking for expired ones. */
- expired = 0;
- ttl_sum = 0;
- ttl_samples = 0;
-
- if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
- num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
-
- while (num--) {
- dictEntry *de;
- long long ttl;
-
- if ((de = dictGetRandomKey(db->expires)) == NULL) break;
- ttl = dictGetSignedIntegerVal(de)-now;
- if (activeExpireCycleTryExpire(db,de,now)) expired++;
- if (ttl < 0) ttl = 0;
- ttl_sum += ttl;
- ttl_samples++;
- }
-
- /* Update the average TTL stats for this database. */
- if (ttl_samples) {
- long long avg_ttl = ttl_sum/ttl_samples;
-
- if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
- /* Smooth the value averaging with the previous one. */
- db->avg_ttl = (db->avg_ttl+avg_ttl)/2;
- }
-
- /* We can't block forever here even if there are many keys to
- * expire. So after a given amount of milliseconds return to the
- * caller waiting for the other active expire cycle. */
- iteration++;
- if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
- long long elapsed = ustime()-start;
-
- latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
- if (elapsed > timelimit) timelimit_exit = 1;
- }
- if (timelimit_exit) return;
- /* We don't repeat the cycle if there are less than 25% of keys
- * found expired in the current DB. */
- } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
- }
-}
-
-unsigned int getLRUClock(void) {
- return (mstime()/REDIS_LRU_CLOCK_RESOLUTION) & REDIS_LRU_CLOCK_MAX;
-}
-
/* Add a sample to the operations per second array of samples. */
-void trackOperationsPerSecond(void) {
- long long t = mstime() - server.ops_sec_last_sample_time;
- long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops;
+void trackInstantaneousMetric(int metric, long long current_reading) {
+ long long t = mstime() - server.inst_metric[metric].last_sample_time;
+ long long ops = current_reading -
+ server.inst_metric[metric].last_sample_count;
long long ops_sec;
ops_sec = t > 0 ? (ops*1000/t) : 0;
- server.ops_sec_samples[server.ops_sec_idx] = ops_sec;
- server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES;
- server.ops_sec_last_sample_time = mstime();
- server.ops_sec_last_sample_ops = server.stat_numcommands;
+ server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
+ ops_sec;
+ server.inst_metric[metric].idx++;
+ server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
+ server.inst_metric[metric].last_sample_time = mstime();
+ server.inst_metric[metric].last_sample_count = current_reading;
}
/* Return the mean of all the samples. */
-long long getOperationsPerSecond(void) {
+long long getInstantaneousMetric(int metric) {
int j;
long long sum = 0;
- for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++)
- sum += server.ops_sec_samples[j];
- return sum / REDIS_OPS_SEC_SAMPLES;
+ for (j = 0; j < STATS_METRIC_SAMPLES; j++)
+ sum += server.inst_metric[metric].samples[j];
+ return sum / STATS_METRIC_SAMPLES;
}
-/* Check for timeouts. Returns non-zero if the client was terminated */
-int clientsCronHandleTimeout(redisClient *c) {
- time_t now = server.unixtime;
+/* Check for timeouts. Returns non-zero if the client was terminated.
+ * The function gets the current time in milliseconds as argument since
+ * it gets called multiple times in a loop, so calling gettimeofday() for
+ * each iteration would be costly without any actual gain. */
+int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
+ time_t now = now_ms/1000;
if (server.maxidletime &&
- !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
- !(c->flags & REDIS_MASTER) && /* no timeout for masters */
- !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
- !(c->flags & REDIS_PUBSUB) && /* no timeout for Pub/Sub clients */
+ !(c->flags & CLIENT_SLAVE) && /* no timeout for slaves */
+ !(c->flags & CLIENT_MASTER) && /* no timeout for masters */
+ !(c->flags & CLIENT_BLOCKED) && /* no timeout for BLPOP */
+ !(c->flags & CLIENT_PUBSUB) && /* no timeout for Pub/Sub clients */
(now - c->lastinteraction > server.maxidletime))
{
- redisLog(REDIS_VERBOSE,"Closing idle client");
+ serverLog(LL_VERBOSE,"Closing idle client");
freeClient(c);
return 1;
- } else if (c->flags & REDIS_BLOCKED) {
+ } else if (c->flags & CLIENT_BLOCKED) {
/* Blocked OPS timeout is handled with milliseconds resolution.
* However note that the actual resolution is limited by
* server.hz. */
- mstime_t now_ms = mstime();
if (c->bpop.timeout != 0 && c->bpop.timeout < now_ms) {
+ /* Handle blocking operation specific timeout. */
replyToBlockedClientTimedOut(c);
unblockClient(c);
+ } else if (server.cluster_enabled) {
+ /* Cluster: handle unblock & redirect of clients blocked
+ * into keys no longer served by this server. */
+ if (clusterRedirectBlockedClientIfNeeded(c))
+ unblockClient(c);
}
}
return 0;
@@ -931,14 +811,14 @@ int clientsCronHandleTimeout(redisClient *c) {
* free space not used, this function reclaims space if needed.
*
* The function always returns 0 as it never terminates the client. */
-int clientsCronResizeQueryBuffer(redisClient *c) {
+int clientsCronResizeQueryBuffer(client *c) {
size_t querybuf_size = sdsAllocSize(c->querybuf);
time_t idletime = server.unixtime - c->lastinteraction;
/* There are two conditions to resize the query buffer:
* 1) Query buffer is > BIG_ARG and too big for latest peak.
* 2) Client is inactive and the buffer is bigger than 1k. */
- if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
+ if (((querybuf_size > PROTO_MBULK_BIG_ARG) &&
(querybuf_size/(c->querybuf_peak+1)) > 2) ||
(querybuf_size > 1024 && idletime > 2))
{
@@ -953,19 +833,25 @@ int clientsCronResizeQueryBuffer(redisClient *c) {
return 0;
}
+#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
- /* Make sure to process at least 1/(server.hz*10) of clients per call.
- * Since this function is called server.hz times per second we are sure that
- * in the worst case we process all the clients in 10 seconds.
- * In normal conditions (a reasonable number of clients) we process
- * all the clients in a shorter time. */
+ /* Make sure to process at least numclients/server.hz of clients
+ * per call. Since this function is called server.hz times per second
+ * we are sure that in the worst case we process all the clients in 1
+ * second. */
int numclients = listLength(server.clients);
- int iterations = numclients/(server.hz*10);
+ int iterations = numclients/server.hz;
+ mstime_t now = mstime();
+
+ /* Process at least a few clients while we are at it, even if we need
+ * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
+ * of processing each client once per second. */
+ if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
+ iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
+ numclients : CLIENTS_CRON_MIN_ITERATIONS;
- if (iterations < 50)
- iterations = (numclients < 50) ? numclients : 50;
while(listLength(server.clients) && iterations--) {
- redisClient *c;
+ client *c;
listNode *head;
/* Rotate the list, take the current head, process.
@@ -977,7 +863,7 @@ void clientsCron(void) {
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
- if (clientsCronHandleTimeout(c)) continue;
+ if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
}
}
@@ -988,8 +874,15 @@ void clientsCron(void) {
void databasesCron(void) {
/* Expire keys by random sampling. Not required for slaves
* as master will synthesize DELs for us. */
- if (server.active_expire_enabled && server.masterhost == NULL)
+ if (server.active_expire_enabled && server.masterhost == NULL) {
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
+ } else if (server.masterhost != NULL) {
+ expireSlaveKeys();
+ }
+
+ /* Defrag keys gradually. */
+ if (server.active_defrag_enabled)
+ activeDefragCycle();
/* Perform hash tables rehashing if needed, but only if there are no
* other processes saving the DB on disk. Otherwise rehashing is bad
@@ -1000,8 +893,8 @@ void databasesCron(void) {
* cron loop iteration. */
static unsigned int resize_db = 0;
static unsigned int rehash_db = 0;
- unsigned int dbs_per_call = REDIS_DBCRON_DBS_PER_CALL;
- unsigned int j;
+ int dbs_per_call = CRON_DBS_PER_CALL;
+ int j;
/* Don't test more DBs than we have. */
if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;
@@ -1032,7 +925,8 @@ void databasesCron(void) {
* every object access, and accuracy is not needed. To access a global var is
* a lot faster than calling time(NULL) */
void updateCachedTime(void) {
- server.unixtime = time(NULL);
+ time_t unixtime = time(NULL);
+ atomicSet(server.unixtime,unixtime);
server.mstime = mstime();
}
@@ -1057,9 +951,9 @@ void updateCachedTime(void) {
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
int j;
- REDIS_NOTUSED(eventLoop);
- REDIS_NOTUSED(id);
- REDIS_NOTUSED(clientData);
+ UNUSED(eventLoop);
+ UNUSED(id);
+ UNUSED(clientData);
/* Software watchdog: deliver the SIGALRM that will reach the signal
* handler if we don't return here fast enough. */
@@ -1068,9 +962,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Update the time cache. */
updateCachedTime();
- run_with_period(100) trackOperationsPerSecond();
+ run_with_period(100) {
+ trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);
+ trackInstantaneousMetric(STATS_METRIC_NET_INPUT,
+ server.stat_net_input_bytes);
+ trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
+ server.stat_net_output_bytes);
+ }
- /* We have just REDIS_LRU_BITS bits per object for LRU information.
+ /* We have just LRU_BITS bits per object for LRU information.
* So we use an (eventually wrapping) LRU clock.
*
* Note that even if the counter wraps it's not a big problem,
@@ -1080,8 +980,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* not likely.
*
* Note that you can change the resolution altering the
- * REDIS_LRU_CLOCK_RESOLUTION define. */
- server.lruclock = getLRUClock();
+ * LRU_CLOCK_RESOLUTION define. */
+ unsigned long lruclock = getLRUClock();
+ atomicSet(server.lruclock,lruclock);
/* Record the max memory used since the server was started. */
if (zmalloc_used_memory() > server.stat_peak_memory)
@@ -1093,8 +994,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* We received a SIGTERM, shutting down here in a safe way, as it is
* not ok doing so inside the signal handler. */
if (server.shutdown_asap) {
- if (prepareForShutdown(0) == REDIS_OK) exit(0);
- redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
+ if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) exit(0);
+ serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
server.shutdown_asap = 0;
}
@@ -1107,7 +1008,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
used = dictSize(server.db[j].dict);
vkeys = dictSize(server.db[j].expires);
if (used || vkeys) {
- redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
+ serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
/* dictPrintStats(server.dict); */
}
}
@@ -1116,7 +1017,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Show information about connected clients */
if (!server.sentinel_mode) {
run_with_period(5000) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"%lu clients connected (%lu slaves), %zu bytes in use",
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
@@ -1139,7 +1040,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
/* Check if a background saving or AOF rewrite in progress terminated. */
- if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
+ if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
+ ldbPendingChildren())
+ {
int statloc;
pid_t pid;
@@ -1149,16 +1052,27 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
- if (pid == server.rdb_child_pid) {
+ if (pid == -1) {
+ serverLog(LL_WARNING,"wait3() returned an error: %s. "
+ "rdb_child_pid = %d, aof_child_pid = %d",
+ strerror(errno),
+ (int) server.rdb_child_pid,
+ (int) server.aof_child_pid);
+ } else if (pid == server.rdb_child_pid) {
backgroundSaveDoneHandler(exitcode,bysignal);
+ if (!bysignal && exitcode == 0) receiveChildInfo();
} else if (pid == server.aof_child_pid) {
backgroundRewriteDoneHandler(exitcode,bysignal);
+ if (!bysignal && exitcode == 0) receiveChildInfo();
} else {
- redisLog(REDIS_WARNING,
- "Warning, detected child with unmatched pid: %ld",
- (long)pid);
+ if (!ldbRemoveChild(pid)) {
+ serverLog(LL_WARNING,
+ "Warning, detected child with unmatched pid: %ld",
+ (long)pid);
+ }
}
updateDictResizePolicy();
+ closeChildInfoPipe();
}
} else {
/* If there is not a background saving/rewrite in progress check if
@@ -1169,16 +1083,16 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Save if we reached the given amount of changes,
* the given amount of seconds, and if the latest bgsave was
* successful or if, in case of an error, at least
- * REDIS_BGSAVE_RETRY_DELAY seconds already elapsed. */
+ * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
if (server.dirty >= sp->changes &&
server.unixtime-server.lastsave > sp->seconds &&
(server.unixtime-server.lastbgsave_try >
- REDIS_BGSAVE_RETRY_DELAY ||
- server.lastbgsave_status == REDIS_OK))
+ CONFIG_BGSAVE_RETRY_DELAY ||
+ server.lastbgsave_status == C_OK))
{
- redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
+ serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
- rdbSaveBackground(server.rdb_filename);
+ rdbSaveBackground(server.rdb_filename,NULL);
break;
}
}
@@ -1193,7 +1107,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
server.aof_rewrite_base_size : 1;
long long growth = (server.aof_current_size*100/base) - 100;
if (growth >= server.aof_rewrite_perc) {
- redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
+ serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
}
@@ -1209,7 +1123,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* however to try every second is enough in case of 'hz' is set to
* an higher frequency. */
run_with_period(1000) {
- if (server.aof_last_write_status == REDIS_ERR)
+ if (server.aof_last_write_status == C_ERR)
flushAppendOnlyFile(0);
}
@@ -1217,10 +1131,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
freeClientsInAsyncFreeQueue();
/* Clear the paused clients flag if needed. */
- clientsArePaused(); /* Don't check return value, just use the side effect. */
+ clientsArePaused(); /* Don't check return value, just use the side effect.*/
- /* Replication cron function -- used to reconnect to master and
- * to detect transfer failures. */
+ /* Replication cron function -- used to reconnect to master,
+ * detect transfer failures, start background RDB transfers and so forth. */
run_with_period(1000) replicationCron();
/* Run the Redis Cluster cron. */
@@ -1238,6 +1152,22 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
migrateCloseTimedoutSockets();
}
+ /* Start a scheduled BGSAVE if the corresponding flag is set. This is
+ * useful when we are forced to postpone a BGSAVE because an AOF
+ * rewrite is in progress.
+ *
+ * Note: this code must be after the replicationCron() call above so
+ * make sure when refactoring this file to keep this order. This is useful
+ * because we want to give priority to RDB savings for replication. */
+ if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
+ server.rdb_bgsave_scheduled &&
+ (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
+ server.lastbgsave_status == C_OK))
+ {
+ if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK)
+ server.rdb_bgsave_scheduled = 0;
+ }
+
server.cronloops++;
return 1000/server.hz;
}
@@ -1246,7 +1176,13 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors. */
void beforeSleep(struct aeEventLoop *eventLoop) {
- REDIS_NOTUSED(eventLoop);
+ UNUSED(eventLoop);
+
+ /* Call the Redis Cluster before sleep function. Note that this function
+ * may change the state of Redis Cluster (from ok to fail or vice versa),
+ * so it's a good idea to call it before serving the unblocked clients
+ * later in this function. */
+ if (server.cluster_enabled) clusterBeforeSleep();
/* Run a fast expire cycle (the called function will return
* ASAP if a fast cycle is not needed). */
@@ -1273,6 +1209,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
if (listLength(server.clients_waiting_acks))
processClientsWaitingReplicas();
+ /* Check if there are clients unblocked by modules that implement
+ * blocking commands. */
+ moduleHandleBlockedClients();
+
/* Try to process pending commands for clients that were just unblocked. */
if (listLength(server.unblocked_clients))
processUnblockedClients();
@@ -1280,8 +1220,21 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
/* Write the AOF buffer on disk */
flushAppendOnlyFile(0);
- /* Call the Redis Cluster before sleep function. */
- if (server.cluster_enabled) clusterBeforeSleep();
+ /* Handle writes with pending output buffers. */
+ handleClientsWithPendingWrites();
+
+ /* Before we are going to sleep, let the threads access the dataset by
+ * releasing the GIL. Redis main thread will not touch anything at this
+ * time. */
+ if (moduleCount()) moduleReleaseGIL();
+}
+
+/* This function is called immadiately after the event loop multiplexing
+ * API returned, and the control is going to soon return to Redis by invoking
+ * the different events callbacks. */
+void afterSleep(struct aeEventLoop *eventLoop) {
+ UNUSED(eventLoop);
+ if (moduleCount()) moduleAcquireGIL();
}
/* =========================== Server initialization ======================== */
@@ -1289,61 +1242,61 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
void createSharedObjects(void) {
int j;
- shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
- shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
- shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
- shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
- shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
- shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
- shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
- shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
- shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
- shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
- shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
- shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
- shared.emptyscan = createObject(REDIS_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
- shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
+ shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
+ shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
+ shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
+ shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
+ shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
+ shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
+ shared.cnegone = createObject(OBJ_STRING,sdsnew(":-1\r\n"));
+ shared.nullbulk = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
+ shared.nullmultibulk = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
+ shared.emptymultibulk = createObject(OBJ_STRING,sdsnew("*0\r\n"));
+ shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
+ shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
+ shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
+ shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
"-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
- shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
+ shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
"-ERR no such key\r\n"));
- shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
+ shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
"-ERR syntax error\r\n"));
- shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
+ shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
"-ERR source and destination objects are the same\r\n"));
- shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
+ shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
"-ERR index out of range\r\n"));
- shared.noscripterr = createObject(REDIS_STRING,sdsnew(
+ shared.noscripterr = createObject(OBJ_STRING,sdsnew(
"-NOSCRIPT No matching script. Please use EVAL.\r\n"));
- shared.loadingerr = createObject(REDIS_STRING,sdsnew(
+ shared.loadingerr = createObject(OBJ_STRING,sdsnew(
"-LOADING Redis is loading the dataset in memory\r\n"));
- shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
+ shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
"-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
- shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
+ shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
"-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
- shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
- "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
- shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
+ shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
+ "-MISCONF Redis is configured to save RDB snapshots, but it is currently not able to persist on disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). Please check the Redis logs for details about the RDB error.\r\n"));
+ shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
"-READONLY You can't write against a read only slave.\r\n"));
- shared.noautherr = createObject(REDIS_STRING,sdsnew(
+ shared.noautherr = createObject(OBJ_STRING,sdsnew(
"-NOAUTH Authentication required.\r\n"));
- shared.oomerr = createObject(REDIS_STRING,sdsnew(
+ shared.oomerr = createObject(OBJ_STRING,sdsnew(
"-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
- shared.execaborterr = createObject(REDIS_STRING,sdsnew(
+ shared.execaborterr = createObject(OBJ_STRING,sdsnew(
"-EXECABORT Transaction discarded because of previous errors.\r\n"));
- shared.noreplicaserr = createObject(REDIS_STRING,sdsnew(
+ shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
"-NOREPLICAS Not enough good slaves to write.\r\n"));
- shared.busykeyerr = createObject(REDIS_STRING,sdsnew(
+ shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
"-BUSYKEY Target key name already exists.\r\n"));
- shared.space = createObject(REDIS_STRING,sdsnew(" "));
- shared.colon = createObject(REDIS_STRING,sdsnew(":"));
- shared.plus = createObject(REDIS_STRING,sdsnew("+"));
+ shared.space = createObject(OBJ_STRING,sdsnew(" "));
+ shared.colon = createObject(OBJ_STRING,sdsnew(":"));
+ shared.plus = createObject(OBJ_STRING,sdsnew("+"));
- for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
+ for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
char dictid_str[64];
int dictid_len;
dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
- shared.select[j] = createObject(REDIS_STRING,
+ shared.select[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),
"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
dictid_len, dictid_str));
@@ -1355,144 +1308,179 @@ void createSharedObjects(void) {
shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
shared.del = createStringObject("DEL",3);
+ shared.unlink = createStringObject("UNLINK",6);
shared.rpop = createStringObject("RPOP",4);
shared.lpop = createStringObject("LPOP",4);
shared.lpush = createStringObject("LPUSH",5);
- for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
- shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
- shared.integers[j]->encoding = REDIS_ENCODING_INT;
+ for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
+ shared.integers[j] =
+ makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
+ shared.integers[j]->encoding = OBJ_ENCODING_INT;
}
- for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
- shared.mbulkhdr[j] = createObject(REDIS_STRING,
+ for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
+ shared.mbulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"*%d\r\n",j));
- shared.bulkhdr[j] = createObject(REDIS_STRING,
+ shared.bulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"$%d\r\n",j));
}
/* The following two shared objects, minstring and maxstrings, are not
* actually used for their value but as a special object meaning
* respectively the minimum possible string and the maximum possible
* string in string comparisons for the ZRANGEBYLEX command. */
- shared.minstring = createStringObject("minstring",9);
- shared.maxstring = createStringObject("maxstring",9);
+ shared.minstring = sdsnew("minstring");
+ shared.maxstring = sdsnew("maxstring");
}
-void initServerConfig() {
+void initServerConfig(void) {
int j;
- getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
+ pthread_mutex_init(&server.next_client_id_mutex,NULL);
+ pthread_mutex_init(&server.lruclock_mutex,NULL);
+ pthread_mutex_init(&server.unixtime_mutex,NULL);
+
+ getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
+ server.runid[CONFIG_RUN_ID_SIZE] = '\0';
+ changeReplicationId();
+ clearReplicationId2();
server.configfile = NULL;
- server.hz = REDIS_DEFAULT_HZ;
- server.runid[REDIS_RUN_ID_SIZE] = '\0';
+ server.executable = NULL;
+ server.hz = CONFIG_DEFAULT_HZ;
server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
- server.port = REDIS_SERVERPORT;
- server.tcp_backlog = REDIS_TCP_BACKLOG;
+ server.port = CONFIG_DEFAULT_SERVER_PORT;
+ server.tcp_backlog = CONFIG_DEFAULT_TCP_BACKLOG;
server.bindaddr_count = 0;
server.unixsocket = NULL;
- server.unixsocketperm = REDIS_DEFAULT_UNIX_SOCKET_PERM;
+ server.unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM;
server.ipfd_count = 0;
server.sofd = -1;
- server.dbnum = REDIS_DEFAULT_DBNUM;
- server.verbosity = REDIS_DEFAULT_VERBOSITY;
- server.maxidletime = REDIS_MAXIDLETIME;
- server.tcpkeepalive = REDIS_DEFAULT_TCP_KEEPALIVE;
+ server.protected_mode = CONFIG_DEFAULT_PROTECTED_MODE;
+ server.dbnum = CONFIG_DEFAULT_DBNUM;
+ server.verbosity = CONFIG_DEFAULT_VERBOSITY;
+ server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT;
+ server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE;
server.active_expire_enabled = 1;
- server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
+ server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG;
+ server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES;
+ server.active_defrag_threshold_lower = CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER;
+ server.active_defrag_threshold_upper = CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER;
+ server.active_defrag_cycle_min = CONFIG_DEFAULT_DEFRAG_CYCLE_MIN;
+ server.active_defrag_cycle_max = CONFIG_DEFAULT_DEFRAG_CYCLE_MAX;
+ server.client_max_querybuf_len = PROTO_MAX_QUERYBUF_LEN;
server.saveparams = NULL;
server.loading = 0;
- server.logfile = zstrdup(REDIS_DEFAULT_LOGFILE);
- server.syslog_enabled = REDIS_DEFAULT_SYSLOG_ENABLED;
- server.syslog_ident = zstrdup(REDIS_DEFAULT_SYSLOG_IDENT);
+ server.logfile = zstrdup(CONFIG_DEFAULT_LOGFILE);
+ server.syslog_enabled = CONFIG_DEFAULT_SYSLOG_ENABLED;
+ server.syslog_ident = zstrdup(CONFIG_DEFAULT_SYSLOG_IDENT);
server.syslog_facility = LOG_LOCAL0;
- server.daemonize = REDIS_DEFAULT_DAEMONIZE;
- server.aof_state = REDIS_AOF_OFF;
- server.aof_fsync = REDIS_DEFAULT_AOF_FSYNC;
- server.aof_no_fsync_on_rewrite = REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
- server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
- server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
+ server.daemonize = CONFIG_DEFAULT_DAEMONIZE;
+ server.supervised = 0;
+ server.supervised_mode = SUPERVISED_NONE;
+ server.aof_state = AOF_OFF;
+ server.aof_fsync = CONFIG_DEFAULT_AOF_FSYNC;
+ server.aof_no_fsync_on_rewrite = CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
+ server.aof_rewrite_perc = AOF_REWRITE_PERC;
+ server.aof_rewrite_min_size = AOF_REWRITE_MIN_SIZE;
server.aof_rewrite_base_size = 0;
server.aof_rewrite_scheduled = 0;
server.aof_last_fsync = time(NULL);
server.aof_rewrite_time_last = -1;
server.aof_rewrite_time_start = -1;
- server.aof_lastbgrewrite_status = REDIS_OK;
+ server.aof_lastbgrewrite_status = C_OK;
server.aof_delayed_fsync = 0;
server.aof_fd = -1;
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.aof_flush_postponed_start = 0;
- server.aof_rewrite_incremental_fsync = REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
- server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
- server.rdb_filename = zstrdup(REDIS_DEFAULT_RDB_FILENAME);
- server.aof_filename = zstrdup(REDIS_DEFAULT_AOF_FILENAME);
+ server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
+ server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
+ server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE;
+ server.pidfile = NULL;
+ server.rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME);
+ server.aof_filename = zstrdup(CONFIG_DEFAULT_AOF_FILENAME);
server.requirepass = NULL;
- server.rdb_compression = REDIS_DEFAULT_RDB_COMPRESSION;
- server.rdb_checksum = REDIS_DEFAULT_RDB_CHECKSUM;
- server.stop_writes_on_bgsave_err = REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR;
- server.activerehashing = REDIS_DEFAULT_ACTIVE_REHASHING;
+ server.rdb_compression = CONFIG_DEFAULT_RDB_COMPRESSION;
+ server.rdb_checksum = CONFIG_DEFAULT_RDB_CHECKSUM;
+ server.stop_writes_on_bgsave_err = CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR;
+ server.activerehashing = CONFIG_DEFAULT_ACTIVE_REHASHING;
+ server.active_defrag_running = 0;
server.notify_keyspace_events = 0;
- server.maxclients = REDIS_MAX_CLIENTS;
+ server.maxclients = CONFIG_DEFAULT_MAX_CLIENTS;
server.bpop_blocked_clients = 0;
- server.maxmemory = REDIS_DEFAULT_MAXMEMORY;
- server.maxmemory_policy = REDIS_DEFAULT_MAXMEMORY_POLICY;
- server.maxmemory_samples = REDIS_DEFAULT_MAXMEMORY_SAMPLES;
- server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
- server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
- server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
- server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
- server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
- server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
- server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
- server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES;
+ server.maxmemory = CONFIG_DEFAULT_MAXMEMORY;
+ server.maxmemory_policy = CONFIG_DEFAULT_MAXMEMORY_POLICY;
+ server.maxmemory_samples = CONFIG_DEFAULT_MAXMEMORY_SAMPLES;
+ server.lfu_log_factor = CONFIG_DEFAULT_LFU_LOG_FACTOR;
+ server.lfu_decay_time = CONFIG_DEFAULT_LFU_DECAY_TIME;
+ server.hash_max_ziplist_entries = OBJ_HASH_MAX_ZIPLIST_ENTRIES;
+ server.hash_max_ziplist_value = OBJ_HASH_MAX_ZIPLIST_VALUE;
+ server.list_max_ziplist_size = OBJ_LIST_MAX_ZIPLIST_SIZE;
+ server.list_compress_depth = OBJ_LIST_COMPRESS_DEPTH;
+ server.set_max_intset_entries = OBJ_SET_MAX_INTSET_ENTRIES;
+ server.zset_max_ziplist_entries = OBJ_ZSET_MAX_ZIPLIST_ENTRIES;
+ server.zset_max_ziplist_value = OBJ_ZSET_MAX_ZIPLIST_VALUE;
+ server.hll_sparse_max_bytes = CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES;
server.shutdown_asap = 0;
- server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
- server.repl_timeout = REDIS_REPL_TIMEOUT;
- server.repl_min_slaves_to_write = REDIS_DEFAULT_MIN_SLAVES_TO_WRITE;
- server.repl_min_slaves_max_lag = REDIS_DEFAULT_MIN_SLAVES_MAX_LAG;
server.cluster_enabled = 0;
- server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
- server.cluster_migration_barrier = REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER;
- server.cluster_slave_validity_factor = REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY;
- server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
- server.lua_caller = NULL;
- server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
- server.lua_client = NULL;
- server.lua_timedout = 0;
+ server.cluster_node_timeout = CLUSTER_DEFAULT_NODE_TIMEOUT;
+ server.cluster_migration_barrier = CLUSTER_DEFAULT_MIGRATION_BARRIER;
+ server.cluster_slave_validity_factor = CLUSTER_DEFAULT_SLAVE_VALIDITY;
+ server.cluster_require_full_coverage = CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
+ server.cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
+ server.cluster_announce_ip = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP;
+ server.cluster_announce_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT;
+ server.cluster_announce_bus_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT;
server.migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL);
server.next_client_id = 1; /* Client IDs, start from 1 .*/
server.loading_process_events_interval_bytes = (1024*1024*2);
-
- server.lruclock = getLRUClock();
+ server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION;
+ server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE;
+ server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL;
+ server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO;
+ server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT;
+
+ unsigned int lruclock = getLRUClock();
+ atomicSet(server.lruclock,lruclock);
resetServerSaveParams();
appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
+
/* Replication related */
server.masterauth = NULL;
server.masterhost = NULL;
server.masterport = 6379;
server.master = NULL;
server.cached_master = NULL;
- server.repl_master_initial_offset = -1;
- server.repl_state = REDIS_REPL_NONE;
- server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
- server.repl_serve_stale_data = REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA;
- server.repl_slave_ro = REDIS_DEFAULT_SLAVE_READ_ONLY;
+ server.master_initial_offset = -1;
+ server.repl_state = REPL_STATE_NONE;
+ server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
+ server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA;
+ server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY;
+ server.repl_slave_lazy_flush = CONFIG_DEFAULT_SLAVE_LAZY_FLUSH;
server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
- server.repl_disable_tcp_nodelay = REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY;
- server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY;
+ server.repl_disable_tcp_nodelay = CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY;
+ server.repl_diskless_sync = CONFIG_DEFAULT_REPL_DISKLESS_SYNC;
+ server.repl_diskless_sync_delay = CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY;
+ server.repl_ping_slave_period = CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD;
+ server.repl_timeout = CONFIG_DEFAULT_REPL_TIMEOUT;
+ server.repl_min_slaves_to_write = CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE;
+ server.repl_min_slaves_max_lag = CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG;
+ server.slave_priority = CONFIG_DEFAULT_SLAVE_PRIORITY;
+ server.slave_announce_ip = CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP;
+ server.slave_announce_port = CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT;
server.master_repl_offset = 0;
/* Replication partial resync backlog */
server.repl_backlog = NULL;
- server.repl_backlog_size = REDIS_DEFAULT_REPL_BACKLOG_SIZE;
+ server.repl_backlog_size = CONFIG_DEFAULT_REPL_BACKLOG_SIZE;
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
server.repl_backlog_off = 0;
- server.repl_backlog_time_limit = REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
+ server.repl_backlog_time_limit = CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
server.repl_no_slaves_since = time(NULL);
/* Client output buffer limits */
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++)
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
/* Double constants initialization */
@@ -1512,13 +1500,17 @@ void initServerConfig() {
server.lpushCommand = lookupCommandByCString("lpush");
server.lpopCommand = lookupCommandByCString("lpop");
server.rpopCommand = lookupCommandByCString("rpop");
+ server.sremCommand = lookupCommandByCString("srem");
+ server.execCommand = lookupCommandByCString("exec");
+ server.expireCommand = lookupCommandByCString("expire");
+ server.pexpireCommand = lookupCommandByCString("pexpire");
/* Slow log */
- server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
- server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
+ server.slowlog_log_slower_than = CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN;
+ server.slowlog_max_len = CONFIG_DEFAULT_SLOWLOG_MAX_LEN;
/* Latency monitor */
- server.latency_monitor_threshold = REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD;
+ server.latency_monitor_threshold = CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD;
/* Debugging */
server.assert_failed = "<no assertion failed>";
@@ -1528,78 +1520,132 @@ void initServerConfig() {
server.watchdog_period = 0;
}
+extern char **environ;
+
+/* Restart the server, executing the same executable that started this
+ * instance, with the same arguments and configuration file.
+ *
+ * The function is designed to directly call execve() so that the new
+ * server instance will retain the PID of the previous one.
+ *
+ * The list of flags, which may be bitwise ORed together, alter the
+ * behavior of this function:
+ *
+ * RESTART_SERVER_NONE No flags.
+ * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting.
+ * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting.
+ *
+ * On success the function does not return, because the process turns into
+ * a different process. On error C_ERR is returned. */
+int restartServer(int flags, mstime_t delay) {
+ int j;
+
+ /* Check if we still have access to the executable that started this
+ * server instance. */
+ if (access(server.executable,X_OK) == -1) return C_ERR;
+
+ /* Config rewriting. */
+ if (flags & RESTART_SERVER_CONFIG_REWRITE &&
+ server.configfile &&
+ rewriteConfig(server.configfile) == -1) return C_ERR;
+
+ /* Perform a proper shutdown. */
+ if (flags & RESTART_SERVER_GRACEFULLY &&
+ prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK) return C_ERR;
+
+ /* Close all file descriptors, with the exception of stdin, stdout, stderr
+ * which are useful if we restart a Redis server which is not daemonized. */
+ for (j = 3; j < (int)server.maxclients + 1024; j++) {
+ /* Test the descriptor validity before closing it, otherwise
+ * Valgrind issues a warning on close(). */
+ if (fcntl(j,F_GETFD) != -1) close(j);
+ }
+
+ /* Execute the server with the original command line. */
+ if (delay) usleep(delay*1000);
+ execve(server.executable,server.exec_argv,environ);
+
+ /* If an error occurred here, there is nothing we can do, but exit. */
+ _exit(1);
+
+ return C_ERR; /* Never reached. */
+}
+
/* This function will try to raise the max number of open files accordingly to
* the configured max number of clients. It also reserves a number of file
- * descriptors (REDIS_MIN_RESERVED_FDS) for extra operations of
+ * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
* persistence, listening sockets, log files and so forth.
*
* If it will not be possible to set the limit accordingly to the configured
* max number of clients, the function will do the reverse setting
* server.maxclients to the value that we can actually handle. */
void adjustOpenFilesLimit(void) {
- rlim_t maxfiles = server.maxclients+REDIS_MIN_RESERVED_FDS;
+ rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
struct rlimit limit;
if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
- redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
+ serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
strerror(errno));
- server.maxclients = 1024-REDIS_MIN_RESERVED_FDS;
+ server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
} else {
rlim_t oldlimit = limit.rlim_cur;
/* Set the max number of files if the current limit is not enough
* for our needs. */
if (oldlimit < maxfiles) {
- rlim_t f;
+ rlim_t bestlimit;
int setrlimit_error = 0;
/* Try to set the file limit to match 'maxfiles' or at least
* to the higher value supported less than maxfiles. */
- f = maxfiles;
- while(f > oldlimit) {
- int decr_step = 16;
+ bestlimit = maxfiles;
+ while(bestlimit > oldlimit) {
+ rlim_t decr_step = 16;
- limit.rlim_cur = f;
- limit.rlim_max = f;
+ limit.rlim_cur = bestlimit;
+ limit.rlim_max = bestlimit;
if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
setrlimit_error = errno;
- /* We failed to set file limit to 'f'. Try with a
+ /* We failed to set file limit to 'bestlimit'. Try with a
* smaller limit decrementing by a few FDs per iteration. */
- if (f < decr_step) break;
- f -= decr_step;
+ if (bestlimit < decr_step) break;
+ bestlimit -= decr_step;
}
/* Assume that the limit we get initially is still valid if
* our last try was even lower. */
- if (f < oldlimit) f = oldlimit;
-
- if (f != maxfiles) {
- int old_maxclients = server.maxclients;
- server.maxclients = f-REDIS_MIN_RESERVED_FDS;
- if (server.maxclients < 1) {
- redisLog(REDIS_WARNING,"Your current 'ulimit -n' "
- "of %llu is not enough for Redis to start. "
+ if (bestlimit < oldlimit) bestlimit = oldlimit;
+
+ if (bestlimit < maxfiles) {
+ unsigned int old_maxclients = server.maxclients;
+ server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
+ /* maxclients is unsigned so may overflow: in order
+ * to check if maxclients is now logically less than 1
+ * we test indirectly via bestlimit. */
+ if (bestlimit <= CONFIG_MIN_RESERVED_FDS) {
+ serverLog(LL_WARNING,"Your current 'ulimit -n' "
+ "of %llu is not enough for the server to start. "
"Please increase your open file limit to at least "
"%llu. Exiting.",
(unsigned long long) oldlimit,
(unsigned long long) maxfiles);
exit(1);
}
- redisLog(REDIS_WARNING,"You requested maxclients of %d "
+ serverLog(LL_WARNING,"You requested maxclients of %d "
"requiring at least %llu max file descriptors.",
old_maxclients,
(unsigned long long) maxfiles);
- redisLog(REDIS_WARNING,"Redis can't set maximum open files "
+ serverLog(LL_WARNING,"Server can't set maximum open files "
"to %llu because of OS error: %s.",
(unsigned long long) maxfiles, strerror(setrlimit_error));
- redisLog(REDIS_WARNING,"Current maximum open files is %llu. "
+ serverLog(LL_WARNING,"Current maximum open files is %llu. "
"maxclients has been reduced to %d to compensate for "
"low ulimit. "
"If you need higher maxclients increase 'ulimit -n'.",
- (unsigned long long) oldlimit, server.maxclients);
+ (unsigned long long) bestlimit, server.maxclients);
} else {
- redisLog(REDIS_NOTICE,"Increased maximum number of open files "
+ serverLog(LL_NOTICE,"Increased maximum number of open files "
"to %llu (it was originally set to %llu).",
(unsigned long long) maxfiles,
(unsigned long long) oldlimit);
@@ -1608,6 +1654,23 @@ void adjustOpenFilesLimit(void) {
}
}
+/* Check that server.tcp_backlog can be actually enforced in Linux according
+ * to the value of /proc/sys/net/core/somaxconn, or warn about it. */
+void checkTcpBacklogSettings(void) {
+#ifdef HAVE_PROC_SOMAXCONN
+ FILE *fp = fopen("/proc/sys/net/core/somaxconn","r");
+ char buf[1024];
+ if (!fp) return;
+ if (fgets(buf,sizeof(buf),fp) != NULL) {
+ int somaxconn = atoi(buf);
+ if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
+ serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
+ }
+ }
+ fclose(fp);
+#endif
+}
+
/* Initialize a set of file descriptors to listen to the specified 'port'
* binding the addresses specified in the Redis server configuration.
*
@@ -1619,9 +1682,9 @@ void adjustOpenFilesLimit(void) {
* contains no specific addresses to bind, this function will try to
* bind * (all addresses) for both the IPv4 and IPv6 protocols.
*
- * On success the function returns REDIS_OK.
+ * On success the function returns C_OK.
*
- * On error the function returns REDIS_ERR. For the function to be on
+ * On error the function returns C_ERR. For the function to be on
* error, at least one of the server.bindaddr addresses was
* impossible to bind, or no bind addresses were specified in the server
* configuration but the function is not able to bind * for at least
@@ -1634,6 +1697,7 @@ int listenToPort(int port, int *fds, int *count) {
if (server.bindaddr_count == 0) server.bindaddr[0] = NULL;
for (j = 0; j < server.bindaddr_count || j == 0; j++) {
if (server.bindaddr[j] == NULL) {
+ int unsupported = 0;
/* Bind * for both IPv6 and IPv4, we enter here only if
* server.bindaddr_count == 0. */
fds[*count] = anetTcp6Server(server.neterr,port,NULL,
@@ -1641,17 +1705,27 @@ int listenToPort(int port, int *fds, int *count) {
if (fds[*count] != ANET_ERR) {
anetNonBlock(NULL,fds[*count]);
(*count)++;
+ } else if (errno == EAFNOSUPPORT) {
+ unsupported++;
+ serverLog(LL_WARNING,"Not listening to IPv6: unsupported");
}
- fds[*count] = anetTcpServer(server.neterr,port,NULL,
- server.tcp_backlog);
- if (fds[*count] != ANET_ERR) {
- anetNonBlock(NULL,fds[*count]);
- (*count)++;
+
+ if (*count == 1 || unsupported) {
+ /* Bind the IPv4 address as well. */
+ fds[*count] = anetTcpServer(server.neterr,port,NULL,
+ server.tcp_backlog);
+ if (fds[*count] != ANET_ERR) {
+ anetNonBlock(NULL,fds[*count]);
+ (*count)++;
+ } else if (errno == EAFNOSUPPORT) {
+ unsupported++;
+ serverLog(LL_WARNING,"Not listening to IPv4: unsupported");
+ }
}
- /* Exit the loop if we were able to bind * on IPv4 or IPv6,
+ /* Exit the loop if we were able to bind * on IPv4 and IPv6,
* otherwise fds[*count] will be ANET_ERR and we'll print an
* error and return to the caller with an error. */
- if (*count) break;
+ if (*count + unsupported == 2) break;
} else if (strchr(server.bindaddr[j],':')) {
/* Bind IPv6 address. */
fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j],
@@ -1662,41 +1736,53 @@ int listenToPort(int port, int *fds, int *count) {
server.tcp_backlog);
}
if (fds[*count] == ANET_ERR) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Creating Server TCP listening socket %s:%d: %s",
server.bindaddr[j] ? server.bindaddr[j] : "*",
port, server.neterr);
- return REDIS_ERR;
+ return C_ERR;
}
anetNonBlock(NULL,fds[*count]);
(*count)++;
}
- return REDIS_OK;
+ return C_OK;
}
/* Resets the stats that we expose via INFO or other means that we want
* to reset via CONFIG RESETSTAT. The function is also used in order to
* initialize these fields in initServer() at server startup. */
void resetServerStats(void) {
+ int j;
+
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
server.stat_evictedkeys = 0;
server.stat_keyspace_misses = 0;
server.stat_keyspace_hits = 0;
+ server.stat_active_defrag_hits = 0;
+ server.stat_active_defrag_misses = 0;
+ server.stat_active_defrag_key_hits = 0;
+ server.stat_active_defrag_key_misses = 0;
server.stat_fork_time = 0;
server.stat_fork_rate = 0;
server.stat_rejected_conn = 0;
server.stat_sync_full = 0;
server.stat_sync_partial_ok = 0;
server.stat_sync_partial_err = 0;
- memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples));
- server.ops_sec_idx = 0;
- server.ops_sec_last_sample_time = mstime();
- server.ops_sec_last_sample_ops = 0;
+ for (j = 0; j < STATS_METRIC_COUNT; j++) {
+ server.inst_metric[j].idx = 0;
+ server.inst_metric[j].last_sample_time = mstime();
+ server.inst_metric[j].last_sample_count = 0;
+ memset(server.inst_metric[j].samples,0,
+ sizeof(server.inst_metric[j].samples));
+ }
+ server.stat_net_input_bytes = 0;
+ server.stat_net_output_bytes = 0;
+ server.aof_delayed_fsync = 0;
}
-void initServer() {
+void initServer(void) {
int j;
signal(SIGHUP, SIG_IGN);
@@ -1714,21 +1800,29 @@ void initServer() {
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
+ server.clients_pending_write = listCreate();
server.slaveseldb = -1; /* Force to emit the first SELECT command. */
server.unblocked_clients = listCreate();
server.ready_keys = listCreate();
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
server.clients_paused = 0;
+ server.system_memory_size = zmalloc_get_memory_size();
createSharedObjects();
adjustOpenFilesLimit();
- server.el = aeCreateEventLoop(server.maxclients+REDIS_EVENTLOOP_FDSET_INCR);
+ server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
+ if (server.el == NULL) {
+ serverLog(LL_WARNING,
+ "Failed creating the event loop. Error message: '%s'",
+ strerror(errno));
+ exit(1);
+ }
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
/* Open the TCP listening socket for the user commands. */
if (server.port != 0 &&
- listenToPort(server.port,server.ipfd,&server.ipfd_count) == REDIS_ERR)
+ listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
exit(1);
/* Open the listening Unix domain socket. */
@@ -1737,7 +1831,7 @@ void initServer() {
server.sofd = anetUnixServer(server.neterr,server.unixsocket,
server.unixsocketperm, server.tcp_backlog);
if (server.sofd == ANET_ERR) {
- redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
+ serverLog(LL_WARNING, "Opening Unix socket: %s", server.neterr);
exit(1);
}
anetNonBlock(NULL,server.sofd);
@@ -1745,7 +1839,7 @@ void initServer() {
/* Abort if there are no listening sockets at all. */
if (server.ipfd_count == 0 && server.sofd < 0) {
- redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
+ serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
exit(1);
}
@@ -1754,12 +1848,12 @@ void initServer() {
server.db[j].dict = dictCreate(&dbDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].ready_keys = dictCreate(&setDictType,NULL);
+ server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL);
server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].eviction_pool = evictionPoolAlloc();
server.db[j].id = j;
server.db[j].avg_ttl = 0;
}
+ evictionPoolAlloc(); /* Initialize the LRU keys pool. */
server.pubsub_channels = dictCreate(&keylistDictType,NULL);
server.pubsub_patterns = listCreate();
listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
@@ -1767,6 +1861,11 @@ void initServer() {
server.cronloops = 0;
server.rdb_child_pid = -1;
server.aof_child_pid = -1;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_bgsave_scheduled = 0;
+ server.child_info_pipe[0] = -1;
+ server.child_info_pipe[1] = -1;
+ server.child_info_data.magic = 0;
aofRewriteBufferReset();
server.aof_buf = sdsempty();
server.lastsave = time(NULL); /* At startup we consider the DB saved. */
@@ -1778,17 +1877,20 @@ void initServer() {
/* A few stats we don't want to reset: server startup time, and peak mem. */
server.stat_starttime = time(NULL);
server.stat_peak_memory = 0;
+ server.stat_rdb_cow_bytes = 0;
+ server.stat_aof_cow_bytes = 0;
server.resident_set_size = 0;
- server.lastbgsave_status = REDIS_OK;
- server.aof_last_write_status = REDIS_OK;
+ server.lastbgsave_status = C_OK;
+ server.aof_last_write_status = C_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
updateCachedTime();
- /* Create the serverCron() time event, that's our main way to process
- * background operations. */
- if(aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
- redisPanic("Can't create the serverCron time event.");
+ /* Create the timer callback, this is our way to process many background
+ * operations incrementally, like clients timeout, eviction of unaccessed
+ * expired keys and so forth. */
+ if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
+ serverPanic("Can't create event loop timers.");
exit(1);
}
@@ -1798,19 +1900,29 @@ void initServer() {
if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
acceptTcpHandler,NULL) == AE_ERR)
{
- redisPanic(
+ serverPanic(
"Unrecoverable error creating server.ipfd file event.");
}
}
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
- acceptUnixHandler,NULL) == AE_ERR) redisPanic("Unrecoverable error creating server.sofd file event.");
+ acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
+
+
+ /* Register a readable event for the pipe used to awake the event loop
+ * when a blocked client in a module needs attention. */
+ if (aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE,
+ moduleBlockedClientPipeReadable,NULL) == AE_ERR) {
+ serverPanic(
+ "Error registering the readable event for the module "
+ "blocked clients subsystem.");
+ }
/* Open the AOF file if needed. */
- if (server.aof_state == REDIS_AOF_ON) {
+ if (server.aof_state == AOF_ON) {
server.aof_fd = open(server.aof_filename,
O_WRONLY|O_APPEND|O_CREAT,0644);
if (server.aof_fd == -1) {
- redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
+ serverLog(LL_WARNING, "Can't open the append-only file: %s",
strerror(errno));
exit(1);
}
@@ -1821,17 +1933,18 @@ void initServer() {
* at 3 GB using maxmemory with 'noeviction' policy'. This avoids
* useless crashes of the Redis instance for out of memory. */
if (server.arch_bits == 32 && server.maxmemory == 0) {
- redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
+ serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
+ server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
}
if (server.cluster_enabled) clusterInit();
replicationScriptCacheInit();
- scriptingInit();
+ scriptingInit(1);
slowlogInit();
latencyMonitorInit();
bioInit();
+ server.initial_memory_usage = zmalloc_used_memory();
}
/* Populates the Redis Command Table starting from the hard coded list
@@ -1847,20 +1960,20 @@ void populateCommandTable(void) {
while(*f != '\0') {
switch(*f) {
- case 'w': c->flags |= REDIS_CMD_WRITE; break;
- case 'r': c->flags |= REDIS_CMD_READONLY; break;
- case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
- case 'a': c->flags |= REDIS_CMD_ADMIN; break;
- case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
- case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
- case 'R': c->flags |= REDIS_CMD_RANDOM; break;
- case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
- case 'l': c->flags |= REDIS_CMD_LOADING; break;
- case 't': c->flags |= REDIS_CMD_STALE; break;
- case 'M': c->flags |= REDIS_CMD_SKIP_MONITOR; break;
- case 'k': c->flags |= REDIS_CMD_ASKING; break;
- case 'F': c->flags |= REDIS_CMD_FAST; break;
- default: redisPanic("Unsupported command flag"); break;
+ case 'w': c->flags |= CMD_WRITE; break;
+ case 'r': c->flags |= CMD_READONLY; break;
+ case 'm': c->flags |= CMD_DENYOOM; break;
+ case 'a': c->flags |= CMD_ADMIN; break;
+ case 'p': c->flags |= CMD_PUBSUB; break;
+ case 's': c->flags |= CMD_NOSCRIPT; break;
+ case 'R': c->flags |= CMD_RANDOM; break;
+ case 'S': c->flags |= CMD_SORT_FOR_SCRIPT; break;
+ case 'l': c->flags |= CMD_LOADING; break;
+ case 't': c->flags |= CMD_STALE; break;
+ case 'M': c->flags |= CMD_SKIP_MONITOR; break;
+ case 'k': c->flags |= CMD_ASKING; break;
+ case 'F': c->flags |= CMD_FAST; break;
+ default: serverPanic("Unsupported command flag"); break;
}
f++;
}
@@ -1869,7 +1982,7 @@ void populateCommandTable(void) {
/* Populate an additional dictionary that will be unaffected
* by rename-command statements in redis.conf. */
retval2 = dictAdd(server.orig_commands, sdsnew(c->name), c);
- redisAssert(retval1 == DICT_OK && retval2 == DICT_OK);
+ serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
}
}
@@ -1955,37 +2068,113 @@ struct redisCommand *lookupCommandOrOriginal(sds name) {
* to AOF and Slaves.
*
* flags are an xor between:
- * + REDIS_PROPAGATE_NONE (no propagation of command at all)
- * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
- * + REDIS_PROPAGATE_REPL (propagate into the replication link)
+ * + PROPAGATE_NONE (no propagation of command at all)
+ * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
+ * + PROPAGATE_REPL (propagate into the replication link)
+ *
+ * This should not be used inside commands implementation. Use instead
+ * alsoPropagate(), preventCommandPropagation(), forceCommandPropagation().
*/
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
{
- if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
+ if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)
feedAppendOnlyFile(cmd,dbid,argv,argc);
- if (flags & REDIS_PROPAGATE_REPL)
+ if (flags & PROPAGATE_REPL)
replicationFeedSlaves(server.slaves,dbid,argv,argc);
}
/* Used inside commands to schedule the propagation of additional commands
- * after the current command is propagated to AOF / Replication. */
+ * after the current command is propagated to AOF / Replication.
+ *
+ * 'cmd' must be a pointer to the Redis command to replicate, dbid is the
+ * database ID the command should be propagated into.
+ * Arguments of the command to propagate are passed as an array of redis
+ * objects pointers of len 'argc', using the 'argv' vector.
+ *
+ * The function does not take a reference to the passed 'argv' vector,
+ * so it is up to the caller to release the passed argv (but it is usually
+ * stack allocated). The function automatically increments ref count of
+ * passed objects, so the caller does not need to. */
void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int target)
{
- redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
+ robj **argvcopy;
+ int j;
+
+ if (server.loading) return; /* No propagation during loading. */
+
+ argvcopy = zmalloc(sizeof(robj*)*argc);
+ for (j = 0; j < argc; j++) {
+ argvcopy[j] = argv[j];
+ incrRefCount(argv[j]);
+ }
+ redisOpArrayAppend(&server.also_propagate,cmd,dbid,argvcopy,argc,target);
}
/* It is possible to call the function forceCommandPropagation() inside a
- * Redis command implementaiton in order to to force the propagation of a
+ * Redis command implementation in order to force the propagation of a
* specific command execution into AOF / Replication. */
-void forceCommandPropagation(redisClient *c, int flags) {
- if (flags & REDIS_PROPAGATE_REPL) c->flags |= REDIS_FORCE_REPL;
- if (flags & REDIS_PROPAGATE_AOF) c->flags |= REDIS_FORCE_AOF;
+void forceCommandPropagation(client *c, int flags) {
+ if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
+ if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
+}
+
+/* Avoid that the executed command is propagated at all. This way we
+ * are free to just propagate what we want using the alsoPropagate()
+ * API. */
+void preventCommandPropagation(client *c) {
+ c->flags |= CLIENT_PREVENT_PROP;
}
-/* Call() is the core of Redis execution of a command */
-void call(redisClient *c, int flags) {
+/* AOF specific version of preventCommandPropagation(). */
+void preventCommandAOF(client *c) {
+ c->flags |= CLIENT_PREVENT_AOF_PROP;
+}
+
+/* Replication specific version of preventCommandPropagation(). */
+void preventCommandReplication(client *c) {
+ c->flags |= CLIENT_PREVENT_REPL_PROP;
+}
+
+/* Call() is the core of Redis execution of a command.
+ *
+ * The following flags can be passed:
+ * CMD_CALL_NONE No flags.
+ * CMD_CALL_SLOWLOG Check command speed and log in the slow log if needed.
+ * CMD_CALL_STATS Populate command stats.
+ * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
+ * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
+ *
+ * The exact propagation behavior depends on the client flags.
+ * Specifically:
+ *
+ * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
+ * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
+ * in the call flags, then the command is propagated even if the
+ * dataset was not affected by the command.
+ * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
+ * are set, the propagation into AOF or to slaves is not performed even
+ * if the command modified the dataset.
+ *
+ * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
+ * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
+ * slaves propagation will never occur.
+ *
+ * Client flags are modified by the implementation of a given command
+ * using the following API:
+ *
+ * forceCommandPropagation(client *c, int flags);
+ * preventCommandPropagation(client *c);
+ * preventCommandAOF(client *c);
+ * preventCommandReplication(client *c);
+ *
+ */
+void call(client *c, int flags) {
long long dirty, start, duration;
int client_old_flags = c->flags;
@@ -1993,77 +2182,111 @@ void call(redisClient *c, int flags) {
* not generated from reading an AOF. */
if (listLength(server.monitors) &&
!server.loading &&
- !(c->cmd->flags & REDIS_CMD_SKIP_MONITOR))
+ !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
{
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
}
- /* Call the command. */
- c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL);
+ /* Initialization: clear the flags that must be set by the command on
+ * demand, and initialize the array for additional commands propagation. */
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+ redisOpArray prev_also_propagate = server.also_propagate;
redisOpArrayInit(&server.also_propagate);
+
+ /* Call the command. */
dirty = server.dirty;
start = ustime();
c->cmd->proc(c);
duration = ustime()-start;
dirty = server.dirty-dirty;
+ if (dirty < 0) dirty = 0;
/* When EVAL is called loading the AOF we don't want commands called
* from Lua to go into the slowlog or to populate statistics. */
- if (server.loading && c->flags & REDIS_LUA_CLIENT)
- flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
+ if (server.loading && c->flags & CLIENT_LUA)
+ flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS);
/* If the caller is Lua, we want to force the EVAL caller to propagate
* the script if the command flag or client flag are forcing the
* propagation. */
- if (c->flags & REDIS_LUA_CLIENT && server.lua_caller) {
- if (c->flags & REDIS_FORCE_REPL)
- server.lua_caller->flags |= REDIS_FORCE_REPL;
- if (c->flags & REDIS_FORCE_AOF)
- server.lua_caller->flags |= REDIS_FORCE_AOF;
+ if (c->flags & CLIENT_LUA && server.lua_caller) {
+ if (c->flags & CLIENT_FORCE_REPL)
+ server.lua_caller->flags |= CLIENT_FORCE_REPL;
+ if (c->flags & CLIENT_FORCE_AOF)
+ server.lua_caller->flags |= CLIENT_FORCE_AOF;
}
/* Log the command into the Slow log if needed, and populate the
* per-command statistics that we show in INFO commandstats. */
- if (flags & REDIS_CALL_SLOWLOG && c->cmd->proc != execCommand) {
- char *latency_event = (c->cmd->flags & REDIS_CMD_FAST) ?
+ if (flags & CMD_CALL_SLOWLOG && c->cmd->proc != execCommand) {
+ char *latency_event = (c->cmd->flags & CMD_FAST) ?
"fast-command" : "command";
latencyAddSampleIfNeeded(latency_event,duration/1000);
- slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
+ slowlogPushEntryIfNeeded(c,c->argv,c->argc,duration);
}
- if (flags & REDIS_CALL_STATS) {
- c->cmd->microseconds += duration;
- c->cmd->calls++;
+ if (flags & CMD_CALL_STATS) {
+ c->lastcmd->microseconds += duration;
+ c->lastcmd->calls++;
}
/* Propagate the command into the AOF and replication link */
- if (flags & REDIS_CALL_PROPAGATE) {
- int flags = REDIS_PROPAGATE_NONE;
-
- if (c->flags & REDIS_FORCE_REPL) flags |= REDIS_PROPAGATE_REPL;
- if (c->flags & REDIS_FORCE_AOF) flags |= REDIS_PROPAGATE_AOF;
- if (dirty)
- flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
- if (flags != REDIS_PROPAGATE_NONE)
- propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
- }
-
- /* Restore the old FORCE_AOF/REPL flags, since call can be executed
+ if (flags & CMD_CALL_PROPAGATE &&
+ (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP)
+ {
+ int propagate_flags = PROPAGATE_NONE;
+
+ /* Check if the command operated changes in the data set. If so
+ * set for replication / AOF propagation. */
+ if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
+
+ /* If the client forced AOF / replication of the command, set
+ * the flags regardless of the command effects on the data set. */
+ if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
+ if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
+
+ /* However prevent AOF / replication propagation if the command
+ * implementation called preventCommandPropagation() or similar,
+ * or if we don't have the call() flags to do so. */
+ if (c->flags & CLIENT_PREVENT_REPL_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_REPL))
+ propagate_flags &= ~PROPAGATE_REPL;
+ if (c->flags & CLIENT_PREVENT_AOF_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_AOF))
+ propagate_flags &= ~PROPAGATE_AOF;
+
+ /* Call propagate() only if at least one of AOF / replication
+ * propagation is needed. */
+ if (propagate_flags != PROPAGATE_NONE)
+ propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags);
+ }
+
+ /* Restore the old replication flags, since call() can be executed
* recursively. */
- c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL);
- c->flags |= client_old_flags & (REDIS_FORCE_AOF|REDIS_FORCE_REPL);
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+ c->flags |= client_old_flags &
+ (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
/* Handle the alsoPropagate() API to handle commands that want to propagate
- * multiple separated commands. */
+ * multiple separated commands. Note that alsoPropagate() is not affected
+ * by CLIENT_PREVENT_PROP flag. */
if (server.also_propagate.numops) {
int j;
redisOp *rop;
- for (j = 0; j < server.also_propagate.numops; j++) {
- rop = &server.also_propagate.ops[j];
- propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
+ if (flags & CMD_CALL_PROPAGATE) {
+ for (j = 0; j < server.also_propagate.numops; j++) {
+ rop = &server.also_propagate.ops[j];
+ int target = rop->target;
+ /* Whatever the command wish is, we honor the call() flags. */
+ if (!(flags&CMD_CALL_PROPAGATE_AOF)) target &= ~PROPAGATE_AOF;
+ if (!(flags&CMD_CALL_PROPAGATE_REPL)) target &= ~PROPAGATE_REPL;
+ if (target)
+ propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,target);
+ }
}
redisOpArrayFree(&server.also_propagate);
}
+ server.also_propagate = prev_also_propagate;
server.stat_numcommands++;
}
@@ -2072,18 +2295,18 @@ void call(redisClient *c, int flags) {
* processCommand() execute the command or prepare the
* server for a bulk read from the client.
*
- * If 1 is returned the client is still alive and valid and
+ * If C_OK is returned the client is still alive and valid and
* other operations can be performed by the caller. Otherwise
- * if 0 is returned the client was destroyed (i.e. after QUIT). */
-int processCommand(redisClient *c) {
+ * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
+int processCommand(client *c) {
/* The QUIT command is handled separately. Normal command procs will
* go through checking for replication and QUIT will cause trouble
* when FORCE_REPLICATION is enabled and would be implemented in
* a regular command proc. */
if (!strcasecmp(c->argv[0]->ptr,"quit")) {
addReply(c,shared.ok);
- c->flags |= REDIS_CLOSE_AFTER_REPLY;
- return REDIS_ERR;
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ return C_ERR;
}
/* Now lookup the command and check ASAP about trivial error conditions
@@ -2093,13 +2316,13 @@ int processCommand(redisClient *c) {
flagTransaction(c);
addReplyErrorFormat(c,"unknown command '%s'",
(char*)c->argv[0]->ptr);
- return REDIS_OK;
+ return C_OK;
} else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
(c->argc < -c->cmd->arity)) {
flagTransaction(c);
addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
c->cmd->name);
- return REDIS_OK;
+ return C_OK;
}
/* Check if the user is authenticated */
@@ -2107,7 +2330,7 @@ int processCommand(redisClient *c) {
{
flagTransaction(c);
addReply(c,shared.noautherr);
- return REDIS_OK;
+ return C_OK;
}
/* If cluster is enabled perform the cluster redirection here.
@@ -2115,39 +2338,24 @@ int processCommand(redisClient *c) {
* 1) The sender of this command is our master.
* 2) The command has no key arguments. */
if (server.cluster_enabled &&
- !(c->flags & REDIS_MASTER) &&
- !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0))
+ !(c->flags & CLIENT_MASTER) &&
+ !(c->flags & CLIENT_LUA &&
+ server.lua_caller->flags & CLIENT_MASTER) &&
+ !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0 &&
+ c->cmd->proc != execCommand))
{
int hashslot;
-
- if (server.cluster->state != REDIS_CLUSTER_OK) {
- flagTransaction(c);
- addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down. Use CLUSTER INFO for more information\r\n"));
- return REDIS_OK;
- } else {
- int error_code;
- clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&error_code);
- if (n == NULL) {
- flagTransaction(c);
- if (error_code == REDIS_CLUSTER_REDIR_CROSS_SLOT) {
- addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
- } else if (error_code == REDIS_CLUSTER_REDIR_UNSTABLE) {
- /* The request spawns mutliple keys in the same slot,
- * but the slot is not "stable" currently as there is
- * a migration or import in progress. */
- addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
- } else {
- redisPanic("getNodeByQuery() unknown error.");
- }
- return REDIS_OK;
- } else if (n != server.cluster->myself) {
+ int error_code;
+ clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
+ &hashslot,&error_code);
+ if (n == NULL || n != server.cluster->myself) {
+ if (c->cmd->proc == execCommand) {
+ discardTransaction(c);
+ } else {
flagTransaction(c);
- addReplySds(c,sdscatprintf(sdsempty(),
- "-%s %d %s:%d\r\n",
- (error_code == REDIS_CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
- hashslot,n->ip,n->port));
- return REDIS_OK;
}
+ clusterRedirectClient(c,n,hashslot,error_code);
+ return C_OK;
}
}
@@ -2158,10 +2366,16 @@ int processCommand(redisClient *c) {
* is returning an error. */
if (server.maxmemory) {
int retval = freeMemoryIfNeeded();
- if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
+ /* freeMemoryIfNeeded may flush slave output buffers. This may result
+ * into a slave, that may be the active client, to be freed. */
+ if (server.current_client == NULL) return C_ERR;
+
+ /* It was impossible to free enough memory, and the command the client
+ * is trying to execute is denied during OOM conditions? Error. */
+ if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
flagTransaction(c);
addReply(c, shared.oomerr);
- return REDIS_OK;
+ return C_OK;
}
}
@@ -2169,21 +2383,21 @@ int processCommand(redisClient *c) {
* and if this is a master instance. */
if (((server.stop_writes_on_bgsave_err &&
server.saveparamslen > 0 &&
- server.lastbgsave_status == REDIS_ERR) ||
- server.aof_last_write_status == REDIS_ERR) &&
+ server.lastbgsave_status == C_ERR) ||
+ server.aof_last_write_status == C_ERR) &&
server.masterhost == NULL &&
- (c->cmd->flags & REDIS_CMD_WRITE ||
+ (c->cmd->flags & CMD_WRITE ||
c->cmd->proc == pingCommand))
{
flagTransaction(c);
- if (server.aof_last_write_status == REDIS_OK)
+ if (server.aof_last_write_status == C_OK)
addReply(c, shared.bgsaveerr);
else
addReplySds(c,
sdscatprintf(sdsempty(),
"-MISCONF Errors writing to the AOF file: %s\r\n",
strerror(server.aof_last_write_errno)));
- return REDIS_OK;
+ return C_OK;
}
/* Don't accept write commands if there are not enough good slaves and
@@ -2191,51 +2405,51 @@ int processCommand(redisClient *c) {
if (server.masterhost == NULL &&
server.repl_min_slaves_to_write &&
server.repl_min_slaves_max_lag &&
- c->cmd->flags & REDIS_CMD_WRITE &&
+ c->cmd->flags & CMD_WRITE &&
server.repl_good_slaves_count < server.repl_min_slaves_to_write)
{
flagTransaction(c);
addReply(c, shared.noreplicaserr);
- return REDIS_OK;
+ return C_OK;
}
/* Don't accept write commands if this is a read only slave. But
* accept write commands if this is our master. */
if (server.masterhost && server.repl_slave_ro &&
- !(c->flags & REDIS_MASTER) &&
- c->cmd->flags & REDIS_CMD_WRITE)
+ !(c->flags & CLIENT_MASTER) &&
+ c->cmd->flags & CMD_WRITE)
{
addReply(c, shared.roslaveerr);
- return REDIS_OK;
+ return C_OK;
}
/* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
- if (c->flags & REDIS_PUBSUB &&
+ if (c->flags & CLIENT_PUBSUB &&
c->cmd->proc != pingCommand &&
c->cmd->proc != subscribeCommand &&
c->cmd->proc != unsubscribeCommand &&
c->cmd->proc != psubscribeCommand &&
c->cmd->proc != punsubscribeCommand) {
- addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
- return REDIS_OK;
+ addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / PING / QUIT allowed in this context");
+ return C_OK;
}
/* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
* we are a slave with a broken link with master. */
- if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
server.repl_serve_stale_data == 0 &&
- !(c->cmd->flags & REDIS_CMD_STALE))
+ !(c->cmd->flags & CMD_STALE))
{
flagTransaction(c);
addReply(c, shared.masterdownerr);
- return REDIS_OK;
+ return C_OK;
}
/* Loading DB? Return an error if the command has not the
- * REDIS_CMD_LOADING flag. */
- if (server.loading && !(c->cmd->flags & REDIS_CMD_LOADING)) {
+ * CMD_LOADING flag. */
+ if (server.loading && !(c->cmd->flags & CMD_LOADING)) {
addReply(c, shared.loadingerr);
- return REDIS_OK;
+ return C_OK;
}
/* Lua script too slow? Only allow a limited number of commands. */
@@ -2251,23 +2465,23 @@ int processCommand(redisClient *c) {
{
flagTransaction(c);
addReply(c, shared.slowscripterr);
- return REDIS_OK;
+ return C_OK;
}
/* Exec the command */
- if (c->flags & REDIS_MULTI &&
+ if (c->flags & CLIENT_MULTI &&
c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
{
queueMultiCommand(c);
addReply(c,shared.queued);
} else {
- call(c,REDIS_CALL_FULL);
+ call(c,CMD_CALL_FULL);
c->woff = server.master_repl_offset;
if (listLength(server.ready_keys))
handleClientsBlockedOnLists();
}
- return REDIS_OK;
+ return C_OK;
}
/*================================== Shutdown =============================== */
@@ -2282,64 +2496,78 @@ void closeListeningSockets(int unlink_unix_socket) {
if (server.cluster_enabled)
for (j = 0; j < server.cfd_count; j++) close(server.cfd[j]);
if (unlink_unix_socket && server.unixsocket) {
- redisLog(REDIS_NOTICE,"Removing the unix socket file.");
+ serverLog(LL_NOTICE,"Removing the unix socket file.");
unlink(server.unixsocket); /* don't care if this fails */
}
}
int prepareForShutdown(int flags) {
- int save = flags & REDIS_SHUTDOWN_SAVE;
- int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
+ int save = flags & SHUTDOWN_SAVE;
+ int nosave = flags & SHUTDOWN_NOSAVE;
+
+ serverLog(LL_WARNING,"User requested shutdown...");
+
+ /* Kill all the Lua debugger forked sessions. */
+ ldbKillForkedSessions();
- redisLog(REDIS_WARNING,"User requested shutdown...");
/* Kill the saving child if there is a background saving in progress.
We want to avoid race conditions, for instance our saving child may
overwrite the synchronous saving did by SHUTDOWN. */
if (server.rdb_child_pid != -1) {
- redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
+ serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
kill(server.rdb_child_pid,SIGUSR1);
rdbRemoveTempFile(server.rdb_child_pid);
}
- if (server.aof_state != REDIS_AOF_OFF) {
+
+ if (server.aof_state != AOF_OFF) {
/* Kill the AOF saving child as the AOF we already have may be longer
* but contains the full dataset anyway. */
if (server.aof_child_pid != -1) {
/* If we have AOF enabled but haven't written the AOF yet, don't
* shutdown or else the dataset will be lost. */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE) {
- redisLog(REDIS_WARNING, "Writing initial AOF, can't exit.");
- return REDIS_ERR;
+ if (server.aof_state == AOF_WAIT_REWRITE) {
+ serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
+ return C_ERR;
}
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"There is a child rewriting the AOF. Killing it!");
kill(server.aof_child_pid,SIGUSR1);
}
/* Append only file: fsync() the AOF and exit */
- redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
+ serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
aof_fsync(server.aof_fd);
}
+
+ /* Create a new RDB file before exiting. */
if ((server.saveparamslen > 0 && !nosave) || save) {
- redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
+ serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
/* Snapshotting. Perform a SYNC SAVE and exit */
- if (rdbSave(server.rdb_filename) != REDIS_OK) {
+ if (rdbSave(server.rdb_filename,NULL) != C_OK) {
/* Ooops.. error saving! The best we can do is to continue
* operating. Note that if there was a background saving process,
* in the next cron() Redis will be notified that the background
* saving aborted, handling special stuff like slaves pending for
* synchronization... */
- redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
- return REDIS_ERR;
+ serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
+ return C_ERR;
}
}
- if (server.daemonize) {
- redisLog(REDIS_NOTICE,"Removing the pid file.");
+
+ /* Remove the pid file if possible and needed. */
+ if (server.daemonize || server.pidfile) {
+ serverLog(LL_NOTICE,"Removing the pid file.");
unlink(server.pidfile);
}
+
+ /* Best effort flush of slave output buffers, so that we hopefully
+ * send them pending writes. */
+ flushSlavesOutputBuffers();
+
/* Close the listening sockets. Apparently this allows faster restarts. */
closeListeningSockets(1);
- redisLog(REDIS_WARNING,"%s is now ready to exit, bye bye...",
+ serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
server.sentinel_mode ? "Sentinel" : "Redis");
- return REDIS_OK;
+ return C_OK;
}
/*================================== Commands =============================== */
@@ -2354,14 +2582,14 @@ int prepareForShutdown(int flags) {
* possible branch misprediction related leak.
*/
int time_independent_strcmp(char *a, char *b) {
- char bufa[REDIS_AUTHPASS_MAX_LEN], bufb[REDIS_AUTHPASS_MAX_LEN];
+ char bufa[CONFIG_AUTHPASS_MAX_LEN], bufb[CONFIG_AUTHPASS_MAX_LEN];
/* The above two strlen perform len(a) + len(b) operations where either
* a or b are fixed (our password) length, and the difference is only
* relative to the length of the user provided string, so no information
* leak is possible in the following two lines of code. */
- int alen = strlen(a);
- int blen = strlen(b);
- int j;
+ unsigned int alen = strlen(a);
+ unsigned int blen = strlen(b);
+ unsigned int j;
int diff = 0;
/* We can't compare strings longer than our static buffers.
@@ -2386,7 +2614,7 @@ int time_independent_strcmp(char *a, char *b) {
return diff; /* If zero strings are the same. */
}
-void authCommand(redisClient *c) {
+void authCommand(client *c) {
if (!server.requirepass) {
addReplyError(c,"Client sent AUTH, but no password is set");
} else if (!time_independent_strcmp(c->argv[1]->ptr, server.requirepass)) {
@@ -2400,7 +2628,7 @@ void authCommand(redisClient *c) {
/* The PING command. It works in a different way if the client is in
* in Pub/Sub mode. */
-void pingCommand(redisClient *c) {
+void pingCommand(client *c) {
/* The command takes zero or one arguments. */
if (c->argc > 2) {
addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
@@ -2408,7 +2636,7 @@ void pingCommand(redisClient *c) {
return;
}
- if (c->flags & REDIS_PUBSUB) {
+ if (c->flags & CLIENT_PUBSUB) {
addReply(c,shared.mbulkhdr[2]);
addReplyBulkCBuffer(c,"pong",4);
if (c->argc == 1)
@@ -2423,11 +2651,11 @@ void pingCommand(redisClient *c) {
}
}
-void echoCommand(redisClient *c) {
+void echoCommand(client *c) {
addReplyBulk(c,c->argv[1]);
}
-void timeCommand(redisClient *c) {
+void timeCommand(client *c) {
struct timeval tv;
/* gettimeofday() can only fail if &tv is a bad address so we
@@ -2438,9 +2666,8 @@ void timeCommand(redisClient *c) {
addReplyBulkLongLong(c,tv.tv_usec);
}
-
/* Helper function for addReplyCommand() to output flags. */
-int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *reply) {
+int addReplyCommandFlag(client *c, struct redisCommand *cmd, int f, char *reply) {
if (cmd->flags & f) {
addReplyStatus(c, reply);
return 1;
@@ -2449,7 +2676,7 @@ int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *r
}
/* Output the representation of a Redis command. Used by the COMMAND command. */
-void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
+void addReplyCommand(client *c, struct redisCommand *cmd) {
if (!cmd) {
addReply(c, shared.nullbulk);
} else {
@@ -2460,20 +2687,22 @@ void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
int flagcount = 0;
void *flaglen = addDeferredMultiBulkLength(c);
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_WRITE, "write");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_READONLY, "readonly");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_DENYOOM, "denyoom");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_ADMIN, "admin");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_PUBSUB, "pubsub");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_NOSCRIPT, "noscript");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_RANDOM, "random");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_SORT_FOR_SCRIPT,"sort_for_script");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_LOADING, "loading");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_STALE, "stale");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_SKIP_MONITOR, "skip_monitor");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_ASKING, "asking");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_FAST, "fast");
- if (cmd->getkeys_proc) {
+ flagcount += addReplyCommandFlag(c,cmd,CMD_WRITE, "write");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_READONLY, "readonly");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_DENYOOM, "denyoom");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_ADMIN, "admin");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_PUBSUB, "pubsub");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_NOSCRIPT, "noscript");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_RANDOM, "random");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_SORT_FOR_SCRIPT,"sort_for_script");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_LOADING, "loading");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_STALE, "stale");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_SKIP_MONITOR, "skip_monitor");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_ASKING, "asking");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_FAST, "fast");
+ if ((cmd->getkeys_proc && !(cmd->flags & CMD_MODULE)) ||
+ cmd->flags & CMD_MODULE_GETKEYS)
+ {
addReplyStatus(c, "movablekeys");
flagcount += 1;
}
@@ -2486,7 +2715,7 @@ void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
}
/* COMMAND <subcommand> <args> */
-void commandCommand(redisClient *c) {
+void commandCommand(client *c) {
dictIterator *di;
dictEntry *de;
@@ -2547,6 +2776,15 @@ void bytesToHuman(char *s, unsigned long long n) {
} else if (n < (1024LL*1024*1024*1024)) {
d = (double)n/(1024LL*1024*1024);
sprintf(s,"%.2fG",d);
+ } else if (n < (1024LL*1024*1024*1024*1024)) {
+ d = (double)n/(1024LL*1024*1024*1024);
+ sprintf(s,"%.2fT",d);
+ } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
+ d = (double)n/(1024LL*1024*1024*1024*1024);
+ sprintf(s,"%.2fP",d);
+ } else {
+ /* Let's hope we never need this */
+ sprintf(s,"%lluB",n);
}
}
@@ -2562,10 +2800,9 @@ sds genRedisInfoString(char *section) {
int allsections = 0, defsections = 0;
int sections = 0;
- if (section) {
- allsections = strcasecmp(section,"all") == 0;
- defsections = strcasecmp(section,"default") == 0;
- }
+ if (section == NULL) section = "default";
+ allsections = strcasecmp(section,"all") == 0;
+ defsections = strcasecmp(section,"default") == 0;
getrusage(RUSAGE_SELF, &self_ru);
getrusage(RUSAGE_CHILDREN, &c_ru);
@@ -2589,6 +2826,8 @@ sds genRedisInfoString(char *section) {
call_uname = 0;
}
+ unsigned int lruclock;
+ atomicGet(server.lruclock,lruclock);
info = sdscatprintf(info,
"# Server\r\n"
"redis_version:%s\r\n"
@@ -2599,6 +2838,7 @@ sds genRedisInfoString(char *section) {
"os:%s %s %s\r\n"
"arch_bits:%d\r\n"
"multiplexing_api:%s\r\n"
+ "atomicvar_api:%s\r\n"
"gcc_version:%d.%d.%d\r\n"
"process_id:%ld\r\n"
"run_id:%s\r\n"
@@ -2607,6 +2847,7 @@ sds genRedisInfoString(char *section) {
"uptime_in_days:%jd\r\n"
"hz:%d\r\n"
"lru_clock:%ld\r\n"
+ "executable:%s\r\n"
"config_file:%s\r\n",
REDIS_VERSION,
redisGitSHA1(),
@@ -2616,6 +2857,7 @@ sds genRedisInfoString(char *section) {
name.sysname, name.release, name.machine,
server.arch_bits,
aeGetApiName(),
+ REDIS_ATOMIC_API,
#ifdef __GNUC__
__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
#else
@@ -2627,7 +2869,8 @@ sds genRedisInfoString(char *section) {
(intmax_t)uptime,
(intmax_t)(uptime/(3600*24)),
server.hz,
- (unsigned long) server.lruclock,
+ (unsigned long) lruclock,
+ server.executable ? server.executable : "",
server.configfile ? server.configfile : "");
}
@@ -2649,7 +2892,15 @@ sds genRedisInfoString(char *section) {
if (allsections || defsections || !strcasecmp(section,"memory")) {
char hmem[64];
char peak_hmem[64];
+ char total_system_hmem[64];
+ char used_memory_lua_hmem[64];
+ char used_memory_rss_hmem[64];
+ char maxmemory_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
+ size_t total_system_mem = server.system_memory_size;
+ const char *evict_policy = evictPolicyToString();
+ long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024;
+ struct redisMemOverhead *mh = getMemoryOverheadData();
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
@@ -2660,26 +2911,60 @@ sds genRedisInfoString(char *section) {
bytesToHuman(hmem,zmalloc_used);
bytesToHuman(peak_hmem,server.stat_peak_memory);
+ bytesToHuman(total_system_hmem,total_system_mem);
+ bytesToHuman(used_memory_lua_hmem,memory_lua);
+ bytesToHuman(used_memory_rss_hmem,server.resident_set_size);
+ bytesToHuman(maxmemory_hmem,server.maxmemory);
+
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Memory\r\n"
"used_memory:%zu\r\n"
"used_memory_human:%s\r\n"
"used_memory_rss:%zu\r\n"
+ "used_memory_rss_human:%s\r\n"
"used_memory_peak:%zu\r\n"
"used_memory_peak_human:%s\r\n"
+ "used_memory_peak_perc:%.2f%%\r\n"
+ "used_memory_overhead:%zu\r\n"
+ "used_memory_startup:%zu\r\n"
+ "used_memory_dataset:%zu\r\n"
+ "used_memory_dataset_perc:%.2f%%\r\n"
+ "total_system_memory:%lu\r\n"
+ "total_system_memory_human:%s\r\n"
"used_memory_lua:%lld\r\n"
+ "used_memory_lua_human:%s\r\n"
+ "maxmemory:%lld\r\n"
+ "maxmemory_human:%s\r\n"
+ "maxmemory_policy:%s\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
- "mem_allocator:%s\r\n",
+ "mem_allocator:%s\r\n"
+ "active_defrag_running:%d\r\n"
+ "lazyfree_pending_objects:%zu\r\n",
zmalloc_used,
hmem,
server.resident_set_size,
+ used_memory_rss_hmem,
server.stat_peak_memory,
peak_hmem,
- ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
- zmalloc_get_fragmentation_ratio(server.resident_set_size),
- ZMALLOC_LIB
- );
+ mh->peak_perc,
+ mh->overhead_total,
+ mh->startup_allocated,
+ mh->dataset,
+ mh->dataset_perc,
+ (unsigned long)total_system_mem,
+ total_system_hmem,
+ memory_lua,
+ used_memory_lua_hmem,
+ server.maxmemory,
+ maxmemory_hmem,
+ evict_policy,
+ mh->fragmentation,
+ ZMALLOC_LIB,
+ server.active_defrag_running,
+ lazyfreeGetPendingObjectsCount()
+ );
+ freeMemoryOverheadData(mh);
}
/* Persistence */
@@ -2694,31 +2979,35 @@ sds genRedisInfoString(char *section) {
"rdb_last_bgsave_status:%s\r\n"
"rdb_last_bgsave_time_sec:%jd\r\n"
"rdb_current_bgsave_time_sec:%jd\r\n"
+ "rdb_last_cow_size:%zu\r\n"
"aof_enabled:%d\r\n"
"aof_rewrite_in_progress:%d\r\n"
"aof_rewrite_scheduled:%d\r\n"
"aof_last_rewrite_time_sec:%jd\r\n"
"aof_current_rewrite_time_sec:%jd\r\n"
"aof_last_bgrewrite_status:%s\r\n"
- "aof_last_write_status:%s\r\n",
+ "aof_last_write_status:%s\r\n"
+ "aof_last_cow_size:%zu\r\n",
server.loading,
server.dirty,
server.rdb_child_pid != -1,
(intmax_t)server.lastsave,
- (server.lastbgsave_status == REDIS_OK) ? "ok" : "err",
+ (server.lastbgsave_status == C_OK) ? "ok" : "err",
(intmax_t)server.rdb_save_time_last,
(intmax_t)((server.rdb_child_pid == -1) ?
-1 : time(NULL)-server.rdb_save_time_start),
- server.aof_state != REDIS_AOF_OFF,
+ server.stat_rdb_cow_bytes,
+ server.aof_state != AOF_OFF,
server.aof_child_pid != -1,
server.aof_rewrite_scheduled,
(intmax_t)server.aof_rewrite_time_last,
(intmax_t)((server.aof_child_pid == -1) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
- (server.aof_lastbgrewrite_status == REDIS_OK) ? "ok" : "err",
- (server.aof_last_write_status == REDIS_OK) ? "ok" : "err");
+ (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
+ (server.aof_last_write_status == C_OK) ? "ok" : "err",
+ server.stat_aof_cow_bytes);
- if (server.aof_state != REDIS_AOF_OFF) {
+ if (server.aof_state != AOF_OFF) {
info = sdscatprintf(info,
"aof_current_size:%lld\r\n"
"aof_base_size:%lld\r\n"
@@ -2732,7 +3021,7 @@ sds genRedisInfoString(char *section) {
server.aof_rewrite_scheduled,
sdslen(server.aof_buf),
aofRewriteBufferSize(),
- bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
+ bioPendingJobsOfType(BIO_AOF_FSYNC),
server.aof_delayed_fsync);
}
@@ -2743,14 +3032,14 @@ sds genRedisInfoString(char *section) {
server.loading_loaded_bytes;
perc = ((double)server.loading_loaded_bytes /
- server.loading_total_bytes) * 100;
+ (server.loading_total_bytes+1)) * 100;
- elapsed = server.unixtime-server.loading_start_time;
+ elapsed = time(NULL)-server.loading_start_time;
if (elapsed == 0) {
eta = 1; /* A fake 1 second figure if we don't have
enough info */
} else {
- eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes;
+ eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
}
info = sdscatprintf(info,
@@ -2776,6 +3065,10 @@ sds genRedisInfoString(char *section) {
"total_connections_received:%lld\r\n"
"total_commands_processed:%lld\r\n"
"instantaneous_ops_per_sec:%lld\r\n"
+ "total_net_input_bytes:%lld\r\n"
+ "total_net_output_bytes:%lld\r\n"
+ "instantaneous_input_kbps:%.2f\r\n"
+ "instantaneous_output_kbps:%.2f\r\n"
"rejected_connections:%lld\r\n"
"sync_full:%lld\r\n"
"sync_partial_ok:%lld\r\n"
@@ -2787,10 +3080,19 @@ sds genRedisInfoString(char *section) {
"pubsub_channels:%ld\r\n"
"pubsub_patterns:%lu\r\n"
"latest_fork_usec:%lld\r\n"
- "migrate_cached_sockets:%ld\r\n",
+ "migrate_cached_sockets:%ld\r\n"
+ "slave_expires_tracked_keys:%zu\r\n"
+ "active_defrag_hits:%lld\r\n"
+ "active_defrag_misses:%lld\r\n"
+ "active_defrag_key_hits:%lld\r\n"
+ "active_defrag_key_misses:%lld\r\n",
server.stat_numconnections,
server.stat_numcommands,
- getOperationsPerSecond(),
+ getInstantaneousMetric(STATS_METRIC_COMMAND),
+ server.stat_net_input_bytes,
+ server.stat_net_output_bytes,
+ (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
+ (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
server.stat_rejected_conn,
server.stat_sync_full,
server.stat_sync_partial_ok,
@@ -2802,7 +3104,12 @@ sds genRedisInfoString(char *section) {
dictSize(server.pubsub_channels),
listLength(server.pubsub_patterns),
server.stat_fork_time,
- dictSize(server.migrate_cached_sockets));
+ dictSize(server.migrate_cached_sockets),
+ getSlaveKeyWithExpireCount(),
+ server.stat_active_defrag_hits,
+ server.stat_active_defrag_misses,
+ server.stat_active_defrag_key_hits,
+ server.stat_active_defrag_key_misses);
}
/* Replication */
@@ -2829,15 +3136,15 @@ sds genRedisInfoString(char *section) {
"slave_repl_offset:%lld\r\n"
,server.masterhost,
server.masterport,
- (server.repl_state == REDIS_REPL_CONNECTED) ?
+ (server.repl_state == REPL_STATE_CONNECTED) ?
"up" : "down",
server.master ?
((int)(server.unixtime-server.master->lastinteraction)) : -1,
- server.repl_state == REDIS_REPL_TRANSFER,
+ server.repl_state == REPL_STATE_TRANSFER,
slave_repl_offset
);
- if (server.repl_state == REDIS_REPL_TRANSFER) {
+ if (server.repl_state == REPL_STATE_TRANSFER) {
info = sdscatprintf(info,
"master_sync_left_bytes:%lld\r\n"
"master_sync_last_io_seconds_ago:%d\r\n"
@@ -2847,7 +3154,7 @@ sds genRedisInfoString(char *section) {
);
}
- if (server.repl_state != REDIS_REPL_CONNECTED) {
+ if (server.repl_state != REPL_STATE_CONNECTED) {
info = sdscatprintf(info,
"master_link_down_since_seconds:%jd\r\n",
(intmax_t)server.unixtime-server.repl_down_since);
@@ -2879,44 +3186,54 @@ sds genRedisInfoString(char *section) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
+ client *slave = listNodeValue(ln);
char *state = NULL;
- char ip[REDIS_IP_STR_LEN];
+ char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
int port;
long lag = 0;
- if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1) continue;
+ if (slaveip[0] == '\0') {
+ if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1)
+ continue;
+ slaveip = ip;
+ }
switch(slave->replstate) {
- case REDIS_REPL_WAIT_BGSAVE_START:
- case REDIS_REPL_WAIT_BGSAVE_END:
+ case SLAVE_STATE_WAIT_BGSAVE_START:
+ case SLAVE_STATE_WAIT_BGSAVE_END:
state = "wait_bgsave";
break;
- case REDIS_REPL_SEND_BULK:
+ case SLAVE_STATE_SEND_BULK:
state = "send_bulk";
break;
- case REDIS_REPL_ONLINE:
+ case SLAVE_STATE_ONLINE:
state = "online";
break;
}
if (state == NULL) continue;
- if (slave->replstate == REDIS_REPL_ONLINE)
+ if (slave->replstate == SLAVE_STATE_ONLINE)
lag = time(NULL) - slave->repl_ack_time;
info = sdscatprintf(info,
"slave%d:ip=%s,port=%d,state=%s,"
"offset=%lld,lag=%ld\r\n",
- slaveid,ip,slave->slave_listening_port,state,
+ slaveid,slaveip,slave->slave_listening_port,state,
slave->repl_ack_off, lag);
slaveid++;
}
}
info = sdscatprintf(info,
+ "master_replid:%s\r\n"
+ "master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
+ "second_repl_offset:%lld\r\n"
"repl_backlog_active:%d\r\n"
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
+ server.replid,
+ server.replid2,
server.master_repl_offset,
+ server.second_replid_offset,
server.repl_backlog != NULL,
server.repl_backlog_size,
server.repl_backlog_off,
@@ -2982,324 +3299,25 @@ sds genRedisInfoString(char *section) {
return info;
}
-void infoCommand(redisClient *c) {
+void infoCommand(client *c) {
char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
if (c->argc > 2) {
addReply(c,shared.syntaxerr);
return;
}
- sds info = genRedisInfoString(section);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
+ addReplyBulkSds(c, genRedisInfoString(section));
}
-void monitorCommand(redisClient *c) {
+void monitorCommand(client *c) {
/* ignore MONITOR if already slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
+ if (c->flags & CLIENT_SLAVE) return;
- c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
+ c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
listAddNodeTail(server.monitors,c);
addReply(c,shared.ok);
}
-/* ============================ Maxmemory directive ======================== */
-
-/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
- * file to limit the max memory used by the server, before processing a
- * command.
- *
- * The goal of the function is to free enough memory to keep Redis under the
- * configured memory limit.
- *
- * The function starts calculating how many bytes should be freed to keep
- * Redis under the limit, and enters a loop selecting the best keys to
- * evict accordingly to the configured policy.
- *
- * If all the bytes needed to return back under the limit were freed the
- * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
- * should block the execution of commands that will result in more memory
- * used by the server.
- *
- * ------------------------------------------------------------------------
- *
- * LRU approximation algorithm
- *
- * Redis uses an approximation of the LRU algorithm that runs in constant
- * memory. Every time there is a key to expire, we sample N keys (with
- * N very small, usually in around 5) to populate a pool of best keys to
- * evict of M keys (the pool size is defined by REDIS_EVICTION_POOL_SIZE).
- *
- * The N keys sampled are added in the pool of good keys to expire (the one
- * with an old access time) if they are better than one of the current keys
- * in the pool.
- *
- * After the pool is populated, the best key we have in the pool is expired.
- * However note that we don't remove keys from the pool when they are deleted
- * so the pool may contain keys that no longer exist.
- *
- * When we try to evict a key, and all the entries in the pool don't exist
- * we populate it again. This time we'll be sure that the pool has at least
- * one key that can be evicted, if there is at least one key that can be
- * evicted in the whole database. */
-
-/* Create a new eviction pool. */
-struct evictionPoolEntry *evictionPoolAlloc(void) {
- struct evictionPoolEntry *ep;
- int j;
-
- ep = zmalloc(sizeof(*ep)*REDIS_EVICTION_POOL_SIZE);
- for (j = 0; j < REDIS_EVICTION_POOL_SIZE; j++) {
- ep[j].idle = 0;
- ep[j].key = NULL;
- }
- return ep;
-}
-
-/* This is an helper function for freeMemoryIfNeeded(), it is used in order
- * to populate the evictionPool with a few entries every time we want to
- * expire a key. Keys with idle time smaller than one of the current
- * keys are added. Keys are always added if there are free entries.
- *
- * We insert keys on place in ascending order, so keys with the smaller
- * idle time are on the left, and keys with the higher idle time on the
- * right. */
-
-#define EVICTION_SAMPLES_ARRAY_SIZE 16
-void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
- int j, k, count;
- dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE];
- dictEntry **samples;
-
- /* Try to use a static buffer: this function is a big hit...
- * Note: it was actually measured that this helps. */
- if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) {
- samples = _samples;
- } else {
- samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
- }
-
-#if 1 /* Use bulk get by default. */
- count = dictGetRandomKeys(sampledict,samples,server.maxmemory_samples);
-#else
- count = server.maxmemory_samples;
- for (j = 0; j < count; j++) samples[j] = dictGetRandomKey(sampledict);
-#endif
-
- for (j = 0; j < count; j++) {
- unsigned long long idle;
- sds key;
- robj *o;
- dictEntry *de;
-
- de = samples[j];
- key = dictGetKey(de);
- /* If the dictionary we are sampling from is not the main
- * dictionary (but the expires one) we need to lookup the key
- * again in the key dictionary to obtain the value object. */
- if (sampledict != keydict) de = dictFind(keydict, key);
- o = dictGetVal(de);
- idle = estimateObjectIdleTime(o);
-
- /* Insert the element inside the pool.
- * First, find the first empty bucket or the first populated
- * bucket that has an idle time smaller than our idle time. */
- k = 0;
- while (k < REDIS_EVICTION_POOL_SIZE &&
- pool[k].key &&
- pool[k].idle < idle) k++;
- if (k == 0 && pool[REDIS_EVICTION_POOL_SIZE-1].key != NULL) {
- /* Can't insert if the element is < the worst element we have
- * and there are no empty buckets. */
- continue;
- } else if (k < REDIS_EVICTION_POOL_SIZE && pool[k].key == NULL) {
- /* Inserting into empty position. No setup needed before insert. */
- } else {
- /* Inserting in the middle. Now k points to the first element
- * greater than the element to insert. */
- if (pool[REDIS_EVICTION_POOL_SIZE-1].key == NULL) {
- /* Free space on the right? Insert at k shifting
- * all the elements from k to end to the right. */
- memmove(pool+k+1,pool+k,
- sizeof(pool[0])*(REDIS_EVICTION_POOL_SIZE-k-1));
- } else {
- /* No free space on right? Insert at k-1 */
- k--;
- /* Shift all elements on the left of k (included) to the
- * left, so we discard the element with smaller idle time. */
- sdsfree(pool[0].key);
- memmove(pool,pool+1,sizeof(pool[0])*k);
- }
- }
- pool[k].key = sdsdup(key);
- pool[k].idle = idle;
- }
- if (samples != _samples) zfree(samples);
-}
-
-int freeMemoryIfNeeded(void) {
- size_t mem_used, mem_tofree, mem_freed;
- int slaves = listLength(server.slaves);
- mstime_t latency;
-
- /* Remove the size of slaves output buffers and AOF buffer from the
- * count of used memory. */
- mem_used = zmalloc_used_memory();
- if (slaves) {
- listIter li;
- listNode *ln;
-
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
- unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
- if (obuf_bytes > mem_used)
- mem_used = 0;
- else
- mem_used -= obuf_bytes;
- }
- }
- if (server.aof_state != REDIS_AOF_OFF) {
- mem_used -= sdslen(server.aof_buf);
- mem_used -= aofRewriteBufferSize();
- }
-
- /* Check if we are over the memory limit. */
- if (mem_used <= server.maxmemory) return REDIS_OK;
-
- if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
- return REDIS_ERR; /* We need to free memory, but policy forbids. */
-
- /* Compute how much memory we need to free. */
- mem_tofree = mem_used - server.maxmemory;
- mem_freed = 0;
- latencyStartMonitor(latency);
- while (mem_freed < mem_tofree) {
- int j, k, keys_freed = 0;
-
- for (j = 0; j < server.dbnum; j++) {
- long bestval = 0; /* just to prevent warning */
- sds bestkey = NULL;
- dictEntry *de;
- redisDb *db = server.db+j;
- dict *dict;
-
- if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
- server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
- {
- dict = server.db[j].dict;
- } else {
- dict = server.db[j].expires;
- }
- if (dictSize(dict) == 0) continue;
-
- /* volatile-random and allkeys-random policy */
- if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
- server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
- {
- de = dictGetRandomKey(dict);
- bestkey = dictGetKey(de);
- }
-
- /* volatile-lru and allkeys-lru policy */
- else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
- server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
- {
- struct evictionPoolEntry *pool = db->eviction_pool;
-
- while(bestkey == NULL) {
- evictionPoolPopulate(dict, db->dict, db->eviction_pool);
- /* Go backward from best to worst element to evict. */
- for (k = REDIS_EVICTION_POOL_SIZE-1; k >= 0; k--) {
- if (pool[k].key == NULL) continue;
- de = dictFind(dict,pool[k].key);
-
- /* Remove the entry from the pool. */
- sdsfree(pool[k].key);
- /* Shift all elements on its right to left. */
- memmove(pool+k,pool+k+1,
- sizeof(pool[0])*(REDIS_EVICTION_POOL_SIZE-k-1));
- /* Clear the element on the right which is empty
- * since we shifted one position to the left. */
- pool[REDIS_EVICTION_POOL_SIZE-1].key = NULL;
- pool[REDIS_EVICTION_POOL_SIZE-1].idle = 0;
-
- /* If the key exists, is our pick. Otherwise it is
- * a ghost and we need to try the next element. */
- if (de) {
- bestkey = dictGetKey(de);
- break;
- } else {
- /* Ghost... */
- continue;
- }
- }
- }
- }
-
- /* volatile-ttl */
- else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
- for (k = 0; k < server.maxmemory_samples; k++) {
- sds thiskey;
- long thisval;
-
- de = dictGetRandomKey(dict);
- thiskey = dictGetKey(de);
- thisval = (long) dictGetVal(de);
-
- /* Expire sooner (minor expire unix timestamp) is better
- * candidate for deletion */
- if (bestkey == NULL || thisval < bestval) {
- bestkey = thiskey;
- bestval = thisval;
- }
- }
- }
-
- /* Finally remove the selected key. */
- if (bestkey) {
- long long delta;
-
- robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
- propagateExpire(db,keyobj);
- /* We compute the amount of memory freed by dbDelete() alone.
- * It is possible that actually the memory needed to propagate
- * the DEL in AOF and replication link is greater than the one
- * we are freeing removing the key, but we can't account for
- * that otherwise we would never exit the loop.
- *
- * AOF and Output buffer memory will be freed eventually so
- * we only care about memory used by the key space. */
- delta = (long long) zmalloc_used_memory();
- dbDelete(db,keyobj);
- delta -= (long long) zmalloc_used_memory();
- mem_freed += delta;
- server.stat_evictedkeys++;
- notifyKeyspaceEvent(REDIS_NOTIFY_EVICTED, "evicted",
- keyobj, db->id);
- decrRefCount(keyobj);
- keys_freed++;
-
- /* When the memory to free starts to be big enough, we may
- * start spending so much time here that is impossible to
- * deliver data to the slaves fast enough, so we force the
- * transmission here inside the loop. */
- if (slaves) flushSlavesOutputBuffers();
- }
- }
- if (!keys_freed) {
- latencyEndMonitor(latency);
- latencyAddSampleIfNeeded("eviction-cycle",latency);
- return REDIS_ERR; /* nothing to free... */
- }
- }
- latencyEndMonitor(latency);
- latencyAddSampleIfNeeded("eviction-cycle",latency);
- return REDIS_OK;
-}
-
/* =================================== Main! ================================ */
#ifdef __linux__
@@ -3317,14 +3335,21 @@ int linuxOvercommitMemoryValue(void) {
return atoi(buf);
}
-void linuxOvercommitMemoryWarning(void) {
+void linuxMemoryWarnings(void) {
if (linuxOvercommitMemoryValue() == 0) {
- redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
+ serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
+ }
+ if (THPIsEnabled()) {
+ serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled.");
}
}
#endif /* __linux__ */
void createPidFile(void) {
+ /* If pidfile requested, but no pidfile defined, use
+ * default pidfile path */
+ if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
+
/* Try to write the pid file in a best-effort way. */
FILE *fp = fopen(server.pidfile,"w");
if (fp) {
@@ -3350,7 +3375,7 @@ void daemonize(void) {
}
}
-void version() {
+void version(void) {
printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d build=%llx\n",
REDIS_VERSION,
redisGitSHA1(),
@@ -3361,7 +3386,7 @@ void version() {
exit(0);
}
-void usage() {
+void usage(void) {
fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf] [options]\n");
fprintf(stderr," ./redis-server - (read config from stdin)\n");
fprintf(stderr," ./redis-server -v or --version\n");
@@ -3387,22 +3412,60 @@ void redisAsciiArt(void) {
else if (server.sentinel_mode) mode = "sentinel";
else mode = "standalone";
- snprintf(buf,1024*16,ascii_logo,
- REDIS_VERSION,
- redisGitSHA1(),
- strtol(redisGitDirty(),NULL,10) > 0,
- (sizeof(long) == 8) ? "64" : "32",
- mode, server.port,
- (long) getpid()
- );
- redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
+ /* Show the ASCII logo if: log file is stdout AND stdout is a
+ * tty AND syslog logging is disabled. Also show logo if the user
+ * forced us to do so via redis.conf. */
+ int show_logo = ((!server.syslog_enabled &&
+ server.logfile[0] == '\0' &&
+ isatty(fileno(stdout))) ||
+ server.always_show_logo);
+
+ if (!show_logo) {
+ serverLog(LL_NOTICE,
+ "Running mode=%s, port=%d.",
+ mode, server.port
+ );
+ } else {
+ snprintf(buf,1024*16,ascii_logo,
+ REDIS_VERSION,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (sizeof(long) == 8) ? "64" : "32",
+ mode, server.port,
+ (long) getpid()
+ );
+ serverLogRaw(LL_NOTICE|LL_RAW,buf);
+ }
zfree(buf);
}
-static void sigtermHandler(int sig) {
- REDIS_NOTUSED(sig);
-
- redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown...");
+static void sigShutdownHandler(int sig) {
+ char *msg;
+
+ switch (sig) {
+ case SIGINT:
+ msg = "Received SIGINT scheduling shutdown...";
+ break;
+ case SIGTERM:
+ msg = "Received SIGTERM scheduling shutdown...";
+ break;
+ default:
+ msg = "Received shutdown signal, scheduling shutdown...";
+ };
+
+ /* SIGINT is often delivered via Ctrl+C in an interactive session.
+ * If we receive the signal the second time, we interpret this as
+ * the user really wanting to quit ASAP without waiting to persist
+ * on disk. */
+ if (server.shutdown_asap && sig == SIGINT) {
+ serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
+ rdbRemoveTempFile(getpid());
+ exit(1); /* Exit with an error since this was not a clean shutdown. */
+ } else if (server.loading) {
+ exit(0);
+ }
+
+ serverLogFromHandler(LL_WARNING, msg);
server.shutdown_asap = 1;
}
@@ -3413,8 +3476,9 @@ void setupSignalHandlers(void) {
* Otherwise, sa_handler is used. */
sigemptyset(&act.sa_mask);
act.sa_flags = 0;
- act.sa_handler = sigtermHandler;
+ act.sa_handler = sigShutdownHandler;
sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
#ifdef HAVE_BACKTRACE
sigemptyset(&act.sa_mask);
@@ -3431,7 +3495,7 @@ void setupSignalHandlers(void) {
void memtest(size_t megabytes, int passes);
/* Returns 1 if there is --sentinel among the arguments or if
- * argv[0] is exactly "redis-sentinel". */
+ * argv[0] contains "redis-sentinel". */
int checkForSentinelMode(int argc, char **argv) {
int j;
@@ -3444,24 +3508,35 @@ int checkForSentinelMode(int argc, char **argv) {
/* Function called at startup to load RDB or AOF file in memory. */
void loadDataFromDisk(void) {
long long start = ustime();
- if (server.aof_state == REDIS_AOF_ON) {
- if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
- redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
+ if (server.aof_state == AOF_ON) {
+ if (loadAppendOnlyFile(server.aof_filename) == C_OK)
+ serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
} else {
- if (rdbLoad(server.rdb_filename) == REDIS_OK) {
- redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
+ rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ if (rdbLoad(server.rdb_filename,&rsi) == C_OK) {
+ serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
(float)(ustime()-start)/1000000);
+
+ /* Restore the replication ID / offset from the RDB file. */
+ if (rsi.repl_id_is_set && rsi.repl_offset != -1) {
+ memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
+ server.master_repl_offset = rsi.repl_offset;
+ /* If we are a slave, create a cached master from this
+ * information, in order to allow partial resynchronizations
+ * with masters. */
+ if (server.masterhost) replicationCacheMasterUsingMyself();
+ }
} else if (errno != ENOENT) {
- redisLog(REDIS_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
+ serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
exit(1);
}
}
}
void redisOutOfMemoryHandler(size_t allocation_size) {
- redisLog(REDIS_WARNING,"Out Of Memory allocating %zu bytes!",
+ serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
allocation_size);
- redisPanic("Redis aborting for OUT OF MEMORY");
+ serverPanic("Redis aborting for OUT OF MEMORY");
}
void redisSetProcTitle(char *title) {
@@ -3476,25 +3551,157 @@ void redisSetProcTitle(char *title) {
server.port,
server_mode);
#else
- REDIS_NOTUSED(title);
+ UNUSED(title);
#endif
}
+/*
+ * Check whether systemd or upstart have been used to start redis.
+ */
+
+int redisSupervisedUpstart(void) {
+ const char *upstart_job = getenv("UPSTART_JOB");
+
+ if (!upstart_job) {
+ serverLog(LL_WARNING,
+ "upstart supervision requested, but UPSTART_JOB not found");
+ return 0;
+ }
+
+ serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness");
+ raise(SIGSTOP);
+ unsetenv("UPSTART_JOB");
+ return 1;
+}
+
+int redisSupervisedSystemd(void) {
+ const char *notify_socket = getenv("NOTIFY_SOCKET");
+ int fd = 1;
+ struct sockaddr_un su;
+ struct iovec iov;
+ struct msghdr hdr;
+ int sendto_flags = 0;
+
+ if (!notify_socket) {
+ serverLog(LL_WARNING,
+ "systemd supervision requested, but NOTIFY_SOCKET not found");
+ return 0;
+ }
+
+ if ((strchr("@/", notify_socket[0])) == NULL || strlen(notify_socket) < 2) {
+ return 0;
+ }
+
+ serverLog(LL_NOTICE, "supervised by systemd, will signal readiness");
+ if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) {
+ serverLog(LL_WARNING,
+ "Can't connect to systemd socket %s", notify_socket);
+ return 0;
+ }
+
+ memset(&su, 0, sizeof(su));
+ su.sun_family = AF_UNIX;
+ strncpy (su.sun_path, notify_socket, sizeof(su.sun_path) -1);
+ su.sun_path[sizeof(su.sun_path) - 1] = '\0';
+
+ if (notify_socket[0] == '@')
+ su.sun_path[0] = '\0';
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = "READY=1";
+ iov.iov_len = strlen("READY=1");
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = &su;
+ hdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) +
+ strlen(notify_socket);
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ unsetenv("NOTIFY_SOCKET");
+#ifdef HAVE_MSG_NOSIGNAL
+ sendto_flags |= MSG_NOSIGNAL;
+#endif
+ if (sendmsg(fd, &hdr, sendto_flags) < 0) {
+ serverLog(LL_WARNING, "Can't send notification to systemd");
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ return 1;
+}
+
+int redisIsSupervised(int mode) {
+ if (mode == SUPERVISED_AUTODETECT) {
+ const char *upstart_job = getenv("UPSTART_JOB");
+ const char *notify_socket = getenv("NOTIFY_SOCKET");
+
+ if (upstart_job) {
+ redisSupervisedUpstart();
+ } else if (notify_socket) {
+ redisSupervisedSystemd();
+ }
+ } else if (mode == SUPERVISED_UPSTART) {
+ return redisSupervisedUpstart();
+ } else if (mode == SUPERVISED_SYSTEMD) {
+ return redisSupervisedSystemd();
+ }
+
+ return 0;
+}
+
+
int main(int argc, char **argv) {
struct timeval tv;
+ int j;
+
+#ifdef REDIS_TEST
+ if (argc == 3 && !strcasecmp(argv[1], "test")) {
+ if (!strcasecmp(argv[2], "ziplist")) {
+ return ziplistTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "quicklist")) {
+ quicklistTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "intset")) {
+ return intsetTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "zipmap")) {
+ return zipmapTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "sha1test")) {
+ return sha1Test(argc, argv);
+ } else if (!strcasecmp(argv[2], "util")) {
+ return utilTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "sds")) {
+ return sdsTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "endianconv")) {
+ return endianconvTest(argc, argv);
+ } else if (!strcasecmp(argv[2], "crc64")) {
+ return crc64Test(argc, argv);
+ }
+
+ return -1; /* test not found */
+ }
+#endif
/* We need to initialize our libraries, and the server configuration. */
#ifdef INIT_SETPROCTITLE_REPLACEMENT
spt_init(argc, argv);
#endif
setlocale(LC_COLLATE,"");
- zmalloc_enable_thread_safeness();
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
- dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
+ char hashseed[16];
+ getRandomHexChars(hashseed,sizeof(hashseed));
+ dictSetHashFunctionSeed((uint8_t*)hashseed);
server.sentinel_mode = checkForSentinelMode(argc,argv);
initServerConfig();
+ moduleInitModulesSystem();
+
+ /* Store the executable path and arguments in a safe place in order
+ * to be able to restart the server later. */
+ server.executable = getAbsolutePath(argv[0]);
+ server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
+ server.exec_argv[argc] = NULL;
+ for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
/* We need to init sentinel right now as parsing the configuration file
* in sentinel mode will have the effect of populating the sentinel
@@ -3504,8 +3711,16 @@ int main(int argc, char **argv) {
initSentinel();
}
+ /* Check if we need to start in redis-check-rdb/aof mode. We just execute
+ * the program main. However the program is part of the Redis executable
+ * so that we can easily execute an RDB check on loading errors. */
+ if (strstr(argv[0],"redis-check-rdb") != NULL)
+ redis_check_rdb_main(argc,argv,NULL);
+ else if (strstr(argv[0],"redis-check-aof") != NULL)
+ redis_check_aof_main(argc,argv);
+
if (argc >= 2) {
- int j = 1; /* First option to parse in argv[] */
+ j = 1; /* First option to parse in argv[] */
sds options = sdsempty();
char *configfile = NULL;
@@ -3526,8 +3741,16 @@ int main(int argc, char **argv) {
}
/* First argument is the config file name? */
- if (argv[j][0] != '-' || argv[j][1] != '-')
- configfile = argv[j++];
+ if (argv[j][0] != '-' || argv[j][1] != '-') {
+ configfile = argv[j];
+ server.configfile = getAbsolutePath(configfile);
+ /* Replace the config file in server.exec_argv with
+ * its absoulte path. */
+ zfree(server.exec_argv[j]);
+ server.exec_argv[j] = zstrdup(server.configfile);
+ j++;
+ }
+
/* All the other options are parsed and conceptually appended to the
* configuration file. For instance --port 6380 will generate the
* string "port 6380\n" to be parsed after the actual file name
@@ -3535,6 +3758,11 @@ int main(int argc, char **argv) {
while(j != argc) {
if (argv[j][0] == '-' && argv[j][1] == '-') {
/* Option name */
+ if (!strcmp(argv[j], "--check-rdb")) {
+ /* Argument has no options, need to skip for parsing. */
+ j++;
+ continue;
+ }
if (sdslen(options)) options = sdscat(options,"\n");
options = sdscat(options,argv[j]+2);
options = sdscat(options," ");
@@ -3545,48 +3773,74 @@ int main(int argc, char **argv) {
}
j++;
}
- if (configfile) server.configfile = getAbsolutePath(configfile);
+ if (server.sentinel_mode && configfile && *configfile == '-') {
+ serverLog(LL_WARNING,
+ "Sentinel config from STDIN not allowed.");
+ serverLog(LL_WARNING,
+ "Sentinel needs config file on disk to save state. Exiting...");
+ exit(1);
+ }
resetServerSaveParams();
loadServerConfig(configfile,options);
sdsfree(options);
+ }
+
+ serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
+ serverLog(LL_WARNING,
+ "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
+ REDIS_VERSION,
+ (sizeof(long) == 8) ? 64 : 32,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (int)getpid());
+
+ if (argc == 1) {
+ serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
} else {
- redisLog(REDIS_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
+ serverLog(LL_WARNING, "Configuration loaded");
}
- if (server.daemonize) daemonize();
+
+ server.supervised = redisIsSupervised(server.supervised_mode);
+ int background = server.daemonize && !server.supervised;
+ if (background) daemonize();
+
initServer();
- if (server.daemonize) createPidFile();
+ if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
redisAsciiArt();
+ checkTcpBacklogSettings();
if (!server.sentinel_mode) {
/* Things not needed when running in Sentinel mode. */
- redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
+ serverLog(LL_WARNING,"Server initialized");
#ifdef __linux__
- linuxOvercommitMemoryWarning();
+ linuxMemoryWarnings();
#endif
+ moduleLoadFromQueue();
loadDataFromDisk();
if (server.cluster_enabled) {
- if (verifyClusterConfigWithData() == REDIS_ERR) {
- redisLog(REDIS_WARNING,
+ if (verifyClusterConfigWithData() == C_ERR) {
+ serverLog(LL_WARNING,
"You can't have keys in a DB different than DB 0 when in "
"Cluster mode. Exiting.");
exit(1);
}
}
if (server.ipfd_count > 0)
- redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
+ serverLog(LL_NOTICE,"Ready to accept connections");
if (server.sofd > 0)
- redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
+ serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
} else {
sentinelIsRunning();
}
/* Warning the user about suspicious maxmemory setting. */
if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
- redisLog(REDIS_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
+ serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
}
aeSetBeforeSleepProc(server.el,beforeSleep);
+ aeSetAfterSleepProc(server.el,afterSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
return 0;
diff --git a/src/server.h b/src/server.h
new file mode 100644
index 000000000..e3b56075a
--- /dev/null
+++ b/src/server.h
@@ -0,0 +1,2022 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_H
+#define __REDIS_H
+
+#include "fmacros.h"
+#include "config.h"
+#include "solarisfixes.h"
+#include "rio.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <lua.h>
+#include <signal.h>
+
+typedef long long mstime_t; /* millisecond time type. */
+
+#include "ae.h" /* Event driven programming library */
+#include "sds.h" /* Dynamic safe strings */
+#include "dict.h" /* Hash tables */
+#include "adlist.h" /* Linked lists */
+#include "zmalloc.h" /* total memory usage aware version of malloc/free */
+#include "anet.h" /* Networking the easy way */
+#include "ziplist.h" /* Compact list data structure */
+#include "intset.h" /* Compact integer set structure */
+#include "version.h" /* Version macro */
+#include "util.h" /* Misc functions useful in many places */
+#include "latency.h" /* Latency monitor API */
+#include "sparkline.h" /* ASCII graphs API */
+#include "quicklist.h" /* Lists are encoded as linked lists of
+ N-elements flat arrays */
+#include "rax.h" /* Radix tree */
+
+/* Following includes allow test functions to be called from Redis main() */
+#include "zipmap.h"
+#include "sha1.h"
+#include "endianconv.h"
+#include "crc64.h"
+
+/* Error codes */
+#define C_OK 0
+#define C_ERR -1
+
+/* Static server configuration */
+#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
+#define CONFIG_MIN_HZ 1
+#define CONFIG_MAX_HZ 500
+#define CONFIG_DEFAULT_SERVER_PORT 6379 /* TCP port */
+#define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog */
+#define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* default client timeout: infinite */
+#define CONFIG_DEFAULT_DBNUM 16
+#define CONFIG_MAX_LINE 1024
+#define CRON_DBS_PER_CALL 16
+#define NET_MAX_WRITES_PER_EVENT (1024*64)
+#define PROTO_SHARED_SELECT_CMDS 10
+#define OBJ_SHARED_INTEGERS 10000
+#define OBJ_SHARED_BULKHDR_LEN 32
+#define LOG_MAX_LEN 1024 /* Default maximum length of syslog messages */
+#define AOF_REWRITE_PERC 100
+#define AOF_REWRITE_MIN_SIZE (64*1024*1024)
+#define AOF_REWRITE_ITEMS_PER_CMD 64
+#define AOF_READ_DIFF_INTERVAL_BYTES (1024*10)
+#define CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN 10000
+#define CONFIG_DEFAULT_SLOWLOG_MAX_LEN 128
+#define CONFIG_DEFAULT_MAX_CLIENTS 10000
+#define CONFIG_AUTHPASS_MAX_LEN 512
+#define CONFIG_DEFAULT_SLAVE_PRIORITY 100
+#define CONFIG_DEFAULT_REPL_TIMEOUT 60
+#define CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD 10
+#define CONFIG_RUN_ID_SIZE 40
+#define RDB_EOF_MARK_SIZE 40
+#define CONFIG_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
+#define CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
+#define CONFIG_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
+#define CONFIG_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
+#define CONFIG_DEFAULT_PID_FILE "/var/run/redis.pid"
+#define CONFIG_DEFAULT_SYSLOG_IDENT "redis"
+#define CONFIG_DEFAULT_CLUSTER_CONFIG_FILE "nodes.conf"
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP NULL /* Auto detect. */
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT 0 /* Use server.port */
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT 0 /* Use +10000 offset. */
+#define CONFIG_DEFAULT_DAEMONIZE 0
+#define CONFIG_DEFAULT_UNIX_SOCKET_PERM 0
+#define CONFIG_DEFAULT_TCP_KEEPALIVE 300
+#define CONFIG_DEFAULT_PROTECTED_MODE 1
+#define CONFIG_DEFAULT_LOGFILE ""
+#define CONFIG_DEFAULT_SYSLOG_ENABLED 0
+#define CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR 1
+#define CONFIG_DEFAULT_RDB_COMPRESSION 1
+#define CONFIG_DEFAULT_RDB_CHECKSUM 1
+#define CONFIG_DEFAULT_RDB_FILENAME "dump.rdb"
+#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0
+#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5
+#define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1
+#define CONFIG_DEFAULT_SLAVE_READ_ONLY 1
+#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP NULL
+#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT 0
+#define CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY 0
+#define CONFIG_DEFAULT_MAXMEMORY 0
+#define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5
+#define CONFIG_DEFAULT_LFU_LOG_FACTOR 10
+#define CONFIG_DEFAULT_LFU_DECAY_TIME 1
+#define CONFIG_DEFAULT_AOF_FILENAME "appendonly.aof"
+#define CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0
+#define CONFIG_DEFAULT_AOF_LOAD_TRUNCATED 1
+#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 0
+#define CONFIG_DEFAULT_ACTIVE_REHASHING 1
+#define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
+#define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0
+#define CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG 10
+#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
+#define NET_PEER_ID_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
+#define CONFIG_BINDADDR_MAX 16
+#define CONFIG_MIN_RESERVED_FDS 32
+#define CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD 0
+#define CONFIG_DEFAULT_SLAVE_LAZY_FLUSH 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
+#define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0
+#define CONFIG_DEFAULT_ACTIVE_DEFRAG 0
+#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */
+#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */
+#define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */
+#define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 25 /* 25% CPU min (at lower threshold) */
+#define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 75 /* 75% CPU max (at upper threshold) */
+
+#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */
+#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
+#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
+#define ACTIVE_EXPIRE_CYCLE_SLOW 0
+#define ACTIVE_EXPIRE_CYCLE_FAST 1
+
+/* Instantaneous metrics tracking. */
+#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
+#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
+#define STATS_METRIC_NET_INPUT 1 /* Bytes read from network. */
+#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
+#define STATS_METRIC_COUNT 3
+
+/* Protocol and I/O related defines */
+#define PROTO_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
+#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
+#define PROTO_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
+#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
+#define PROTO_MBULK_BIG_ARG (1024*32)
+#define LONG_STR_SIZE 21 /* Bytes needed for long -> str + '\0' */
+#define AOF_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
+
+/* When configuring the server eventloop, we setup it so that the total number
+ * of file descriptors we can handle are server.maxclients + RESERVED_FDS +
+ * a few more to stay safe. Since RESERVED_FDS defaults to 32, we add 96
+ * in order to make sure of not over provisioning more than 128 fds. */
+#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96)
+
+/* Hash table parameters */
+#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
+
+/* Command flags. Please check the command table defined in the redis.c file
+ * for more information about the meaning of every flag. */
+#define CMD_WRITE (1<<0) /* "w" flag */
+#define CMD_READONLY (1<<1) /* "r" flag */
+#define CMD_DENYOOM (1<<2) /* "m" flag */
+#define CMD_MODULE (1<<3) /* Command exported by module. */
+#define CMD_ADMIN (1<<4) /* "a" flag */
+#define CMD_PUBSUB (1<<5) /* "p" flag */
+#define CMD_NOSCRIPT (1<<6) /* "s" flag */
+#define CMD_RANDOM (1<<7) /* "R" flag */
+#define CMD_SORT_FOR_SCRIPT (1<<8) /* "S" flag */
+#define CMD_LOADING (1<<9) /* "l" flag */
+#define CMD_STALE (1<<10) /* "t" flag */
+#define CMD_SKIP_MONITOR (1<<11) /* "M" flag */
+#define CMD_ASKING (1<<12) /* "k" flag */
+#define CMD_FAST (1<<13) /* "F" flag */
+#define CMD_MODULE_GETKEYS (1<<14) /* Use the modules getkeys interface. */
+#define CMD_MODULE_NO_CLUSTER (1<<15) /* Deny on Redis Cluster. */
+
+/* AOF states */
+#define AOF_OFF 0 /* AOF is off */
+#define AOF_ON 1 /* AOF is on */
+#define AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */
+
+/* Client flags */
+#define CLIENT_SLAVE (1<<0) /* This client is a slave server */
+#define CLIENT_MASTER (1<<1) /* This client is a master server */
+#define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
+#define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */
+#define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
+#define CLIENT_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
+#define CLIENT_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
+#define CLIENT_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
+ server.unblocked_clients */
+#define CLIENT_LUA (1<<8) /* This is a non connected client used by Lua */
+#define CLIENT_ASKING (1<<9) /* Client issued the ASKING command */
+#define CLIENT_CLOSE_ASAP (1<<10)/* Close this client ASAP */
+#define CLIENT_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
+#define CLIENT_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
+#define CLIENT_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if is master */
+#define CLIENT_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
+#define CLIENT_FORCE_REPL (1<<15) /* Force replication of current cmd. */
+#define CLIENT_PRE_PSYNC (1<<16) /* Instance don't understand PSYNC. */
+#define CLIENT_READONLY (1<<17) /* Cluster client is in read-only state. */
+#define CLIENT_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
+#define CLIENT_PREVENT_AOF_PROP (1<<19) /* Don't propagate to AOF. */
+#define CLIENT_PREVENT_REPL_PROP (1<<20) /* Don't propagate to slaves. */
+#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP|CLIENT_PREVENT_REPL_PROP)
+#define CLIENT_PENDING_WRITE (1<<21) /* Client has output to send but a write
+ handler is yet not installed. */
+#define CLIENT_REPLY_OFF (1<<22) /* Don't send replies to client. */
+#define CLIENT_REPLY_SKIP_NEXT (1<<23) /* Set CLIENT_REPLY_SKIP for next cmd */
+#define CLIENT_REPLY_SKIP (1<<24) /* Don't send just this reply. */
+#define CLIENT_LUA_DEBUG (1<<25) /* Run EVAL in debug mode. */
+#define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */
+#define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */
+
+/* Client block type (btype field in client structure)
+ * if CLIENT_BLOCKED flag is set. */
+#define BLOCKED_NONE 0 /* Not blocked, no CLIENT_BLOCKED flag set. */
+#define BLOCKED_LIST 1 /* BLPOP & co. */
+#define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */
+#define BLOCKED_MODULE 3 /* Blocked by a loadable module. */
+
+/* Client request types */
+#define PROTO_REQ_INLINE 1
+#define PROTO_REQ_MULTIBULK 2
+
+/* Client classes for client limits, currently used only for
+ * the max-client-output-buffer limit implementation. */
+#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
+#define CLIENT_TYPE_SLAVE 1 /* Slaves. */
+#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
+#define CLIENT_TYPE_MASTER 3 /* Master. */
+#define CLIENT_TYPE_OBUF_COUNT 3 /* Number of clients to expose to output
+ buffer configuration. Just the first
+ three: normal, slave, pubsub. */
+
+/* Slave replication state. Used in server.repl_state for slaves to remember
+ * what to do next. */
+#define REPL_STATE_NONE 0 /* No active replication */
+#define REPL_STATE_CONNECT 1 /* Must connect to master */
+#define REPL_STATE_CONNECTING 2 /* Connecting to master */
+/* --- Handshake states, must be ordered --- */
+#define REPL_STATE_RECEIVE_PONG 3 /* Wait for PING reply */
+#define REPL_STATE_SEND_AUTH 4 /* Send AUTH to master */
+#define REPL_STATE_RECEIVE_AUTH 5 /* Wait for AUTH reply */
+#define REPL_STATE_SEND_PORT 6 /* Send REPLCONF listening-port */
+#define REPL_STATE_RECEIVE_PORT 7 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_IP 8 /* Send REPLCONF ip-address */
+#define REPL_STATE_RECEIVE_IP 9 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_CAPA 10 /* Send REPLCONF capa */
+#define REPL_STATE_RECEIVE_CAPA 11 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_PSYNC 12 /* Send PSYNC */
+#define REPL_STATE_RECEIVE_PSYNC 13 /* Wait for PSYNC reply */
+/* --- End of handshake states --- */
+#define REPL_STATE_TRANSFER 14 /* Receiving .rdb from master */
+#define REPL_STATE_CONNECTED 15 /* Connected to master */
+
+/* State of slaves from the POV of the master. Used in client->replstate.
+ * In SEND_BULK and ONLINE state the slave receives new updates
+ * in its output queue. In the WAIT_BGSAVE states instead the server is waiting
+ * to start the next background saving in order to send updates to it. */
+#define SLAVE_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
+#define SLAVE_STATE_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
+#define SLAVE_STATE_SEND_BULK 8 /* Sending RDB file to slave. */
+#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
+
+/* Slave capabilities. */
+#define SLAVE_CAPA_NONE 0
+#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
+#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
+
+/* Synchronous read timeout - slave side */
+#define CONFIG_REPL_SYNCIO_TIMEOUT 5
+
+/* List related stuff */
+#define LIST_HEAD 0
+#define LIST_TAIL 1
+
+/* Sort operations */
+#define SORT_OP_GET 0
+
+/* Log levels */
+#define LL_DEBUG 0
+#define LL_VERBOSE 1
+#define LL_NOTICE 2
+#define LL_WARNING 3
+#define LL_RAW (1<<10) /* Modifier to log without timestamp */
+#define CONFIG_DEFAULT_VERBOSITY LL_NOTICE
+
+/* Supervision options */
+#define SUPERVISED_NONE 0
+#define SUPERVISED_AUTODETECT 1
+#define SUPERVISED_SYSTEMD 2
+#define SUPERVISED_UPSTART 3
+
+/* Anti-warning macro... */
+#define UNUSED(V) ((void) V)
+
+#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
+#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
+
+/* Append only defines */
+#define AOF_FSYNC_NO 0
+#define AOF_FSYNC_ALWAYS 1
+#define AOF_FSYNC_EVERYSEC 2
+#define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
+
+/* Zip structure related defaults */
+#define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512
+#define OBJ_HASH_MAX_ZIPLIST_VALUE 64
+#define OBJ_SET_MAX_INTSET_ENTRIES 512
+#define OBJ_ZSET_MAX_ZIPLIST_ENTRIES 128
+#define OBJ_ZSET_MAX_ZIPLIST_VALUE 64
+
+/* List defaults */
+#define OBJ_LIST_MAX_ZIPLIST_SIZE -2
+#define OBJ_LIST_COMPRESS_DEPTH 0
+
+/* HyperLogLog defines */
+#define CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
+
+/* Sets operations codes */
+#define SET_OP_UNION 0
+#define SET_OP_DIFF 1
+#define SET_OP_INTER 2
+
+/* Redis maxmemory strategies. Instead of using just incremental number
+ * for this defines, we use a set of flags so that testing for certain
+ * properties common to multiple policies is faster. */
+#define MAXMEMORY_FLAG_LRU (1<<0)
+#define MAXMEMORY_FLAG_LFU (1<<1)
+#define MAXMEMORY_FLAG_ALLKEYS (1<<2)
+#define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \
+ (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)
+
+#define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU)
+#define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU)
+#define MAXMEMORY_VOLATILE_TTL (2<<8)
+#define MAXMEMORY_VOLATILE_RANDOM (3<<8)
+#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_RANDOM ((6<<8)|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_NO_EVICTION (7<<8)
+
+#define CONFIG_DEFAULT_MAXMEMORY_POLICY MAXMEMORY_NO_EVICTION
+
+/* Scripting */
+#define LUA_SCRIPT_TIME_LIMIT 5000 /* milliseconds */
+
+/* Units */
+#define UNIT_SECONDS 0
+#define UNIT_MILLISECONDS 1
+
+/* SHUTDOWN flags */
+#define SHUTDOWN_NOFLAGS 0 /* No flags. */
+#define SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
+ points are configured. */
+#define SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
+
+/* Command call flags, see call() function */
+#define CMD_CALL_NONE 0
+#define CMD_CALL_SLOWLOG (1<<0)
+#define CMD_CALL_STATS (1<<1)
+#define CMD_CALL_PROPAGATE_AOF (1<<2)
+#define CMD_CALL_PROPAGATE_REPL (1<<3)
+#define CMD_CALL_PROPAGATE (CMD_CALL_PROPAGATE_AOF|CMD_CALL_PROPAGATE_REPL)
+#define CMD_CALL_FULL (CMD_CALL_SLOWLOG | CMD_CALL_STATS | CMD_CALL_PROPAGATE)
+
+/* Command propagation flags, see propagate() function */
+#define PROPAGATE_NONE 0
+#define PROPAGATE_AOF 1
+#define PROPAGATE_REPL 2
+
+/* RDB active child save type. */
+#define RDB_CHILD_TYPE_NONE 0
+#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
+#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
+
+/* Keyspace changes notification classes. Every class is associated with a
+ * character for configuration purposes. */
+#define NOTIFY_KEYSPACE (1<<0) /* K */
+#define NOTIFY_KEYEVENT (1<<1) /* E */
+#define NOTIFY_GENERIC (1<<2) /* g */
+#define NOTIFY_STRING (1<<3) /* $ */
+#define NOTIFY_LIST (1<<4) /* l */
+#define NOTIFY_SET (1<<5) /* s */
+#define NOTIFY_HASH (1<<6) /* h */
+#define NOTIFY_ZSET (1<<7) /* z */
+#define NOTIFY_EXPIRED (1<<8) /* x */
+#define NOTIFY_EVICTED (1<<9) /* e */
+#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED) /* A */
+
+/* Get the first bind addr or NULL */
+#define NET_FIRST_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL)
+
+/* Using the following macro you can run code inside serverCron() with the
+ * specified period, specified in milliseconds.
+ * The actual resolution depends on server.hz. */
+#define run_with_period(_ms_) if ((_ms_ <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
+
+/* We can print the stacktrace, so our assert is defined this way: */
+#define serverAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_serverAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
+#define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1)
+
+/*-----------------------------------------------------------------------------
+ * Data types
+ *----------------------------------------------------------------------------*/
+
+/* A redis object, that is a type able to hold a string / list / set */
+
+/* The actual Redis Object */
+#define OBJ_STRING 0
+#define OBJ_LIST 1
+#define OBJ_SET 2
+#define OBJ_ZSET 3
+#define OBJ_HASH 4
+
+/* The "module" object type is a special one that signals that the object
+ * is one directly managed by a Redis module. In this case the value points
+ * to a moduleValue struct, which contains the object value (which is only
+ * handled by the module itself) and the RedisModuleType struct which lists
+ * function pointers in order to serialize, deserialize, AOF-rewrite and
+ * free the object.
+ *
+ * Inside the RDB file, module types are encoded as OBJ_MODULE followed
+ * by a 64 bit module type ID, which has a 54 bits module-specific signature
+ * in order to dispatch the loading to the right module, plus a 10 bits
+ * encoding version. */
+#define OBJ_MODULE 5
+
+/* Extract encver / signature from a module type ID. */
+#define REDISMODULE_TYPE_ENCVER_BITS 10
+#define REDISMODULE_TYPE_ENCVER_MASK ((1<<REDISMODULE_TYPE_ENCVER_BITS)-1)
+#define REDISMODULE_TYPE_ENCVER(id) (id & REDISMODULE_TYPE_ENCVER_MASK)
+#define REDISMODULE_TYPE_SIGN(id) ((id & ~((uint64_t)REDISMODULE_TYPE_ENCVER_MASK)) >>REDISMODULE_TYPE_ENCVER_BITS)
+
+struct RedisModule;
+struct RedisModuleIO;
+struct RedisModuleDigest;
+struct RedisModuleCtx;
+struct redisObject;
+
+/* Each module type implementation should export a set of methods in order
+ * to serialize and deserialize the value in the RDB file, rewrite the AOF
+ * log, create the digest for "DEBUG DIGEST", and free the value when a key
+ * is deleted. */
+typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver);
+typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value);
+typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value);
+typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value);
+typedef size_t (*moduleTypeMemUsageFunc)(const void *value);
+typedef void (*moduleTypeFreeFunc)(void *value);
+
+/* The module type, which is referenced in each value of a given type, defines
+ * the methods and links to the module exporting the type. */
+typedef struct RedisModuleType {
+ uint64_t id; /* Higher 54 bits of type ID + 10 lower bits of encoding ver. */
+ struct RedisModule *module;
+ moduleTypeLoadFunc rdb_load;
+ moduleTypeSaveFunc rdb_save;
+ moduleTypeRewriteFunc aof_rewrite;
+ moduleTypeMemUsageFunc mem_usage;
+ moduleTypeDigestFunc digest;
+ moduleTypeFreeFunc free;
+ char name[10]; /* 9 bytes name + null term. Charset: A-Z a-z 0-9 _- */
+} moduleType;
+
+/* In Redis objects 'robj' structures of type OBJ_MODULE, the value pointer
+ * is set to the following structure, referencing the moduleType structure
+ * in order to work with the value, and at the same time providing a raw
+ * pointer to the value, as created by the module commands operating with
+ * the module type.
+ *
+ * So for example in order to free such a value, it is possible to use
+ * the following code:
+ *
+ * if (robj->type == OBJ_MODULE) {
+ * moduleValue *mt = robj->ptr;
+ * mt->type->free(mt->value);
+ * zfree(mt); // We need to release this in-the-middle struct as well.
+ * }
+ */
+typedef struct moduleValue {
+ moduleType *type;
+ void *value;
+} moduleValue;
+
+/* This is a wrapper for the 'rio' streams used inside rdb.c in Redis, so that
+ * the user does not have to take the total count of the written bytes nor
+ * to care about error conditions. */
+typedef struct RedisModuleIO {
+ size_t bytes; /* Bytes read / written so far. */
+ rio *rio; /* Rio stream. */
+ moduleType *type; /* Module type doing the operation. */
+ int error; /* True if error condition happened. */
+ int ver; /* Module serialization version: 1 (old),
+ * 2 (current version with opcodes annotation). */
+ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/
+} RedisModuleIO;
+
+/* Macro to initialize an IO context. Note that the 'ver' field is populated
+ * inside rdb.c according to the version of the value to load. */
+#define moduleInitIOContext(iovar,mtype,rioptr) do { \
+ iovar.rio = rioptr; \
+ iovar.type = mtype; \
+ iovar.bytes = 0; \
+ iovar.error = 0; \
+ iovar.ver = 0; \
+ iovar.ctx = NULL; \
+} while(0);
+
+/* This is a structure used to export DEBUG DIGEST capabilities to Redis
+ * modules. We want to capture both the ordered and unordered elements of
+ * a data structure, so that a digest can be created in a way that correctly
+ * reflects the values. See the DEBUG DIGEST command implementation for more
+ * background. */
+typedef struct RedisModuleDigest {
+ unsigned char o[20]; /* Ordered elements. */
+ unsigned char x[20]; /* Xored elements. */
+} RedisModuleDigest;
+
+/* Just start with a digest composed of all zero bytes. */
+#define moduleInitDigestContext(mdvar) do { \
+ memset(mdvar.o,0,sizeof(mdvar.o)); \
+ memset(mdvar.x,0,sizeof(mdvar.x)); \
+} while(0);
+
+/* Objects encoding. Some kind of objects like Strings and Hashes can be
+ * internally represented in multiple ways. The 'encoding' field of the object
+ * is set to one of this fields for this object. */
+#define OBJ_ENCODING_RAW 0 /* Raw representation */
+#define OBJ_ENCODING_INT 1 /* Encoded as integer */
+#define OBJ_ENCODING_HT 2 /* Encoded as hash table */
+#define OBJ_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
+#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
+#define OBJ_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
+#define OBJ_ENCODING_INTSET 6 /* Encoded as intset */
+#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
+#define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
+#define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
+
+#define LRU_BITS 24
+#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1) /* Max value of obj->lru */
+#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
+
+#define OBJ_SHARED_REFCOUNT INT_MAX
+typedef struct redisObject {
+ unsigned type:4;
+ unsigned encoding:4;
+ unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
+ * LFU data (least significant 8 bits frequency
+ * and most significant 16 bits decrease time). */
+ int refcount;
+ void *ptr;
+} robj;
+
+/* Macro used to initialize a Redis object allocated on the stack.
+ * Note that this macro is taken near the structure definition to make sure
+ * we'll update it when the structure is changed, to avoid bugs like
+ * bug #85 introduced exactly in this way. */
+#define initStaticStringObject(_var,_ptr) do { \
+ _var.refcount = 1; \
+ _var.type = OBJ_STRING; \
+ _var.encoding = OBJ_ENCODING_RAW; \
+ _var.ptr = _ptr; \
+} while(0)
+
+struct evictionPoolEntry; /* Defined in evict.c */
+
+/* Redis database representation. There are multiple databases identified
+ * by integers from 0 (the default database) up to the max configured
+ * database. The database number is the 'id' field in the structure. */
+typedef struct redisDb {
+ dict *dict; /* The keyspace for this DB */
+ dict *expires; /* Timeout of keys with a timeout set */
+ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
+ dict *ready_keys; /* Blocked keys that received a PUSH */
+ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
+ int id; /* Database ID */
+ long long avg_ttl; /* Average TTL, just for stats */
+} redisDb;
+
+/* Client MULTI/EXEC state */
+typedef struct multiCmd {
+ robj **argv;
+ int argc;
+ struct redisCommand *cmd;
+} multiCmd;
+
+typedef struct multiState {
+ multiCmd *commands; /* Array of MULTI commands */
+ int count; /* Total number of MULTI commands */
+ int minreplicas; /* MINREPLICAS for synchronous replication */
+ time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
+} multiState;
+
+/* This structure holds the blocking operation state for a client.
+ * The fields used depend on client->btype. */
+typedef struct blockingState {
+ /* Generic fields. */
+ mstime_t timeout; /* Blocking operation timeout. If UNIX current time
+ * is > timeout then the operation timed out. */
+
+ /* BLOCKED_LIST */
+ dict *keys; /* The keys we are waiting to terminate a blocking
+ * operation such as BLPOP. Otherwise NULL. */
+ robj *target; /* The key that should receive the element,
+ * for BRPOPLPUSH. */
+
+ /* BLOCKED_WAIT */
+ int numreplicas; /* Number of replicas we are waiting for ACK. */
+ long long reploffset; /* Replication offset to reach. */
+
+ /* BLOCKED_MODULE */
+ void *module_blocked_handle; /* RedisModuleBlockedClient structure.
+ which is opaque for the Redis core, only
+ handled in module.c. */
+} blockingState;
+
+/* The following structure represents a node in the server.ready_keys list,
+ * where we accumulate all the keys that had clients blocked with a blocking
+ * operation such as B[LR]POP, but received new data in the context of the
+ * last executed command.
+ *
+ * After the execution of every command or script, we run this list to check
+ * if as a result we should serve data to clients blocked, unblocking them.
+ * Note that server.ready_keys will not have duplicates as there is a dictionary
+ * also called ready_keys in every structure representing a Redis database,
+ * where we make sure to remember if a given key was already added in the
+ * server.ready_keys list. */
+typedef struct readyList {
+ redisDb *db;
+ robj *key;
+} readyList;
+
+/* With multiplexing we need to take per-client state.
+ * Clients are taken in a linked list. */
+typedef struct client {
+    uint64_t id;            /* Client incremental unique ID. */
+    int fd;                 /* Client socket. */
+    redisDb *db;            /* Pointer to currently SELECTed DB. */
+    robj *name;             /* As set by CLIENT SETNAME. */
+    sds querybuf;           /* Buffer we use to accumulate client queries. */
+    sds pending_querybuf;   /* If this is a master, this buffer represents the
+                               yet not applied replication stream that we
+                               are receiving from the master. */
+    size_t querybuf_peak;   /* Recent (100ms or more) peak of querybuf size. */
+    int argc;               /* Num of arguments of current command. */
+    robj **argv;            /* Arguments of current command. */
+    struct redisCommand *cmd, *lastcmd;  /* Last command executed. */
+    int reqtype;            /* Request protocol type: PROTO_REQ_* */
+    int multibulklen;       /* Number of multi bulk arguments left to read. */
+    long bulklen;           /* Length of bulk argument in multi bulk request. */
+    list *reply;            /* List of reply objects to send to the client. */
+    unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
+    size_t sentlen;         /* Amount of bytes already sent in the current
+                               buffer or object being sent. */
+    time_t ctime;           /* Client creation time. */
+    time_t lastinteraction; /* Time of the last interaction, used for timeout */
+    time_t obuf_soft_limit_reached_time; /* When the soft output buffer limit
+                               was reached (see
+                               asyncCloseClientOnOutputBufferLimitReached). */
+    int flags;              /* Client flags: CLIENT_* macros. */
+    int authenticated;      /* When requirepass is non-NULL. */
+    int replstate;          /* Replication state if this is a slave. */
+    int repl_put_online_on_ack; /* Install slave write handler on ACK. */
+    int repldbfd;           /* Replication DB file descriptor. */
+    off_t repldboff;        /* Replication DB file offset. */
+    off_t repldbsize;       /* Replication DB file size. */
+    sds replpreamble;       /* Replication DB preamble. */
+    long long read_reploff; /* Read replication offset if this is a master. */
+    long long reploff;      /* Applied replication offset if this is a master. */
+    long long repl_ack_off; /* Replication ack offset, if this is a slave. */
+    long long repl_ack_time;/* Replication ack time, if this is a slave. */
+    long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
+                                       copying this slave output buffer
+                                       should use. */
+    char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */
+    int slave_listening_port; /* As configured with: SLAVECONF listening-port */
+    char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */
+    int slave_capa;         /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
+    multiState mstate;      /* MULTI/EXEC state */
+    int btype;              /* Type of blocking op if CLIENT_BLOCKED. */
+    blockingState bpop;     /* blocking state */
+    long long woff;         /* Last write global replication offset. */
+    list *watched_keys;     /* Keys WATCHED for MULTI/EXEC CAS */
+    dict *pubsub_channels;  /* channels a client is interested in (SUBSCRIBE) */
+    list *pubsub_patterns;  /* patterns a client is interested in (SUBSCRIBE) */
+    sds peerid;             /* Cached peer ID. */
+
+    /* Response buffer */
+    int bufpos;             /* Used bytes in buf[] (see sentlen above). */
+    char buf[PROTO_REPLY_CHUNK_BYTES]; /* Static output buffer, used together
+                               with the 'reply' list above. */
+} client;
+
+/* RDB save point (an element of server.saveparams): a BGSAVE is triggered
+ * if at least 'changes' changes happened in the last 'seconds' seconds. */
+struct saveparam {
+    time_t seconds;     /* Time window of the save point. */
+    int changes;        /* Minimum number of changes in that window. */
+};
+
+/* An element of server.loadmodule_queue: a module to load at startup,
+ * with the arguments to pass to it. */
+struct moduleLoadQueueEntry {
+    sds path;           /* Path of the module shared object. */
+    int argc;           /* Number of load-time arguments. */
+    robj **argv;        /* Load-time arguments vector. */
+};
+
+/* Objects created once and shared everywhere, to avoid allocating the
+ * same common replies (status / error strings, protocol headers) and
+ * small integers over and over. */
+struct sharedObjectsStruct {
+    robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
+    *colon, *nullbulk, *nullmultibulk, *queued,
+    *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
+    *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
+    *masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
+    *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
+    *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
+    *rpop, *lpop, *lpush, *emptyscan,
+    *select[PROTO_SHARED_SELECT_CMDS],      /* Shared SELECT <db> commands. */
+    *integers[OBJ_SHARED_INTEGERS],         /* Shared small integer objects. */
+    *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
+    *bulkhdr[OBJ_SHARED_BULKHDR_LEN];  /* "$<value>\r\n" */
+    sds minstring, maxstring;          /* Sentinels for sds range comparisons. */
+};
+
+/* ZSETs use a specialized version of Skiplists */
+typedef struct zskiplistNode {
+    sds ele;                        /* Element (member) string. */
+    double score;                   /* Score the skiplist is ordered by. */
+    struct zskiplistNode *backward; /* Previous node (lowest level only). */
+    struct zskiplistLevel {
+        struct zskiplistNode *forward;  /* Next node at this level. */
+        unsigned int span;              /* Elements skipped by 'forward'. */
+    } level[];                      /* Flexible array: one entry per level. */
+} zskiplistNode;
+
+typedef struct zskiplist {
+    struct zskiplistNode *header, *tail; /* Head and tail of the list. */
+    unsigned long length;                /* Number of elements. */
+    int level;                           /* Current highest level in use. */
+} zskiplist;
+
+/* A sorted set is represented by both a dict and a skiplist holding the
+ * same elements, combining O(1) member lookup with ordered traversal. */
+typedef struct zset {
+    dict *dict;     /* Member -> score lookup. */
+    zskiplist *zsl; /* Score-ordered view of the same members. */
+} zset;
+
+/* Output buffer limits for one class of clients (normal / slave / pubsub).
+ * Crossing the hard limit, or staying over the soft limit for more than
+ * soft_limit_seconds, gets the client disconnected. */
+typedef struct clientBufferLimitsConfig {
+    unsigned long long hard_limit_bytes;  /* Immediate-disconnect threshold. */
+    unsigned long long soft_limit_bytes;  /* Grace-period threshold. */
+    time_t soft_limit_seconds;            /* Grace period length. */
+} clientBufferLimitsConfig;
+
+/* Built-in defaults, one entry per client class. */
+extern clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT];
+
+/* The redisOp structure defines a Redis Operation, that is an instance of
+ * a command with an argument vector, database ID, propagation target
+ * (PROPAGATE_*), and command pointer.
+ *
+ * Currently only used to additionally propagate more commands to AOF/Replication
+ * after the propagation of the executed command. */
+typedef struct redisOp {
+    robj **argv;                /* Argument vector of the operation. */
+    int argc, dbid, target;     /* Argc, database ID, PROPAGATE_* target. */
+    struct redisCommand *cmd;   /* Command to propagate. */
+} redisOp;
+
+/* Defines an array of Redis operations. There is an API to add to this
+ * structure in an easy way.
+ *
+ * redisOpArrayInit();
+ * redisOpArrayAppend();
+ * redisOpArrayFree();
+ */
+typedef struct redisOpArray {
+    redisOp *ops;   /* Dynamically allocated array of operations. */
+    int numops;     /* Number of used entries in 'ops'. */
+} redisOpArray;
+
+/* This structure is returned by the getMemoryOverheadData() function in
+ * order to return memory overhead information. All sizes are in bytes
+ * unless the field name says otherwise. */
+struct redisMemOverhead {
+    size_t peak_allocated;      /* Peak allocator-reported memory. */
+    size_t total_allocated;     /* Currently allocated memory. */
+    size_t startup_allocated;   /* Memory used right after startup. */
+    size_t repl_backlog;        /* Replication backlog size. */
+    size_t clients_slaves;      /* Overhead of slave clients. */
+    size_t clients_normal;      /* Overhead of normal clients. */
+    size_t aof_buffer;          /* AOF buffers size. */
+    size_t overhead_total;      /* Sum of all overheads. */
+    size_t dataset;             /* Memory held by the dataset itself. */
+    size_t total_keys;          /* Number of keys (count, not bytes). */
+    size_t bytes_per_key;       /* Average per-key cost. */
+    float dataset_perc;         /* Dataset as % of net memory usage. */
+    float peak_perc;            /* Current usage as % of peak. */
+    float fragmentation;        /* Allocator fragmentation ratio. */
+    size_t num_dbs;             /* Number of entries in 'db' below. */
+    struct {
+        size_t dbid;                /* Database ID. */
+        size_t overhead_ht_main;    /* Main hash table overhead. */
+        size_t overhead_ht_expires; /* Expires hash table overhead. */
+    } *db;                      /* Per-database overhead, num_dbs entries. */
+};
+
+/* This structure can be optionally passed to RDB save/load functions in
+ * order to implement additional functionalities, by storing and loading
+ * metadata to the RDB file.
+ *
+ * Currently the only use is to select a DB at load time, useful in
+ * replication in order to make sure that chained slaves (slaves of slaves)
+ * select the correct DB and are able to accept the stream coming from the
+ * top-level master. */
+typedef struct rdbSaveInfo {
+    /* Used saving and loading. */
+    int repl_stream_db;  /* DB to select in server.master client. */
+
+    /* Used only loading. */
+    int repl_id_is_set;  /* True if repl_id field is set. */
+    char repl_id[CONFIG_RUN_ID_SIZE+1];     /* Replication ID. */
+    long long repl_offset;                  /* Replication offset. */
+} rdbSaveInfo;
+
+/* Static initializer: no stream DB (-1), repl_id not set, offset -1. If the
+ * string literal is shorter than the repl_id array, C zero-fills the rest. */
+#define RDB_SAVE_INFO_INIT {-1,0,"000000000000000000000000000000",-1}
+
+/*-----------------------------------------------------------------------------
+ * Global server state
+ *----------------------------------------------------------------------------*/
+
+struct clusterState;
+
+/* AIX defines hz to __hz, we don't use this define and in order to allow
+ * Redis build on AIX we need to undef it. */
+#ifdef _AIX
+#undef hz
+#endif
+
+#define CHILD_INFO_MAGIC 0xC17DDA7A12345678LL
+#define CHILD_INFO_TYPE_RDB 0
+#define CHILD_INFO_TYPE_AOF 1
+
+/* Global server state: a single instance of this structure (the global
+ * 'server' variable) holds the whole runtime state of the process. */
+struct redisServer {
+    /* General */
+    pid_t pid;                  /* Main process pid. */
+    char *configfile;           /* Absolute config file path, or NULL */
+    char *executable;           /* Absolute executable file path. */
+    char **exec_argv;           /* Executable argv vector (copy). */
+    int hz;                     /* serverCron() calls frequency in hertz */
+    redisDb *db;                /* Databases array (see 'dbnum' below). */
+    dict *commands;             /* Command table */
+    dict *orig_commands;        /* Command table before command renaming. */
+    aeEventLoop *el;            /* Main event loop. */
+    unsigned int lruclock;      /* Clock for LRU eviction */
+    int shutdown_asap;          /* SHUTDOWN needed ASAP */
+    int activerehashing;        /* Incremental rehash in serverCron() */
+    int active_defrag_running;  /* Active defragmentation running (holds current scan aggressiveness) */
+    char *requirepass;          /* Pass for AUTH command, or NULL */
+    char *pidfile;              /* PID file path */
+    int arch_bits;              /* 32 or 64 depending on sizeof(long) */
+    int cronloops;              /* Number of times the cron function run */
+    char runid[CONFIG_RUN_ID_SIZE+1];  /* ID always different at every exec. */
+    int sentinel_mode;          /* True if this instance is a Sentinel. */
+    size_t initial_memory_usage; /* Bytes used after initialization. */
+    int always_show_logo;       /* Show logo even for non-stdout logging. */
+    /* Modules */
+    dict *moduleapi;            /* Exported APIs dictionary for modules. */
+    list *loadmodule_queue;     /* List of modules to load at startup. */
+    int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a
+                                   client blocked on a module command needs
+                                   to be processed. */
+    /* Networking */
+    int port;                   /* TCP listening port */
+    int tcp_backlog;            /* TCP listen() backlog */
+    char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
+    int bindaddr_count;         /* Number of addresses in server.bindaddr[] */
+    char *unixsocket;           /* UNIX socket path */
+    mode_t unixsocketperm;      /* UNIX socket permission */
+    int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
+    int ipfd_count;             /* Used slots in ipfd[] */
+    int sofd;                   /* Unix socket file descriptor */
+    int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
+    int cfd_count;              /* Used slots in cfd[] */
+    list *clients;              /* List of active clients */
+    list *clients_to_close;     /* Clients to close asynchronously */
+    list *clients_pending_write; /* There is to write or install handler. */
+    list *slaves, *monitors;    /* List of slaves and MONITORs */
+    client *current_client;     /* Current client, only used on crash report */
+    int clients_paused;         /* True if clients are currently paused */
+    mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
+    char neterr[ANET_ERR_LEN];  /* Error buffer for anet.c */
+    dict *migrate_cached_sockets;/* MIGRATE cached sockets */
+    uint64_t next_client_id;    /* Next client unique ID. Incremental. */
+    int protected_mode;         /* Don't accept external connections. */
+    /* RDB / AOF loading information */
+    int loading;                /* We are loading data from disk if true */
+    off_t loading_total_bytes;  /* Total bytes to load. */
+    off_t loading_loaded_bytes; /* Bytes loaded so far. */
+    time_t loading_start_time;  /* Unix time the load started. */
+    off_t loading_process_events_interval_bytes; /* Serve events every N bytes. */
+    /* Fast pointers to often looked up command */
+    struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
+                        *rpopCommand, *sremCommand, *execCommand, *expireCommand,
+                        *pexpireCommand;
+    /* Fields used only for stats */
+    time_t stat_starttime;          /* Server start time */
+    long long stat_numcommands;     /* Number of processed commands */
+    long long stat_numconnections;  /* Number of connections received */
+    long long stat_expiredkeys;     /* Number of expired keys */
+    long long stat_evictedkeys;     /* Number of evicted keys (maxmemory) */
+    long long stat_keyspace_hits;   /* Number of successful lookups of keys */
+    long long stat_keyspace_misses; /* Number of failed lookups of keys */
+    long long stat_active_defrag_hits;      /* number of allocations moved */
+    long long stat_active_defrag_misses;    /* number of allocations scanned but not moved */
+    long long stat_active_defrag_key_hits;  /* number of keys with moved allocations */
+    long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */
+    size_t stat_peak_memory;        /* Max used memory record */
+    long long stat_fork_time;       /* Time needed to perform latest fork() */
+    double stat_fork_rate;          /* Fork rate in GB/sec. */
+    long long stat_rejected_conn;   /* Clients rejected because of maxclients */
+    long long stat_sync_full;       /* Number of full resyncs with slaves. */
+    long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
+    long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
+    list *slowlog;                  /* SLOWLOG list of commands */
+    long long slowlog_entry_id;     /* SLOWLOG current entry ID */
+    long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
+    unsigned long slowlog_max_len;     /* SLOWLOG max number of items logged */
+    size_t resident_set_size;       /* RSS sampled in serverCron(). */
+    long long stat_net_input_bytes; /* Bytes read from network. */
+    long long stat_net_output_bytes; /* Bytes written to network. */
+    size_t stat_rdb_cow_bytes;      /* Copy on write bytes during RDB saving. */
+    size_t stat_aof_cow_bytes;      /* Copy on write bytes during AOF rewrite. */
+    /* The following two are used to track instantaneous metrics, like
+     * number of operations per second, network traffic. */
+    struct {
+        long long last_sample_time; /* Timestamp of last sample in ms */
+        long long last_sample_count;/* Count in last sample */
+        long long samples[STATS_METRIC_SAMPLES];
+        int idx;
+    } inst_metric[STATS_METRIC_COUNT];
+    /* Configuration */
+    int verbosity;                  /* Loglevel in redis.conf */
+    int maxidletime;                /* Client timeout in seconds */
+    int tcpkeepalive;               /* Set SO_KEEPALIVE if non-zero. */
+    int active_expire_enabled;      /* Can be disabled for testing purposes. */
+    int active_defrag_enabled;      /* Is active defragmentation enabled? */
+    size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
+    int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
+    int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
+    int active_defrag_cycle_min;    /* minimal effort for defrag in CPU percentage */
+    int active_defrag_cycle_max;    /* maximal effort for defrag in CPU percentage */
+    size_t client_max_querybuf_len; /* Limit for client query buffer length */
+    int dbnum;                      /* Total number of configured DBs */
+    int supervised;                 /* 1 if supervised, 0 otherwise. */
+    int supervised_mode;            /* See SUPERVISED_* */
+    int daemonize;                  /* True if running as a daemon */
+    clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
+    /* AOF persistence */
+    int aof_state;                  /* AOF_(ON|OFF|WAIT_REWRITE) */
+    int aof_fsync;                  /* Kind of fsync() policy */
+    char *aof_filename;             /* Name of the AOF file */
+    int aof_no_fsync_on_rewrite;    /* Don't fsync if a rewrite is in prog. */
+    int aof_rewrite_perc;           /* Rewrite AOF if % growth is > M and... */
+    off_t aof_rewrite_min_size;     /* the AOF file is at least N bytes. */
+    off_t aof_rewrite_base_size;    /* AOF size on latest startup or rewrite. */
+    off_t aof_current_size;         /* AOF current size. */
+    int aof_rewrite_scheduled;      /* Rewrite once BGSAVE terminates. */
+    pid_t aof_child_pid;            /* PID if rewriting process */
+    list *aof_rewrite_buf_blocks;   /* Hold changes during an AOF rewrite. */
+    sds aof_buf;        /* AOF buffer, written before entering the event loop */
+    int aof_fd;         /* File descriptor of currently selected AOF file */
+    int aof_selected_db; /* Currently selected DB in AOF */
+    time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
+    time_t aof_last_fsync;          /* UNIX time of last fsync() */
+    time_t aof_rewrite_time_last;   /* Time used by last AOF rewrite run. */
+    time_t aof_rewrite_time_start;  /* Current AOF rewrite start time. */
+    int aof_lastbgrewrite_status;   /* C_OK or C_ERR */
+    unsigned long aof_delayed_fsync;  /* delayed AOF fsync() counter */
+    int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
+    int aof_last_write_status;      /* C_OK or C_ERR */
+    int aof_last_write_errno;       /* Valid if aof_last_write_status is ERR */
+    int aof_load_truncated;         /* Don't stop on unexpected AOF EOF. */
+    int aof_use_rdb_preamble;       /* Use RDB preamble on AOF rewrites. */
+    /* AOF pipes used to communicate between parent and child during rewrite. */
+    int aof_pipe_write_data_to_child;
+    int aof_pipe_read_data_from_parent;
+    int aof_pipe_write_ack_to_parent;
+    int aof_pipe_read_ack_from_child;
+    int aof_pipe_write_ack_to_child;
+    int aof_pipe_read_ack_from_parent;
+    int aof_stop_sending_diff;      /* If true stop sending accumulated diffs
+                                       to child process. */
+    sds aof_child_diff;             /* AOF diff accumulator child side. */
+    /* RDB persistence */
+    long long dirty;                /* Changes to DB from the last save */
+    long long dirty_before_bgsave;  /* Used to restore dirty on failed BGSAVE */
+    pid_t rdb_child_pid;            /* PID of RDB saving child */
+    struct saveparam *saveparams;   /* Save points array for RDB */
+    int saveparamslen;              /* Number of saving points */
+    char *rdb_filename;             /* Name of RDB file */
+    int rdb_compression;            /* Use compression in RDB? */
+    int rdb_checksum;               /* Use RDB checksum? */
+    time_t lastsave;                /* Unix time of last successful save */
+    time_t lastbgsave_try;          /* Unix time of last attempted bgsave */
+    time_t rdb_save_time_last;      /* Time used by last RDB save run. */
+    time_t rdb_save_time_start;     /* Current RDB save start time. */
+    int rdb_bgsave_scheduled;       /* BGSAVE when possible if true. */
+    int rdb_child_type;             /* Type of save by active child. */
+    int lastbgsave_status;          /* C_OK or C_ERR */
+    int stop_writes_on_bgsave_err;  /* Don't allow writes if can't BGSAVE */
+    int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
+    int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
+    /* Pipe and data structures for child -> parent info sharing. */
+    int child_info_pipe[2];         /* Pipe used to write the child_info_data. */
+    struct {
+        int process_type;           /* AOF or RDB child? */
+        size_t cow_size;            /* Copy on write size. */
+        unsigned long long magic;   /* Magic value to make sure data is valid. */
+    } child_info_data;
+    /* Propagation of commands in AOF / replication */
+    redisOpArray also_propagate;    /* Additional command to propagate. */
+    /* Logging */
+    char *logfile;                  /* Path of log file */
+    int syslog_enabled;             /* Is syslog enabled? */
+    char *syslog_ident;             /* Syslog ident */
+    int syslog_facility;            /* Syslog facility */
+    /* Replication (master) */
+    char replid[CONFIG_RUN_ID_SIZE+1];  /* My current replication ID. */
+    char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
+    long long master_repl_offset;   /* My current replication offset */
+    long long second_replid_offset; /* Accept offsets up to this for replid2. */
+    int slaveseldb;                 /* Last SELECTed DB in replication output */
+    int repl_ping_slave_period;     /* Master pings the slave every N seconds */
+    char *repl_backlog;             /* Replication backlog for partial syncs */
+    long long repl_backlog_size;    /* Backlog circular buffer size */
+    long long repl_backlog_histlen; /* Backlog actual data length */
+    long long repl_backlog_idx;     /* Backlog circular buffer current offset,
+                                       that is the next byte we'll write to.*/
+    long long repl_backlog_off;     /* Replication "master offset" of first
+                                       byte in the replication backlog buffer.*/
+    time_t repl_backlog_time_limit; /* Time without slaves after the backlog
+                                       gets released. */
+    time_t repl_no_slaves_since;    /* We have no slaves since that time.
+                                       Only valid if server.slaves len is 0. */
+    int repl_min_slaves_to_write;   /* Min number of slaves to write. */
+    int repl_min_slaves_max_lag;    /* Max lag of <count> slaves to write. */
+    int repl_good_slaves_count;     /* Number of slaves with lag <= max_lag. */
+    int repl_diskless_sync;         /* Send RDB to slaves sockets directly. */
+    int repl_diskless_sync_delay;   /* Delay to start a diskless repl BGSAVE. */
+    /* Replication (slave) */
+    char *masterauth;               /* AUTH with this password with master */
+    char *masterhost;               /* Hostname of master */
+    int masterport;                 /* Port of master */
+    int repl_timeout;               /* Timeout after N seconds of master idle */
+    client *master;                 /* Client that is master for this slave */
+    client *cached_master;          /* Cached master to be reused for PSYNC. */
+    int repl_syncio_timeout;        /* Timeout for synchronous I/O calls */
+    int repl_state;           /* Replication status if the instance is a slave */
+    off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
+    off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
+    off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
+    int repl_transfer_s;            /* Slave -> Master SYNC socket */
+    int repl_transfer_fd;           /* Slave -> Master SYNC temp file descriptor */
+    char *repl_transfer_tmpfile;    /* Slave -> master SYNC temp file name */
+    time_t repl_transfer_lastio;    /* Unix time of the latest read, for timeout */
+    int repl_serve_stale_data;      /* Serve stale data when link is down? */
+    int repl_slave_ro;              /* Slave is read only? */
+    time_t repl_down_since;  /* Unix time at which link with master went down */
+    int repl_disable_tcp_nodelay;   /* Disable TCP_NODELAY after SYNC? */
+    int slave_priority;             /* Reported in INFO and used by Sentinel. */
+    int slave_announce_port;        /* Give the master this listening port. */
+    char *slave_announce_ip;        /* Give the master this ip address. */
+    /* The following two fields are where we store master PSYNC replid/offset
+     * while the PSYNC is in progress. At the end we'll copy the fields into
+     * the server->master client structure. */
+    char master_replid[CONFIG_RUN_ID_SIZE+1];  /* Master PSYNC runid. */
+    long long master_initial_offset;           /* Master PSYNC offset. */
+    int repl_slave_lazy_flush;          /* Lazy FLUSHALL before loading DB? */
+    /* Replication script cache. */
+    dict *repl_scriptcache_dict;        /* SHA1 all slaves are aware of. */
+    list *repl_scriptcache_fifo;        /* First in, first out LRU eviction. */
+    unsigned int repl_scriptcache_size; /* Max number of elements. */
+    /* Synchronous replication. */
+    list *clients_waiting_acks;         /* Clients waiting in WAIT command. */
+    int get_ack_from_slaves;            /* If true we send REPLCONF GETACK. */
+    /* Limits */
+    unsigned int maxclients;        /* Max number of simultaneous clients */
+    unsigned long long maxmemory;   /* Max number of memory bytes to use */
+    int maxmemory_policy;           /* Policy for key eviction */
+    int maxmemory_samples;          /* Precision of random sampling */
+    unsigned int lfu_log_factor;    /* LFU logarithmic counter factor. */
+    unsigned int lfu_decay_time;    /* LFU counter decay factor. */
+    /* Blocked clients */
+    unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */
+    list *unblocked_clients;  /* list of clients to unblock before next loop */
+    list *ready_keys;         /* List of readyList structures for BLPOP & co */
+    /* Sort parameters - qsort_r() is only available under BSD so we
+     * have to take this state global, in order to pass it to sortCompare() */
+    int sort_desc;
+    int sort_alpha;
+    int sort_bypattern;
+    int sort_store;
+    /* Zip structure config, see redis.conf for more information */
+    size_t hash_max_ziplist_entries;
+    size_t hash_max_ziplist_value;
+    size_t set_max_intset_entries;
+    size_t zset_max_ziplist_entries;
+    size_t zset_max_ziplist_value;
+    size_t hll_sparse_max_bytes;
+    /* List parameters */
+    int list_max_ziplist_size;
+    int list_compress_depth;
+    /* time cache */
+    time_t unixtime;    /* Unix time sampled every cron cycle. */
+    long long mstime;   /* Like 'unixtime' but with milliseconds resolution. */
+    /* Pubsub */
+    dict *pubsub_channels;  /* Map channels to list of subscribed clients */
+    list *pubsub_patterns;  /* A list of pubsub_patterns */
+    int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
+                                   xor of NOTIFY_... flags. */
+    /* Cluster */
+    int cluster_enabled;           /* Is cluster enabled? */
+    mstime_t cluster_node_timeout; /* Cluster node timeout. */
+    char *cluster_configfile;      /* Cluster auto-generated config file name. */
+    struct clusterState *cluster;  /* State of the cluster */
+    int cluster_migration_barrier; /* Cluster replicas migration barrier. */
+    int cluster_slave_validity_factor; /* Slave max data age for failover. */
+    int cluster_require_full_coverage; /* If true, put the cluster down if
+                                          there is at least an uncovered slot.*/
+    char *cluster_announce_ip;     /* IP address to announce on cluster bus. */
+    int cluster_announce_port;     /* base port to announce on cluster bus. */
+    int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
+    /* Scripting */
+    lua_State *lua;       /* The Lua interpreter. We use just one for all clients */
+    client *lua_client;   /* The "fake client" to query Redis from Lua */
+    client *lua_caller;   /* The client running EVAL right now, or NULL */
+    dict *lua_scripts;    /* A dictionary of SHA1 -> Lua scripts */
+    mstime_t lua_time_limit;  /* Script timeout in milliseconds */
+    mstime_t lua_time_start;  /* Start time of script, milliseconds time */
+    int lua_write_dirty;  /* True if a write command was called during the
+                             execution of the current script. */
+    int lua_random_dirty; /* True if a random command was called during the
+                             execution of the current script. */
+    int lua_replicate_commands; /* True if we are doing single commands repl. */
+    int lua_multi_emitted;/* True if we already propagated MULTI. */
+    int lua_repl;         /* Script replication flags for redis.set_repl(). */
+    int lua_timedout;     /* True if we reached the time limit for script
+                             execution. */
+    int lua_kill;         /* Kill the script if true. */
+    int lua_always_replicate_commands; /* Default replication type. */
+    /* Lazy free */
+    int lazyfree_lazy_eviction;
+    int lazyfree_lazy_expire;
+    int lazyfree_lazy_server_del;
+    /* Latency monitor */
+    long long latency_monitor_threshold;
+    dict *latency_events;
+    /* Assert & bug reporting */
+    const char *assert_failed;
+    const char *assert_file;
+    int assert_line;
+    int bug_report_start; /* True if bug report header was already logged. */
+    int watchdog_period;  /* Software watchdog period in ms. 0 = off */
+    /* System hardware info */
+    size_t system_memory_size;  /* Total memory in system as reported by OS */
+
+    /* Mutexes used to protect atomic variables when atomic builtins are
+     * not available. */
+    pthread_mutex_t lruclock_mutex;
+    pthread_mutex_t next_client_id_mutex;
+    pthread_mutex_t unixtime_mutex;
+};
+
+/* Binds a client to one of the patterns it is subscribed to. */
+typedef struct pubsubPattern {
+    client *client;     /* Subscribed client. */
+    robj *pattern;      /* The pattern it subscribed to. */
+} pubsubPattern;
+
+/* Prototype of a command implementation. */
+typedef void redisCommandProc(client *c);
+/* Prototype of a key-extraction helper for a command (see getkeys_proc). */
+typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+/* An entry of the command table: name, implementation, arity, flags and
+ * the position of the key arguments. */
+struct redisCommand {
+    char *name;             /* Command name. */
+    redisCommandProc *proc; /* Command implementation. */
+    int arity;              /* Number of arguments expected. */
+    char *sflags; /* Flags as string representation, one char per flag. */
+    int flags;    /* The actual flags, obtained from the 'sflags' field. */
+    /* Use a function to determine keys arguments in a command line.
+     * Used for Redis Cluster redirect. */
+    redisGetKeysProc *getkeys_proc;
+    /* What keys should be loaded in background when calling this command? */
+    int firstkey; /* The first argument that's a key (0 = no keys) */
+    int lastkey;  /* The last argument that's a key */
+    int keystep;  /* The step between first and last key */
+    long long microseconds, calls; /* Runtime stats (presumably total
+                                      execution time and call count). */
+};
+
+/* A (symbol name, address) pair — used for symbol lookups. */
+struct redisFunctionSym {
+    char *name;             /* Symbol name. */
+    unsigned long pointer;  /* Symbol address. */
+};
+
+/* An element being sorted by the SORT command, together with its
+ * comparison key (numeric score or object, depending on the sort mode). */
+typedef struct _redisSortObject {
+    robj *obj;          /* The element to sort. */
+    union {
+        double score;   /* Numeric sort key. */
+        robj *cmpobj;   /* Object sort key (alpha / BY pattern sorts). */
+    } u;
+} redisSortObject;
+
+/* A GET-style operation requested for each element of a SORT. */
+typedef struct _redisSortOperation {
+    int type;           /* Operation type. */
+    robj *pattern;      /* Pattern used to fetch the value. */
+} redisSortOperation;
+
+/* Structure to hold list iteration abstraction. */
+typedef struct {
+    robj *subject;              /* The list object being iterated. */
+    unsigned char encoding;     /* Encoding of 'subject'. */
+    unsigned char direction;    /* Iteration direction */
+    quicklistIter *iter;        /* Underlying quicklist iterator. */
+} listTypeIterator;
+
+/* Structure for an entry while iterating over a list. */
+typedef struct {
+    listTypeIterator *li;   /* Iterator this entry belongs to. */
+    quicklistEntry entry;   /* Entry in quicklist */
+} listTypeEntry;
+
+/* Structure to hold set iteration abstraction. Only the iterator matching
+ * the set's encoding is used. */
+typedef struct {
+    robj *subject;      /* The set object being iterated. */
+    int encoding;       /* Encoding of 'subject'. */
+    int ii;             /* intset iterator */
+    dictIterator *di;   /* Dictionary iterator (hash table encoding). */
+} setTypeIterator;
+
+/* Structure to hold hash iteration abstraction. Note that iteration over
+ * hashes involves both fields and values. Because it is possible that
+ * not both are required, store pointers in the iterator to avoid
+ * unnecessary memory allocation for fields/values. */
+typedef struct {
+    robj *subject;      /* The hash object being iterated. */
+    int encoding;       /* Encoding of 'subject'. */
+
+    unsigned char *fptr, *vptr;     /* Field/value pointers into the compact
+                                       (ziplist) representation. */
+
+    dictIterator *di;   /* Dictionary iterator (hash table encoding). */
+    dictEntry *de;      /* Current dictionary entry. */
+} hashTypeIterator;
+
+#define OBJ_HASH_KEY 1
+#define OBJ_HASH_VALUE 2
+
+/*-----------------------------------------------------------------------------
+ * Extern declarations
+ *----------------------------------------------------------------------------*/
+
+extern struct redisServer server;
+extern struct sharedObjectsStruct shared;
+extern dictType objectKeyPointerValueDictType;
+extern dictType setDictType;
+extern dictType zsetDictType;
+extern dictType clusterNodesDictType;
+extern dictType clusterNodesBlackListDictType;
+extern dictType dbDictType;
+extern dictType shaScriptObjectDictType;
+extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
+extern dictType hashDictType;
+extern dictType replScriptCacheDictType;
+extern dictType keyptrDictType;
+extern dictType modulesDictType;
+
+/*-----------------------------------------------------------------------------
+ * Functions prototypes
+ *----------------------------------------------------------------------------*/
+
+/* Modules */
+void moduleInitModulesSystem(void);
+int moduleLoad(const char *path, void **argv, int argc);
+void moduleLoadFromQueue(void);
+int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+moduleType *moduleTypeLookupModuleByID(uint64_t id);
+void moduleTypeNameByID(char *name, uint64_t moduleid);
+void moduleFreeContext(struct RedisModuleCtx *ctx);
+void unblockClientFromModule(client *c);
+void moduleHandleBlockedClients(void);
+void moduleBlockedClientTimedOut(client *c);
+void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask);
+size_t moduleCount(void);
+void moduleAcquireGIL(void);
+void moduleReleaseGIL(void);
+
+/* Utils */
+long long ustime(void);
+long long mstime(void);
+void getRandomHexChars(char *p, unsigned int len);
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+void exitFromChild(int retcode);
+size_t redisPopcount(void *s, long count);
+void redisSetProcTitle(char *title);
+
+/* networking.c -- Networking and Client related operations */
+client *createClient(int fd);
+void closeTimedoutClients(void);
+void freeClient(client *c);
+void freeClientAsync(client *c);
+void resetClient(client *c);
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void *addDeferredMultiBulkLength(client *c);
+void setDeferredMultiBulkLength(client *c, void *node, long length);
+void processInputBuffer(client *c);
+void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void addReplyString(client *c, const char *s, size_t len);
+void addReplyBulk(client *c, robj *obj);
+void addReplyBulkCString(client *c, const char *s);
+void addReplyBulkCBuffer(client *c, const void *p, size_t len);
+void addReplyBulkLongLong(client *c, long long ll);
+void addReply(client *c, robj *obj);
+void addReplySds(client *c, sds s);
+void addReplyBulkSds(client *c, sds s);
+void addReplyError(client *c, const char *err);
+void addReplyStatus(client *c, const char *status);
+void addReplyDouble(client *c, double d);
+void addReplyHumanLongDouble(client *c, long double d);
+void addReplyLongLong(client *c, long long ll);
+void addReplyMultiBulkLen(client *c, long length);
+void copyClientOutputBuffer(client *dst, client *src);
+size_t sdsZmallocSize(sds s);
+size_t getStringObjectSdsUsedMemory(robj *o);
+void *dupClientReplyValue(void *o);
+void getClientsMaxBuffers(unsigned long *longest_output_list,
+ unsigned long *biggest_input_buffer);
+char *getClientPeerId(client *client);
+sds catClientInfoString(sds s, client *client);
+sds getAllClientsInfoString(void);
+void rewriteClientCommandVector(client *c, int argc, ...);
+void rewriteClientCommandArgument(client *c, int i, robj *newval);
+void replaceClientCommandVector(client *c, int argc, robj **argv);
+unsigned long getClientOutputBufferMemoryUsage(client *c);
+void freeClientsInAsyncFreeQueue(void);
+void asyncCloseClientOnOutputBufferLimitReached(client *c);
+int getClientType(client *c);
+int getClientTypeByName(char *name);
+char *getClientTypeName(int class);
+void flushSlavesOutputBuffers(void);
+void disconnectSlaves(void);
+int listenToPort(int port, int *fds, int *count);
+void pauseClients(mstime_t duration);
+int clientsArePaused(void);
+int processEventsWhileBlocked(void);
+int handleClientsWithPendingWrites(void);
+int clientHasPendingReplies(client *c);
+void unlinkClient(client *c);
+int writeToClient(int fd, client *c, int handler_installed);
+
+#ifdef __GNUC__
+void addReplyErrorFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+void addReplyStatusFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void addReplyErrorFormat(client *c, const char *fmt, ...);
+void addReplyStatusFormat(client *c, const char *fmt, ...);
+#endif
+
+/* List data type */
+void listTypeTryConversion(robj *subject, robj *value);
+void listTypePush(robj *subject, robj *value, int where);
+robj *listTypePop(robj *subject, int where);
+unsigned long listTypeLength(const robj *subject);
+listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
+void listTypeReleaseIterator(listTypeIterator *li);
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
+robj *listTypeGet(listTypeEntry *entry);
+void listTypeInsert(listTypeEntry *entry, robj *value, int where);
+int listTypeEqual(listTypeEntry *entry, robj *o);
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
+void listTypeConvert(robj *subject, int enc);
+void unblockClientWaitingData(client *c);
+void handleClientsBlockedOnLists(void);
+void popGenericCommand(client *c, int where);
+void signalListAsReady(redisDb *db, robj *key);
+
+/* MULTI/EXEC/WATCH... */
+void unwatchAllKeys(client *c);
+void initClientMultiState(client *c);
+void freeClientMultiState(client *c);
+void queueMultiCommand(client *c);
+void touchWatchedKey(redisDb *db, robj *key);
+void touchWatchedKeysOnFlush(int dbid);
+void discardTransaction(client *c);
+void flagTransaction(client *c);
+void execCommandPropagateMulti(client *c);
+
+/* Redis object implementation */
+void decrRefCount(robj *o);
+void decrRefCountVoid(void *o);
+void incrRefCount(robj *o);
+robj *makeObjectShared(robj *o);
+robj *resetRefCount(robj *obj);
+void freeStringObject(robj *o);
+void freeListObject(robj *o);
+void freeSetObject(robj *o);
+void freeZsetObject(robj *o);
+void freeHashObject(robj *o);
+robj *createObject(int type, void *ptr);
+robj *createStringObject(const char *ptr, size_t len);
+robj *createRawStringObject(const char *ptr, size_t len);
+robj *createEmbeddedStringObject(const char *ptr, size_t len);
+robj *dupStringObject(const robj *o);
+int isSdsRepresentableAsLongLong(sds s, long long *llval);
+int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
+robj *tryObjectEncoding(robj *o);
+robj *getDecodedObject(robj *o);
+size_t stringObjectLen(robj *o);
+robj *createStringObjectFromLongLong(long long value);
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
+robj *createQuicklistObject(void);
+robj *createZiplistObject(void);
+robj *createSetObject(void);
+robj *createIntsetObject(void);
+robj *createHashObject(void);
+robj *createZsetObject(void);
+robj *createZsetZiplistObject(void);
+robj *createModuleObject(moduleType *mt, void *value);
+int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg);
+int checkType(client *c, robj *o, int type);
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg);
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg);
+int getDoubleFromObject(const robj *o, double *target);
+int getLongLongFromObject(robj *o, long long *target);
+int getLongDoubleFromObject(robj *o, long double *target);
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg);
+char *strEncoding(int encoding);
+int compareStringObjects(robj *a, robj *b);
+int collateStringObjects(robj *a, robj *b);
+int equalStringObjects(robj *a, robj *b);
+unsigned long long estimateObjectIdleTime(robj *o);
+#define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR)
+
+/* Synchronous I/O with timeout */
+ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
+
+/* Replication */
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
+void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen);
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc);
+void updateSlavesWaitingBgsave(int bgsaveerr, int type);
+void replicationCron(void);
+void replicationHandleMasterDisconnection(void);
+void replicationCacheMaster(client *c);
+void resizeReplicationBacklog(long long newsize);
+void replicationSetMaster(char *ip, int port);
+void replicationUnsetMaster(void);
+void refreshGoodSlavesCount(void);
+void replicationScriptCacheInit(void);
+void replicationScriptCacheFlush(void);
+void replicationScriptCacheAdd(sds sha1);
+int replicationScriptCacheExists(sds sha1);
+void processClientsWaitingReplicas(void);
+void unblockClientWaitingReplicas(client *c);
+int replicationCountAcksByOffset(long long offset);
+void replicationSendNewlineToMaster(void);
+long long replicationGetSlaveOffset(void);
+char *replicationGetSlaveName(client *c);
+long long getPsyncInitialOffset(void);
+int replicationSetupSlaveForFullResync(client *slave, long long offset);
+void changeReplicationId(void);
+void clearReplicationId2(void);
+void chopReplicationBacklog(void);
+void replicationCacheMasterUsingMyself(void);
+void feedReplicationBacklog(void *ptr, size_t len);
+
+/* Generic persistence functions */
+void startLoading(FILE *fp);
+void loadingProgress(off_t pos);
+void stopLoading(void);
+
+/* RDB persistence */
+#include "rdb.h"
+int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi);
+
+/* AOF persistence */
+void flushAppendOnlyFile(int force);
+void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
+void aofRemoveTempFile(pid_t childpid);
+int rewriteAppendOnlyFileBackground(void);
+int loadAppendOnlyFile(char *filename);
+void stopAppendOnly(void);
+int startAppendOnly(void);
+void backgroundRewriteDoneHandler(int exitcode, int bysignal);
+void aofRewriteBufferReset(void);
+unsigned long aofRewriteBufferSize(void);
+ssize_t aofReadDiffFromParent(void);
+
+/* Child info */
+void openChildInfoPipe(void);
+void closeChildInfoPipe(void);
+void sendChildInfo(int process_type);
+void receiveChildInfo(void);
+
+/* Sorted sets data type */
+
+/* Input flags. */
+#define ZADD_NONE 0
+#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
+#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
+#define ZADD_XX (1<<2)   /* Only touch elements already existing. */
+
+/* Output flags. */
+#define ZADD_NOP (1<<3) /* Operation not performed because of conditionals.*/
+#define ZADD_NAN (1<<4)     /* The resulting score is not a number (NaN). */
+#define ZADD_ADDED (1<<5) /* The element was new and was added. */
+#define ZADD_UPDATED (1<<6) /* The element already existed, score updated. */
+
+/* Flags only used by the ZADD command but not by zsetAdd() API: */
+#define ZADD_CH (1<<16) /* Return num of elements added or updated. */
+
+/* Struct to hold an inclusive/exclusive range spec by score comparison. */
+typedef struct {
+ double min, max;
+ int minex, maxex; /* are min or max exclusive? */
+} zrangespec;
+
+/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
+typedef struct {
+ sds min, max; /* May be set to shared.(minstring|maxstring) */
+ int minex, maxex; /* are min or max exclusive? */
+} zlexrangespec;
+
+zskiplist *zslCreate(void);
+void zslFree(zskiplist *zsl);
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele);
+unsigned char *zzlInsert(unsigned char *zl, sds ele, double score);
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node);
+zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
+zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
+double zzlGetScore(unsigned char *sptr);
+void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range);
+unsigned int zsetLength(const robj *zobj);
+void zsetConvert(robj *zobj, int encoding);
+void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen);
+int zsetScore(robj *zobj, sds member, double *score);
+unsigned long zslGetRank(zskiplist *zsl, double score, sds o);
+int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore);
+long zsetRank(robj *zobj, sds ele, int reverse);
+int zsetDel(robj *zobj, sds ele);
+sds ziplistGetObject(unsigned char *sptr);
+int zslValueGteMin(double value, zrangespec *spec);
+int zslValueLteMax(double value, zrangespec *spec);
+void zslFreeLexRange(zlexrangespec *spec);
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec);
+unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range);
+unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range);
+zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range);
+zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range);
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec);
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec);
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
+
+/* Core functions */
+int freeMemoryIfNeeded(void);
+int processCommand(client *c);
+void setupSignalHandlers(void);
+struct redisCommand *lookupCommand(sds name);
+struct redisCommand *lookupCommandByCString(char *s);
+struct redisCommand *lookupCommandOrOriginal(sds name);
+void call(client *c, int flags);
+void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
+void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
+void forceCommandPropagation(client *c, int flags);
+void preventCommandPropagation(client *c);
+void preventCommandAOF(client *c);
+void preventCommandReplication(client *c);
+int prepareForShutdown();
+#ifdef __GNUC__
+void serverLog(int level, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void serverLog(int level, const char *fmt, ...);
+#endif
+void serverLogRaw(int level, const char *msg);
+void serverLogFromHandler(int level, const char *msg);
+void usage(void);
+void updateDictResizePolicy(void);
+int htNeedsResize(dict *dict);
+void populateCommandTable(void);
+void resetCommandTableStats(void);
+void adjustOpenFilesLimit(void);
+void closeListeningSockets(int unlink_unix_socket);
+void updateCachedTime(void);
+void resetServerStats(void);
+void activeDefragCycle(void);
+unsigned int getLRUClock(void);
+unsigned int LRU_CLOCK(void);
+const char *evictPolicyToString(void);
+struct redisMemOverhead *getMemoryOverheadData(void);
+void freeMemoryOverheadData(struct redisMemOverhead *mh);
+
+#define RESTART_SERVER_NONE 0
+#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
+#define RESTART_SERVER_CONFIG_REWRITE (1<<1) /* CONFIG REWRITE before restart.*/
+int restartServer(int flags, mstime_t delay);
+
+/* Set data type */
+robj *setTypeCreate(sds value);
+int setTypeAdd(robj *subject, sds value);
+int setTypeRemove(robj *subject, sds value);
+int setTypeIsMember(robj *subject, sds value);
+setTypeIterator *setTypeInitIterator(robj *subject);
+void setTypeReleaseIterator(setTypeIterator *si);
+int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
+sds setTypeNextObject(setTypeIterator *si);
+int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
+unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
+unsigned long setTypeSize(const robj *subject);
+void setTypeConvert(robj *subject, int enc);
+
+/* Hash data type */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+
+void hashTypeConvert(robj *o, int enc);
+void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
+void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
+int hashTypeExists(robj *o, sds key);
+int hashTypeDelete(robj *o, sds key);
+unsigned long hashTypeLength(const robj *o);
+hashTypeIterator *hashTypeInitIterator(robj *subject);
+void hashTypeReleaseIterator(hashTypeIterator *hi);
+int hashTypeNext(hashTypeIterator *hi);
+void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
+ unsigned char **vstr,
+ unsigned int *vlen,
+ long long *vll);
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what);
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll);
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key);
+robj *hashTypeGetValueObject(robj *o, sds field);
+int hashTypeSet(robj *o, sds field, sds value, int flags);
+
+/* Pub / Sub */
+int pubsubUnsubscribeAllChannels(client *c, int notify);
+int pubsubUnsubscribeAllPatterns(client *c, int notify);
+void freePubsubPattern(void *p);
+int listMatchPubsubPattern(void *a, void *b);
+int pubsubPublishMessage(robj *channel, robj *message);
+
+/* Keyspace events notification */
+void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
+int keyspaceEventsStringToFlags(char *classes);
+sds keyspaceEventsFlagsToString(int flags);
+
+/* Configuration */
+void loadServerConfig(char *filename, char *options);
+void appendServerSaveParams(time_t seconds, int changes);
+void resetServerSaveParams(void);
+struct rewriteConfigState; /* Forward declaration to export API. */
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
+int rewriteConfig(char *path);
+
+/* db.c -- Keyspace access API */
+int removeExpire(redisDb *db, robj *key);
+void propagateExpire(redisDb *db, robj *key, int lazy);
+int expireIfNeeded(redisDb *db, robj *key);
+long long getExpire(redisDb *db, robj *key);
+void setExpire(client *c, redisDb *db, robj *key, long long when);
+robj *lookupKey(redisDb *db, robj *key, int flags);
+robj *lookupKeyRead(redisDb *db, robj *key);
+robj *lookupKeyWrite(redisDb *db, robj *key);
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
+robj *objectCommandLookup(client *c, robj *key);
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
+#define LOOKUP_NONE 0
+#define LOOKUP_NOTOUCH (1<<0)
+void dbAdd(redisDb *db, robj *key, robj *val);
+void dbOverwrite(redisDb *db, robj *key, robj *val);
+void setKey(redisDb *db, robj *key, robj *val);
+int dbExists(redisDb *db, robj *key);
+robj *dbRandomKey(redisDb *db);
+int dbSyncDelete(redisDb *db, robj *key);
+int dbDelete(redisDb *db, robj *key);
+robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
+
+#define EMPTYDB_NO_FLAGS 0 /* No flags. */
+#define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. */
+long long emptyDb(int dbnum, int flags, void(callback)(void*));
+
+int selectDb(client *c, int id);
+void signalModifiedKey(redisDb *db, robj *key);
+void signalFlushedDb(int dbid);
+unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
+unsigned int countKeysInSlot(unsigned int hashslot);
+unsigned int delKeysInSlot(unsigned int hashslot);
+int verifyClusterConfigWithData(void);
+void scanGenericCommand(client *c, robj *o, unsigned long cursor);
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor);
+void slotToKeyAdd(robj *key);
+void slotToKeyDel(robj *key);
+void slotToKeyFlush(void);
+int dbAsyncDelete(redisDb *db, robj *key);
+void emptyDbAsync(redisDb *db);
+void slotToKeyFlushAsync(void);
+size_t lazyfreeGetPendingObjectsCount(void);
+
+/* API to get key arguments from commands */
+int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+void getKeysFreeResult(int *result);
+int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys);
+int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+
+/* Cluster */
+void clusterInit(void);
+unsigned short crc16(const char *buf, int len);
+unsigned int keyHashSlot(char *key, int keylen);
+void clusterCron(void);
+void clusterPropagatePublish(robj *channel, robj *message);
+void migrateCloseTimedoutSockets(void);
+void clusterBeforeSleep(void);
+
+/* Sentinel */
+void initSentinelConfig(void);
+void initSentinel(void);
+void sentinelTimer(void);
+char *sentinelHandleConfiguration(char **argv, int argc);
+void sentinelIsRunning(void);
+
+/* redis-check-rdb & aof */
+int redis_check_rdb(char *rdbfilename, FILE *fp);
+int redis_check_rdb_main(int argc, char **argv, FILE *fp);
+int redis_check_aof_main(int argc, char **argv);
+
+/* Scripting */
+void scriptingInit(int setup);
+int ldbRemoveChild(pid_t pid);
+void ldbKillForkedSessions(void);
+int ldbPendingChildren(void);
+
+/* Blocked clients */
+void processUnblockedClients(void);
+void blockClient(client *c, int btype);
+void unblockClient(client *c);
+void replyToBlockedClientTimedOut(client *c);
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit);
+void disconnectAllBlockedClients(void);
+
+/* expire.c -- Handling of expired keys */
+void activeExpireCycle(int type);
+void expireSlaveKeys(void);
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key);
+void flushSlaveKeysWithExpireList(void);
+size_t getSlaveKeyWithExpireCount(void);
+
+/* evict.c -- maxmemory handling and LRU eviction. */
+void evictionPoolAlloc(void);
+#define LFU_INIT_VAL 5
+unsigned long LFUGetTimeInMinutes(void);
+uint8_t LFULogIncr(uint8_t value);
+
+/* Keys hashing / comparison functions for dict.c hash tables. */
+uint64_t dictSdsHash(const void *key);
+int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
+void dictSdsDestructor(void *privdata, void *val);
+
+/* Git SHA1 */
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+uint64_t redisBuildId(void);
+
+/* Commands prototypes */
+void authCommand(client *c);
+void pingCommand(client *c);
+void echoCommand(client *c);
+void commandCommand(client *c);
+void setCommand(client *c);
+void setnxCommand(client *c);
+void setexCommand(client *c);
+void psetexCommand(client *c);
+void getCommand(client *c);
+void delCommand(client *c);
+void unlinkCommand(client *c);
+void existsCommand(client *c);
+void setbitCommand(client *c);
+void getbitCommand(client *c);
+void bitfieldCommand(client *c);
+void setrangeCommand(client *c);
+void getrangeCommand(client *c);
+void incrCommand(client *c);
+void decrCommand(client *c);
+void incrbyCommand(client *c);
+void decrbyCommand(client *c);
+void incrbyfloatCommand(client *c);
+void selectCommand(client *c);
+void swapdbCommand(client *c);
+void randomkeyCommand(client *c);
+void keysCommand(client *c);
+void scanCommand(client *c);
+void dbsizeCommand(client *c);
+void lastsaveCommand(client *c);
+void saveCommand(client *c);
+void bgsaveCommand(client *c);
+void bgrewriteaofCommand(client *c);
+void shutdownCommand(client *c);
+void moveCommand(client *c);
+void renameCommand(client *c);
+void renamenxCommand(client *c);
+void lpushCommand(client *c);
+void rpushCommand(client *c);
+void lpushxCommand(client *c);
+void rpushxCommand(client *c);
+void linsertCommand(client *c);
+void lpopCommand(client *c);
+void rpopCommand(client *c);
+void llenCommand(client *c);
+void lindexCommand(client *c);
+void lrangeCommand(client *c);
+void ltrimCommand(client *c);
+void typeCommand(client *c);
+void lsetCommand(client *c);
+void saddCommand(client *c);
+void sremCommand(client *c);
+void smoveCommand(client *c);
+void sismemberCommand(client *c);
+void scardCommand(client *c);
+void spopCommand(client *c);
+void srandmemberCommand(client *c);
+void sinterCommand(client *c);
+void sinterstoreCommand(client *c);
+void sunionCommand(client *c);
+void sunionstoreCommand(client *c);
+void sdiffCommand(client *c);
+void sdiffstoreCommand(client *c);
+void sscanCommand(client *c);
+void syncCommand(client *c);
+void flushdbCommand(client *c);
+void flushallCommand(client *c);
+void sortCommand(client *c);
+void lremCommand(client *c);
+void rpoplpushCommand(client *c);
+void infoCommand(client *c);
+void mgetCommand(client *c);
+void monitorCommand(client *c);
+void expireCommand(client *c);
+void expireatCommand(client *c);
+void pexpireCommand(client *c);
+void pexpireatCommand(client *c);
+void getsetCommand(client *c);
+void ttlCommand(client *c);
+void touchCommand(client *c);
+void pttlCommand(client *c);
+void persistCommand(client *c);
+void slaveofCommand(client *c);
+void roleCommand(client *c);
+void debugCommand(client *c);
+void msetCommand(client *c);
+void msetnxCommand(client *c);
+void zaddCommand(client *c);
+void zincrbyCommand(client *c);
+void zrangeCommand(client *c);
+void zrangebyscoreCommand(client *c);
+void zrevrangebyscoreCommand(client *c);
+void zrangebylexCommand(client *c);
+void zrevrangebylexCommand(client *c);
+void zcountCommand(client *c);
+void zlexcountCommand(client *c);
+void zrevrangeCommand(client *c);
+void zcardCommand(client *c);
+void zremCommand(client *c);
+void zscoreCommand(client *c);
+void zremrangebyscoreCommand(client *c);
+void zremrangebylexCommand(client *c);
+void multiCommand(client *c);
+void execCommand(client *c);
+void discardCommand(client *c);
+void blpopCommand(client *c);
+void brpopCommand(client *c);
+void brpoplpushCommand(client *c);
+void appendCommand(client *c);
+void strlenCommand(client *c);
+void zrankCommand(client *c);
+void zrevrankCommand(client *c);
+void hsetCommand(client *c);
+void hsetnxCommand(client *c);
+void hgetCommand(client *c);
+void hmsetCommand(client *c);
+void hmgetCommand(client *c);
+void hdelCommand(client *c);
+void hlenCommand(client *c);
+void hstrlenCommand(client *c);
+void zremrangebyrankCommand(client *c);
+void zunionstoreCommand(client *c);
+void zinterstoreCommand(client *c);
+void zscanCommand(client *c);
+void hkeysCommand(client *c);
+void hvalsCommand(client *c);
+void hgetallCommand(client *c);
+void hexistsCommand(client *c);
+void hscanCommand(client *c);
+void configCommand(client *c);
+void hincrbyCommand(client *c);
+void hincrbyfloatCommand(client *c);
+void subscribeCommand(client *c);
+void unsubscribeCommand(client *c);
+void psubscribeCommand(client *c);
+void punsubscribeCommand(client *c);
+void publishCommand(client *c);
+void pubsubCommand(client *c);
+void watchCommand(client *c);
+void unwatchCommand(client *c);
+void clusterCommand(client *c);
+void restoreCommand(client *c);
+void migrateCommand(client *c);
+void askingCommand(client *c);
+void readonlyCommand(client *c);
+void readwriteCommand(client *c);
+void dumpCommand(client *c);
+void objectCommand(client *c);
+void memoryCommand(client *c);
+void clientCommand(client *c);
+void evalCommand(client *c);
+void evalShaCommand(client *c);
+void scriptCommand(client *c);
+void timeCommand(client *c);
+void bitopCommand(client *c);
+void bitcountCommand(client *c);
+void bitposCommand(client *c);
+void replconfCommand(client *c);
+void waitCommand(client *c);
+void geoencodeCommand(client *c);
+void geodecodeCommand(client *c);
+void georadiusbymemberCommand(client *c);
+void georadiusbymemberroCommand(client *c);
+void georadiusCommand(client *c);
+void georadiusroCommand(client *c);
+void geoaddCommand(client *c);
+void geohashCommand(client *c);
+void geoposCommand(client *c);
+void geodistCommand(client *c);
+void pfselftestCommand(client *c);
+void pfaddCommand(client *c);
+void pfcountCommand(client *c);
+void pfmergeCommand(client *c);
+void pfdebugCommand(client *c);
+void latencyCommand(client *c);
+void moduleCommand(client *c);
+void securityWarningCommand(client *c);
+
+#if defined(__GNUC__)
+void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
+void free(void *ptr) __attribute__ ((deprecated));
+void *malloc(size_t size) __attribute__ ((deprecated));
+void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
+#endif
+
+/* Debugging stuff */
+void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line);
+void _serverAssert(const char *estr, const char *file, int line);
+void _serverPanic(const char *file, int line, const char *msg, ...);
+void bugReportStart(void);
+void serverLogObjectDebugInfo(const robj *o);
+void sigsegvHandler(int sig, siginfo_t *info, void *secret);
+sds genRedisInfoString(char *section);
+void enableWatchdog(int period);
+void disableWatchdog(void);
+void watchdogScheduleSignal(int period);
+void serverLogHexDump(int level, char *descr, void *value, size_t len);
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes);
+void mixDigest(unsigned char *digest, void *ptr, size_t len);
+void xorDigest(unsigned char *digest, void *ptr, size_t len);
+
+#define redisDebug(fmt, ...) \
+ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
+#define redisDebugMark() \
+ printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
+
+#endif
diff --git a/src/sha1.c b/src/sha1.c
index 59e6f461d..ce487e367 100644
--- a/src/sha1.c
+++ b/src/sha1.c
@@ -23,10 +23,8 @@ A million repetitions of "a"
#include <stdio.h>
#include <string.h>
-#include <sys/types.h> /* for u_int*_t */
-#if defined(__sun)
+#include <stdint.h>
#include "solarisfixes.h"
-#endif
#include "sha1.h"
#include "config.h"
@@ -55,12 +53,12 @@ A million repetitions of "a"
/* Hash a single 512-bit block. This is the core of the algorithm. */
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64])
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
- u_int32_t a, b, c, d, e;
+ uint32_t a, b, c, d, e;
typedef union {
unsigned char c[64];
- u_int32_t l[16];
+ uint32_t l[16];
} CHAR64LONG16;
#ifdef SHA1HANDSOFF
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
@@ -130,9 +128,9 @@ void SHA1Init(SHA1_CTX* context)
/* Run your data through this. */
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len)
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
{
- u_int32_t i, j;
+ uint32_t i, j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
@@ -170,7 +168,7 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
for (i = 0; i < 2; i++)
{
- u_int32_t t = context->count[i];
+ uint32_t t = context->count[i];
int j;
for (j = 0; j < 4; t >>= 8, j++)
@@ -199,16 +197,19 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
}
/* ================ end of sha1.c ================ */
-#if 0
+#ifdef REDIS_TEST
#define BUFSIZE 4096
-int
-main(int argc, char **argv)
+#define UNUSED(x) (void)(x)
+int sha1Test(int argc, char **argv)
{
SHA1_CTX ctx;
unsigned char hash[20], buf[BUFSIZE];
int i;
+ UNUSED(argc);
+ UNUSED(argv);
+
for(i=0;i<BUFSIZE;i++)
buf[i] = i;
@@ -223,6 +224,4 @@ main(int argc, char **argv)
printf("\n");
return 0;
}
-
#endif
-
diff --git a/src/sha1.h b/src/sha1.h
index 9d6f12965..f41691258 100644
--- a/src/sha1.h
+++ b/src/sha1.h
@@ -1,3 +1,5 @@
+#ifndef SHA1_H
+#define SHA1_H
/* ================ sha1.h ================ */
/*
SHA-1 in C
@@ -6,12 +8,17 @@ By Steve Reid <steve@edmweb.com>
*/
typedef struct {
- u_int32_t state[5];
- u_int32_t count[2];
+ uint32_t state[5];
+ uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
+
+#ifdef REDIS_TEST
+int sha1Test(int argc, char **argv);
+#endif
+#endif
diff --git a/src/siphash.c b/src/siphash.c
new file mode 100644
index 000000000..6c41fe6b6
--- /dev/null
+++ b/src/siphash.c
@@ -0,0 +1,360 @@
+/*
+ SipHash reference C implementation
+
+ Copyright (c) 2012-2016 Jean-Philippe Aumasson
+ <jeanphilippe.aumasson@gmail.com>
+ Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+ Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+ ----------------------------------------------------------------------------
+
+ This version was modified by Salvatore Sanfilippo <antirez@gmail.com>
+ in the following ways:
+
+ 1. We use SipHash 1-2. This is not believed to be as strong as the
+ suggested 2-4 variant, but AFAIK there are not trivial attacks
+ against this reduced-rounds version, and it runs at the same speed
+    as Murmurhash2 that we used previously, while the 2-4 variant slowed
+ down Redis by a 4% figure more or less.
+ 2. Hard-code rounds in the hope the compiler can optimize it more
+ in this raw from. Anyway we always want the standard 2-4 variant.
+ 3. Modify the prototype and implementation so that the function directly
+    returns a uint64_t value, the hash itself, instead of receiving an
+ output buffer. This also means that the output size is set to 8 bytes
+ and the 16 bytes output code handling was removed.
+ 4. Provide a case insensitive variant to be used when hashing strings that
+ must be considered identical by the hash table regardless of the case.
+ If we don't have directly a case insensitive hash function, we need to
+ perform a text transformation in some temporary buffer, which is costly.
+ 5. Remove debugging code.
+ 6. Modified the original test.c file to be a stand-alone function testing
+    the function in the new form (returning a uint64_t) using just the
+ relevant test vector.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Fast tolower() alike function that does not care about locale
+ * but just returns a-z instead of A-Z. */
+int siptlw(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c+('a'-'A');
+ } else {
+ return c;
+ }
+}
+
+/* Test if the CPU is little endian and supports unaligned accesses.
+ * Two interesting conditions to speedup the function that happen to be
+ * in most of x86 servers. */
+#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__)
+#define UNALIGNED_LE_CPU
+#endif
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+
+#define U32TO8_LE(p, v) \
+ (p)[0] = (uint8_t)((v)); \
+ (p)[1] = (uint8_t)((v) >> 8); \
+ (p)[2] = (uint8_t)((v) >> 16); \
+ (p)[3] = (uint8_t)((v) >> 24);
+
+#define U64TO8_LE(p, v) \
+ U32TO8_LE((p), (uint32_t)((v))); \
+ U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+
+#ifdef UNALIGNED_LE_CPU
+#define U8TO64_LE(p) (*((uint64_t*)(p)))
+#else
+#define U8TO64_LE(p) \
+ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+#endif
+
+#define U8TO64_LE_NOCASE(p) \
+ (((uint64_t)(siptlw((p)[0]))) | \
+ ((uint64_t)(siptlw((p)[1])) << 8) | \
+ ((uint64_t)(siptlw((p)[2])) << 16) | \
+ ((uint64_t)(siptlw((p)[3])) << 24) | \
+ ((uint64_t)(siptlw((p)[4])) << 32) | \
+ ((uint64_t)(siptlw((p)[5])) << 40) | \
+ ((uint64_t)(siptlw((p)[6])) << 48) | \
+ ((uint64_t)(siptlw((p)[7])) << 56))
+
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 32); \
+ } while (0)
+
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)in[6]) << 48;
+ case 6: b |= ((uint64_t)in[5]) << 40;
+ case 5: b |= ((uint64_t)in[4]) << 32;
+ case 4: b |= ((uint64_t)in[3]) << 24;
+ case 3: b |= ((uint64_t)in[2]) << 16;
+ case 2: b |= ((uint64_t)in[1]) << 8;
+ case 1: b |= ((uint64_t)in[0]); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
+{
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE_NOCASE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)siptlw(in[6])) << 48;
+ case 6: b |= ((uint64_t)siptlw(in[5])) << 40;
+ case 5: b |= ((uint64_t)siptlw(in[4])) << 32;
+ case 4: b |= ((uint64_t)siptlw(in[3])) << 24;
+ case 3: b |= ((uint64_t)siptlw(in[2])) << 16;
+ case 2: b |= ((uint64_t)siptlw(in[1])) << 8;
+ case 1: b |= ((uint64_t)siptlw(in[0])); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+
+/* --------------------------------- TEST ------------------------------------ */
+
+#ifdef SIPHASH_TEST
+
+const uint8_t vectors_sip64[64][8] = {
+ { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
+ { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
+ { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
+ { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
+ { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
+ { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
+ { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
+ { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
+ { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
+ { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
+ { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
+ { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
+ { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
+ { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
+ { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
+ { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
+ { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
+ { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
+ { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
+ { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
+ { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
+ { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
+ { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
+ { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
+ { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
+ { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
+ { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
+ { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
+ { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
+ { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
+ { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
+ { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
+ { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
+ { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
+ { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
+ { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
+ { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
+ { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
+ { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
+ { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
+ { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
+ { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
+ { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
+ { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
+ { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
+ { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
+ { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
+ { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
+ { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
+ { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
+ { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
+ { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
+ { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
+ { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
+ { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
+ { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
+ { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
+ { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
+ { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
+ { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
+ { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
+ { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
+ { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
+ { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },
+};
+
+
+/* Test siphash using a test vector. Returns 0 if the function passed
+ * all the tests, otherwise 1 is returned.
+ *
+ * IMPORTANT: The test vector is for SipHash 2-4. Before running
+ * the test revert back the siphash() function to 2-4 rounds since
+ * now it uses 1-2 rounds. */
+int siphash_test(void) {
+ uint8_t in[64], k[16];
+ int i;
+ int fails = 0;
+
+ for (i = 0; i < 16; ++i)
+ k[i] = i;
+
+ for (i = 0; i < 64; ++i) {
+ in[i] = i;
+ uint64_t hash = siphash(in, i, k);
+ const uint8_t *v = NULL;
+ v = (uint8_t *)vectors_sip64;
+ if (memcmp(&hash, v + (i * 8), 8)) {
+ /* printf("fail for %d bytes\n", i); */
+ fails++;
+ }
+ }
+
+ /* Run a few basic tests with the case insensitive version. */
+ uint64_t h1, h2;
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 == h2) fails++;
+
+ if (!fails) return 0;
+ return 1;
+}
+
+int main(void) {
+ if (siphash_test() == 0) {
+ printf("SipHash test: OK\n");
+ return 0;
+ } else {
+ printf("SipHash test: FAILED\n");
+ return 1;
+ }
+}
+
+#endif
diff --git a/src/slowlog.c b/src/slowlog.c
index ff6ccf472..805ee1d77 100644
--- a/src/slowlog.c
+++ b/src/slowlog.c
@@ -39,13 +39,13 @@
*/
-#include "redis.h"
+#include "server.h"
#include "slowlog.h"
/* Create a new slowlog entry.
* Incrementing the ref count of all the objects retained is up to
* this function. */
-slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
+slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long duration) {
slowlogEntry *se = zmalloc(sizeof(*se));
int j, slargc = argc;
@@ -57,12 +57,12 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
* at SLOWLOG_ENTRY_MAX_ARGC, but use the last argument to specify
* how many remaining arguments there were in the original command. */
if (slargc != argc && j == slargc-1) {
- se->argv[j] = createObject(REDIS_STRING,
+ se->argv[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"... (%d more arguments)",
argc-slargc+1));
} else {
/* Trim too long strings as well... */
- if (argv[j]->type == REDIS_STRING &&
+ if (argv[j]->type == OBJ_STRING &&
sdsEncodedObject(argv[j]) &&
sdslen(argv[j]->ptr) > SLOWLOG_ENTRY_MAX_STRING)
{
@@ -71,7 +71,7 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
s = sdscatprintf(s,"... (%lu more bytes)",
(unsigned long)
sdslen(argv[j]->ptr) - SLOWLOG_ENTRY_MAX_STRING);
- se->argv[j] = createObject(REDIS_STRING,s);
+ se->argv[j] = createObject(OBJ_STRING,s);
} else {
se->argv[j] = argv[j];
incrRefCount(argv[j]);
@@ -81,6 +81,8 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
se->time = time(NULL);
se->duration = duration;
se->id = server.slowlog_entry_id++;
+ se->peerid = sdsnew(getClientPeerId(c));
+ se->cname = c->name ? sdsnew(c->name->ptr) : sdsempty();
return se;
}
@@ -95,6 +97,8 @@ void slowlogFreeEntry(void *septr) {
for (j = 0; j < se->argc; j++)
decrRefCount(se->argv[j]);
zfree(se->argv);
+ sdsfree(se->peerid);
+ sdsfree(se->cname);
zfree(se);
}
@@ -109,10 +113,11 @@ void slowlogInit(void) {
/* Push a new entry into the slow log.
* This function will make sure to trim the slow log accordingly to the
* configured max length. */
-void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration) {
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration) {
if (server.slowlog_log_slower_than < 0) return; /* Slowlog disabled */
if (duration >= server.slowlog_log_slower_than)
- listAddNodeHead(server.slowlog,slowlogCreateEntry(argv,argc,duration));
+ listAddNodeHead(server.slowlog,
+ slowlogCreateEntry(c,argv,argc,duration));
/* Remove old entries if needed. */
while (listLength(server.slowlog) > server.slowlog_max_len)
@@ -127,7 +132,7 @@ void slowlogReset(void) {
/* The SLOWLOG command. Implements all the subcommands needed to handle the
* Redis slow log. */
-void slowlogCommand(redisClient *c) {
+void slowlogCommand(client *c) {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) {
slowlogReset();
addReply(c,shared.ok);
@@ -143,7 +148,7 @@ void slowlogCommand(redisClient *c) {
slowlogEntry *se;
if (c->argc == 3 &&
- getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != REDIS_OK)
+ getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK)
return;
listRewind(server.slowlog,&li);
@@ -152,13 +157,15 @@ void slowlogCommand(redisClient *c) {
int j;
se = ln->value;
- addReplyMultiBulkLen(c,4);
+ addReplyMultiBulkLen(c,6);
addReplyLongLong(c,se->id);
addReplyLongLong(c,se->time);
addReplyLongLong(c,se->duration);
addReplyMultiBulkLen(c,se->argc);
for (j = 0; j < se->argc; j++)
addReplyBulk(c,se->argv[j]);
+ addReplyBulkCBuffer(c,se->peerid,sdslen(se->peerid));
+ addReplyBulkCBuffer(c,se->cname,sdslen(se->cname));
sent++;
}
setDeferredMultiBulkLength(c,totentries,sent);
diff --git a/src/slowlog.h b/src/slowlog.h
index e3067de91..655fb25f4 100644
--- a/src/slowlog.h
+++ b/src/slowlog.h
@@ -35,13 +35,15 @@ typedef struct slowlogEntry {
robj **argv;
int argc;
long long id; /* Unique entry identifier. */
- long long duration; /* Time spent by the query, in nanoseconds. */
+ long long duration; /* Time spent by the query, in microseconds. */
time_t time; /* Unix time at which the query was executed. */
+ sds cname; /* Client name. */
+ sds peerid; /* Client network address. */
} slowlogEntry;
/* Exported API */
void slowlogInit(void);
-void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration);
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration);
/* Exported commands */
-void slowlogCommand(redisClient *c);
+void slowlogCommand(client *c);
diff --git a/src/solarisfixes.h b/src/solarisfixes.h
index 23025257a..3e53ba67c 100644
--- a/src/solarisfixes.h
+++ b/src/solarisfixes.h
@@ -28,6 +28,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#if defined(__sun)
+
#if defined(__GNUC__)
#include <math.h>
#undef isnan
@@ -48,3 +50,5 @@
#define u_int uint
#define u_int32_t uint32_t
#endif /* __GNUC__ */
+
+#endif /* __sun */
diff --git a/src/sort.c b/src/sort.c
index fedf0cf3a..7ddd37d95 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -29,7 +29,7 @@
*/
-#include "redis.h"
+#include "server.h"
#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
#include <math.h> /* isnan() */
@@ -110,13 +110,13 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
if (o == NULL) goto noobj;
if (fieldobj) {
- if (o->type != REDIS_HASH) goto noobj;
+ if (o->type != OBJ_HASH) goto noobj;
- /* Retrieve value from hash by the field name. This operation
- * already increases the refcount of the returned object. */
- o = hashTypeGetObject(o, fieldobj);
+        /* Retrieve value from hash by the field name. The returned object
+ * is a new object with refcount already incremented. */
+ o = hashTypeGetValueObject(o, fieldobj->ptr);
} else {
- if (o->type != REDIS_STRING) goto noobj;
+ if (o->type != OBJ_STRING) goto noobj;
/* Every object that this function returns needs to have its refcount
* increased. sortCommand decreases it again. */
@@ -186,7 +186,7 @@ int sortCompare(const void *s1, const void *s2) {
/* The SORT command is the most complex command in Redis. Warning: this code
* is optimized for speed and a bit less for readability */
-void sortCommand(redisClient *c) {
+void sortCommand(client *c) {
list *operations;
unsigned int outputlen = 0;
int desc = 0, alpha = 0;
@@ -200,16 +200,16 @@ void sortCommand(redisClient *c) {
/* Lookup the key to sort. It must be of the right types */
sortval = lookupKeyRead(c->db,c->argv[1]);
- if (sortval && sortval->type != REDIS_SET &&
- sortval->type != REDIS_LIST &&
- sortval->type != REDIS_ZSET)
+ if (sortval && sortval->type != OBJ_SET &&
+ sortval->type != OBJ_LIST &&
+ sortval->type != OBJ_ZSET)
{
addReply(c,shared.wrongtypeerr);
return;
}
/* Create a list of operations to perform for every sorted element.
- * Operations can be GET/DEL/INCR/DECR */
+ * Operations can be GET */
operations = listCreate();
listSetFreeMethod(operations,zfree);
j = 2; /* options start at argv[2] */
@@ -220,7 +220,7 @@ void sortCommand(redisClient *c) {
if (sortval)
incrRefCount(sortval);
else
- sortval = createListObject();
+ sortval = createQuicklistObject();
/* The SORT command has an SQL-alike syntax, parse it */
while(j < c->argc) {
@@ -233,9 +233,9 @@ void sortCommand(redisClient *c) {
alpha = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
if ((getLongFromObjectOrReply(c, c->argv[j+1], &limit_start, NULL)
- != REDIS_OK) ||
+ != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[j+2], &limit_count, NULL)
- != REDIS_OK))
+ != C_OK))
{
syntax_error++;
break;
@@ -267,7 +267,7 @@ void sortCommand(redisClient *c) {
break;
}
listAddNodeTail(operations,createSortOperation(
- REDIS_SORT_GET,c->argv[j+1]));
+ SORT_OP_GET,c->argv[j+1]));
getop++;
j++;
} else {
@@ -285,16 +285,15 @@ void sortCommand(redisClient *c) {
return;
}
- /* For the STORE option, or when SORT is called from a Lua script,
- * we want to force a specific ordering even when no explicit ordering
- * was asked (SORT BY nosort). This guarantees that replication / AOF
- * is deterministic.
+ /* When sorting a set with no sort specified, we must sort the output
+ * so the result is consistent across scripting and replication.
*
- * However in the case 'dontsort' is true, but the type to sort is a
- * sorted set, we don't need to do anything as ordering is guaranteed
- * in this special case. */
- if ((storekey || c->flags & REDIS_LUA_CLIENT) &&
- (dontsort && sortval->type != REDIS_ZSET))
+ * The other types (list, sorted set) will retain their native order
+ * even if no sort order is requested, so they remain stable across
+ * scripting and replication. */
+ if (dontsort &&
+ sortval->type == OBJ_SET &&
+ (storekey || c->flags & CLIENT_LUA))
{
/* Force ALPHA sorting */
dontsort = 0;
@@ -303,15 +302,15 @@ void sortCommand(redisClient *c) {
}
/* Destructively convert encoded sorted sets for SORT. */
- if (sortval->type == REDIS_ZSET)
- zsetConvert(sortval, REDIS_ENCODING_SKIPLIST);
+ if (sortval->type == OBJ_ZSET)
+ zsetConvert(sortval, OBJ_ENCODING_SKIPLIST);
/* Objtain the length of the object to sort. */
switch(sortval->type) {
- case REDIS_LIST: vectorlen = listTypeLength(sortval); break;
- case REDIS_SET: vectorlen = setTypeSize(sortval); break;
- case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
- default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
+ case OBJ_LIST: vectorlen = listTypeLength(sortval); break;
+ case OBJ_SET: vectorlen = setTypeSize(sortval); break;
+ case OBJ_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
+ default: vectorlen = 0; serverPanic("Bad SORT type"); /* Avoid GCC warning */
}
/* Perform LIMIT start,count sanity checking. */
@@ -323,17 +322,17 @@ void sortCommand(redisClient *c) {
}
if (end >= vectorlen) end = vectorlen-1;
- /* Optimization:
+ /* Whenever possible, we load elements into the output array in a more
+ * direct way. This is possible if:
*
- * 1) if the object to sort is a sorted set.
+ * 1) The object to sort is a sorted set or a list (internally sorted).
* 2) There is nothing to sort as dontsort is true (BY <constant string>).
- * 3) We have a LIMIT option that actually reduces the number of elements
- * to fetch.
*
- * In this case to load all the objects in the vector is a huge waste of
- * resources. We just allocate a vector that is big enough for the selected
- * range length, and make sure to load just this part in the vector. */
- if (sortval->type == REDIS_ZSET &&
+ * In this special case, if we have a LIMIT option that actually reduces
+ * the number of elements to fetch, we also optimize to just load the
+ * range we are interested in and allocating a vector that is big enough
+ * for the selected range length. */
+ if ((sortval->type == OBJ_ZSET || sortval->type == OBJ_LIST) &&
dontsort &&
(start != 0 || end != vectorlen-1))
{
@@ -344,8 +343,33 @@ void sortCommand(redisClient *c) {
vector = zmalloc(sizeof(redisSortObject)*vectorlen);
j = 0;
- if (sortval->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL);
+ if (sortval->type == OBJ_LIST && dontsort) {
+ /* Special handling for a list, if 'dontsort' is true.
+ * This makes sure we return elements in the list original
+ * ordering, accordingly to DESC / ASC options.
+ *
+ * Note that in this case we also handle LIMIT here in a direct
+ * way, just getting the required range, as an optimization. */
+ if (end >= start) {
+ listTypeIterator *li;
+ listTypeEntry entry;
+ li = listTypeInitIterator(sortval,
+ desc ? (long)(listTypeLength(sortval) - start - 1) : start,
+ desc ? LIST_HEAD : LIST_TAIL);
+
+ while(j < vectorlen && listTypeNext(li,&entry)) {
+ vector[j].obj = listTypeGet(&entry);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ listTypeReleaseIterator(li);
+ /* Fix start/end: output code is not aware of this optimization. */
+ end -= start;
+ start = 0;
+ }
+ } else if (sortval->type == OBJ_LIST) {
+ listTypeIterator *li = listTypeInitIterator(sortval,0,LIST_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
vector[j].obj = listTypeGet(&entry);
@@ -354,17 +378,17 @@ void sortCommand(redisClient *c) {
j++;
}
listTypeReleaseIterator(li);
- } else if (sortval->type == REDIS_SET) {
+ } else if (sortval->type == OBJ_SET) {
setTypeIterator *si = setTypeInitIterator(sortval);
- robj *ele;
- while((ele = setTypeNextObject(si)) != NULL) {
- vector[j].obj = ele;
+ sds sdsele;
+ while((sdsele = setTypeNextObject(si)) != NULL) {
+ vector[j].obj = createObject(OBJ_STRING,sdsele);
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
}
setTypeReleaseIterator(si);
- } else if (sortval->type == REDIS_ZSET && dontsort) {
+ } else if (sortval->type == OBJ_ZSET && dontsort) {
/* Special handling for a sorted set, if 'dontsort' is true.
* This makes sure we return elements in the sorted set original
* ordering, accordingly to DESC / ASC options.
@@ -375,7 +399,7 @@ void sortCommand(redisClient *c) {
zset *zs = sortval->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
- robj *ele;
+ sds sdsele;
int rangelen = vectorlen;
/* Check if starting point is trivial, before doing log(N) lookup. */
@@ -392,36 +416,35 @@ void sortCommand(redisClient *c) {
}
while(rangelen--) {
- redisAssertWithInfo(c,sortval,ln != NULL);
- ele = ln->obj;
- vector[j].obj = ele;
+ serverAssertWithInfo(c,sortval,ln != NULL);
+ sdsele = ln->ele;
+ vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
ln = desc ? ln->backward : ln->level[0].forward;
}
- /* The code producing the output does not know that in the case of
- * sorted set, 'dontsort', and LIMIT, we are able to get just the
- * range, already sorted, so we need to adjust "start" and "end"
- * to make sure start is set to 0. */
+ /* Fix start/end: output code is not aware of this optimization. */
end -= start;
start = 0;
- } else if (sortval->type == REDIS_ZSET) {
+ } else if (sortval->type == OBJ_ZSET) {
dict *set = ((zset*)sortval->ptr)->dict;
dictIterator *di;
dictEntry *setele;
+ sds sdsele;
di = dictGetIterator(set);
while((setele = dictNext(di)) != NULL) {
- vector[j].obj = dictGetKey(setele);
+ sdsele = dictGetKey(setele);
+ vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown type");
+ serverPanic("Unknown type");
}
- redisAssertWithInfo(c,sortval,j == vectorlen);
+ serverAssertWithInfo(c,sortval,j == vectorlen);
/* Now it's time to load the right scores in the sorting vector */
if (dontsort == 0) {
@@ -448,13 +471,13 @@ void sortCommand(redisClient *c) {
{
int_convertion_error = 1;
}
- } else if (byval->encoding == REDIS_ENCODING_INT) {
+ } else if (byval->encoding == OBJ_ENCODING_INT) {
/* Don't need to decode the object if it's
* integer-encoded (the only encoding supported) so
* far. We can just cast it */
vector[j].u.score = (long)byval->ptr;
} else {
- redisAssertWithInfo(c,sortval,1 != 1);
+ serverAssertWithInfo(c,sortval,1 != 1);
}
}
@@ -496,7 +519,7 @@ void sortCommand(redisClient *c) {
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
- if (sop->type == REDIS_SORT_GET) {
+ if (sop->type == SORT_OP_GET) {
if (!val) {
addReply(c,shared.nullbulk);
} else {
@@ -505,12 +528,12 @@ void sortCommand(redisClient *c) {
}
} else {
/* Always fails */
- redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET);
+ serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
}
}
}
} else {
- robj *sobj = createZiplistObject();
+ robj *sobj = createQuicklistObject();
/* STORE option specified, set the sorting result as a List object */
for (j = start; j <= end; j++) {
@@ -518,7 +541,7 @@ void sortCommand(redisClient *c) {
listIter li;
if (!getop) {
- listTypePush(sobj,vector[j].obj,REDIS_TAIL);
+ listTypePush(sobj,vector[j].obj,LIST_TAIL);
} else {
listRewind(operations,&li);
while((ln = listNext(&li))) {
@@ -526,29 +549,29 @@ void sortCommand(redisClient *c) {
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
- if (sop->type == REDIS_SORT_GET) {
+ if (sop->type == SORT_OP_GET) {
if (!val) val = createStringObject("",0);
/* listTypePush does an incrRefCount, so we should take care
* care of the incremented refcount caused by either
* lookupKeyByPattern or createStringObject("",0) */
- listTypePush(sobj,val,REDIS_TAIL);
+ listTypePush(sobj,val,LIST_TAIL);
decrRefCount(val);
} else {
/* Always fails */
- redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET);
+ serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
}
}
}
}
if (outputlen) {
setKey(c->db,storekey,sobj);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"sortstore",storekey,
+ notifyKeyspaceEvent(NOTIFY_LIST,"sortstore",storekey,
c->db->id);
server.dirty += outputlen;
} else if (dbDelete(c->db,storekey)) {
signalModifiedKey(c->db,storekey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",storekey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
server.dirty++;
}
decrRefCount(sobj);
@@ -556,9 +579,9 @@ void sortCommand(redisClient *c) {
}
/* Cleanup */
- if (sortval->type == REDIS_LIST || sortval->type == REDIS_SET)
- for (j = 0; j < vectorlen; j++)
- decrRefCount(vector[j].obj);
+ for (j = 0; j < vectorlen; j++)
+ decrRefCount(vector[j].obj);
+
decrRefCount(sortval);
listRelease(operations);
for (j = 0; j < vectorlen; j++) {
diff --git a/src/sparkline.c b/src/sparkline.c
index 900f26ab7..0a986883d 100644
--- a/src/sparkline.c
+++ b/src/sparkline.c
@@ -30,7 +30,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
@@ -49,7 +49,7 @@ static int label_margin_top = 1;
* sparklineSequenceAddSample(seq, 10, NULL);
* sparklineSequenceAddSample(seq, 20, NULL);
* sparklineSequenceAddSample(seq, 30, "last sample label");
- * sds output = sparklineRender(seq, 80, 4);
+ * sds output = sparklineRender(sdsempty(), seq, 80, 4, SPARKLINE_FILL);
* freeSparklineSequence(seq);
* ------------------------------------------------------------------------- */
@@ -63,6 +63,7 @@ struct sequence *createSparklineSequence(void) {
/* Add a new sample into a sequence. */
void sparklineSequenceAddSample(struct sequence *seq, double value, char *label) {
+ label = (label == NULL || label[0] == '\0') ? NULL : zstrdup(label);
if (seq->length == 0) {
seq->min = seq->max = value;
} else {
diff --git a/src/syncio.c b/src/syncio.c
index 8810a842c..b2843d5fb 100644
--- a/src/syncio.c
+++ b/src/syncio.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* ----------------- Blocking sockets I/O with timeouts --------------------- */
@@ -40,7 +40,7 @@
*
* All the functions take the timeout in milliseconds. */
-#define REDIS_SYNCIO_RESOLUTION 10 /* Resolution in milliseconds */
+#define SYNCIO__RESOLUTION 10 /* Resolution in milliseconds */
/* Write the specified payload to 'fd'. If writing the whole payload will be
* done within 'timeout' milliseconds the operation succeeds and 'size' is
@@ -52,8 +52,8 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
long long remaining = timeout;
while(1) {
- long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ?
- remaining : REDIS_SYNCIO_RESOLUTION;
+ long long wait = (remaining > SYNCIO__RESOLUTION) ?
+ remaining : SYNCIO__RESOLUTION;
long long elapsed;
/* Optimistically try to write before checking if the file descriptor
@@ -89,8 +89,8 @@ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) {
if (size == 0) return 0;
while(1) {
- long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ?
- remaining : REDIS_SYNCIO_RESOLUTION;
+ long long wait = (remaining > SYNCIO__RESOLUTION) ?
+ remaining : SYNCIO__RESOLUTION;
long long elapsed;
/* Optimistically try to read before checking if the file descriptor
@@ -139,6 +139,7 @@ ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout) {
*ptr = '\0';
nread++;
}
+ size--;
}
return nread;
}
diff --git a/src/t_hash.c b/src/t_hash.c
index f5ceb36e9..700a6233a 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
/*-----------------------------------------------------------------------------
@@ -40,29 +40,21 @@
void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
int i;
- if (o->encoding != REDIS_ENCODING_ZIPLIST) return;
+ if (o->encoding != OBJ_ENCODING_ZIPLIST) return;
for (i = start; i <= end; i++) {
if (sdsEncodedObject(argv[i]) &&
sdslen(argv[i]->ptr) > server.hash_max_ziplist_value)
{
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
}
}
}
-/* Encode given objects in-place when the hash uses a dict. */
-void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) {
- if (subject->encoding == REDIS_ENCODING_HT) {
- if (o1) *o1 = tryObjectEncoding(*o1);
- if (o2) *o2 = tryObjectEncoding(*o2);
- }
-}
-
/* Get the value from a ziplist encoded hash, identified by field.
* Returns -1 when the field cannot be found. */
-int hashTypeGetFromZiplist(robj *o, robj *field,
+int hashTypeGetFromZiplist(robj *o, sds field,
unsigned char **vstr,
unsigned int *vlen,
long long *vll)
@@ -70,26 +62,22 @@ int hashTypeGetFromZiplist(robj *o, robj *field,
unsigned char *zl, *fptr = NULL, *vptr = NULL;
int ret;
- redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST);
-
- field = getDecodedObject(field);
+ serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST);
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
}
}
- decrRefCount(field);
-
if (vptr != NULL) {
ret = ziplistGet(vptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
return 0;
}
@@ -97,142 +85,207 @@ int hashTypeGetFromZiplist(robj *o, robj *field,
}
/* Get the value from a hash table encoded hash, identified by field.
- * Returns -1 when the field cannot be found. */
-int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) {
+ * Returns NULL when the field cannot be found, otherwise the SDS value
+ * is returned. */
+sds hashTypeGetFromHashTable(robj *o, sds field) {
dictEntry *de;
- redisAssert(o->encoding == REDIS_ENCODING_HT);
+ serverAssert(o->encoding == OBJ_ENCODING_HT);
de = dictFind(o->ptr, field);
- if (de == NULL) return -1;
- *value = dictGetVal(de);
- return 0;
+ if (de == NULL) return NULL;
+ return dictGetVal(de);
}
-/* Higher level function of hashTypeGet*() that always returns a Redis
- * object (either new or with refcount incremented), so that the caller
- * can retain a reference or call decrRefCount after the usage.
+/* Higher level function of hashTypeGet*() that returns the hash value
+ * associated with the specified field. If the field is found C_OK
+ * is returned, otherwise C_ERR. The returned object is returned by
+ * reference in either *vstr and *vlen if it's returned in string form,
+ * or stored in *vll if it's returned as a number.
*
- * The lower level function can prevent copy on write so it is
- * the preferred way of doing read operations. */
-robj *hashTypeGetObject(robj *o, robj *field) {
- robj *value = NULL;
+ * If *vll is populated *vstr is set to NULL, so the caller
+ * can always check the function return by checking the return value
+ * for C_OK and checking if vll (or vstr) is NULL. */
+int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ *vstr = NULL;
+ if (hashTypeGetFromZiplist(o, field, vstr, vlen, vll) == 0)
+ return C_OK;
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds value;
+ if ((value = hashTypeGetFromHashTable(o, field)) != NULL) {
+ *vstr = (unsigned char*) value;
+ *vlen = sdslen(value);
+ return C_OK;
+ }
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ return C_ERR;
+}
+
+/* Like hashTypeGetValue() but returns a Redis object, which is useful for
+ * interaction with the hash type outside t_hash.c.
+ * The function returns NULL if the field is not found in the hash. Otherwise
+ * a newly allocated string object with the value is returned. */
+robj *hashTypeGetValueObject(robj *o, sds field) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vll;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hashTypeGetValue(o,field,&vstr,&vlen,&vll) == C_ERR) return NULL;
+ if (vstr) return createStringObject((char*)vstr,vlen);
+ else return createStringObjectFromLongLong(vll);
+}
+
+/* Higher level function using hashTypeGet*() to return the length of the
+ * object associated with the requested field, or 0 if the field does not
+ * exist. */
+size_t hashTypeGetValueLength(robj *o, sds field) {
+ size_t len = 0;
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
- if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) {
- if (vstr) {
- value = createStringObject((char*)vstr, vlen);
- } else {
- value = createStringObjectFromLongLong(vll);
- }
- }
-
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *aux;
+ if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0)
+ len = vstr ? vlen : sdigits10(vll);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds aux;
- if (hashTypeGetFromHashTable(o, field, &aux) == 0) {
- incrRefCount(aux);
- value = aux;
- }
+ if ((aux = hashTypeGetFromHashTable(o, field)) != NULL)
+ len = sdslen(aux);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return value;
+ return len;
}
/* Test if the specified field exists in the given hash. Returns 1 if the field
* exists, and 0 when it doesn't. */
-int hashTypeExists(robj *o, robj *field) {
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+int hashTypeExists(robj *o, sds field) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) return 1;
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *aux;
-
- if (hashTypeGetFromHashTable(o, field, &aux) == 0) return 1;
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ if (hashTypeGetFromHashTable(o, field) != NULL) return 1;
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
return 0;
}
-/* Add an element, discard the old if the key already exists.
+/* Add a new field, overwrite the old with the new value if it already exists.
* Return 0 on insert and 1 on update.
- * This function will take care of incrementing the reference count of the
- * retained fields and value objects. */
-int hashTypeSet(robj *o, robj *field, robj *value) {
+ *
+ * By default, the key and value SDS strings are copied if needed, so the
+ * caller retains ownership of the strings passed. However this behavior
+ * can be effected by passing appropriate flags (possibly bitwise OR-ed):
+ *
+ * HASH_SET_TAKE_FIELD -- The SDS field ownership passes to the function.
+ * HASH_SET_TAKE_VALUE -- The SDS value ownership passes to the function.
+ *
+ * When the flags are used the caller does not need to release the passed
+ * SDS string(s). It's up to the function to use the string to create a new
+ * entry or to free the SDS string before returning to the caller.
+ *
+ * HASH_SET_COPY corresponds to no flags passed, and means the default
+ * semantics of copying the values if needed.
+ *
+ */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+int hashTypeSet(robj *o, sds field, sds value, int flags) {
int update = 0;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl, *fptr, *vptr;
- field = getDecodedObject(field);
- value = getDecodedObject(value);
-
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
update = 1;
/* Delete value */
zl = ziplistDelete(zl, &vptr);
/* Insert new value */
- zl = ziplistInsert(zl, vptr, value->ptr, sdslen(value->ptr));
+ zl = ziplistInsert(zl, vptr, (unsigned char*)value,
+ sdslen(value));
}
}
if (!update) {
/* Push new field/value pair onto the tail of the ziplist */
- zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL);
- zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)field, sdslen(field),
+ ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)value, sdslen(value),
+ ZIPLIST_TAIL);
}
o->ptr = zl;
- decrRefCount(field);
- decrRefCount(value);
/* Check if the ziplist needs to be converted to a hash table */
if (hashTypeLength(o) > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
- } else if (o->encoding == REDIS_ENCODING_HT) {
- if (dictReplace(o->ptr, field, value)) { /* Insert */
- incrRefCount(field);
- } else { /* Update */
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ dictEntry *de = dictFind(o->ptr,field);
+ if (de) {
+ sdsfree(dictGetVal(de));
+ if (flags & HASH_SET_TAKE_VALUE) {
+ dictGetVal(de) = value;
+ value = NULL;
+ } else {
+ dictGetVal(de) = sdsdup(value);
+ }
update = 1;
+ } else {
+ sds f,v;
+ if (flags & HASH_SET_TAKE_FIELD) {
+ f = field;
+ field = NULL;
+ } else {
+ f = sdsdup(field);
+ }
+ if (flags & HASH_SET_TAKE_VALUE) {
+ v = value;
+ value = NULL;
+ } else {
+ v = sdsdup(value);
+ }
+ dictAdd(o->ptr,f,v);
}
- incrRefCount(value);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
+
+ /* Free SDS strings we did not referenced elsewhere if the flags
+ * want this function to be responsible. */
+ if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field);
+ if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value);
return update;
}
/* Delete an element from a hash.
* Return 1 on deleted and 0 on not found. */
-int hashTypeDelete(robj *o, robj *field) {
+int hashTypeDelete(robj *o, sds field) {
int deleted = 0;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl, *fptr;
- field = getDecodedObject(field);
-
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
zl = ziplistDelete(zl,&fptr);
zl = ziplistDelete(zl,&fptr);
@@ -240,11 +293,8 @@ int hashTypeDelete(robj *o, robj *field) {
deleted = 1;
}
}
-
- decrRefCount(field);
-
- } else if (o->encoding == REDIS_ENCODING_HT) {
- if (dictDelete((dict*)o->ptr, field) == REDIS_OK) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ if (dictDelete((dict*)o->ptr, field) == C_OK) {
deleted = 1;
/* Always check if the dictionary needs a resize after a delete. */
@@ -252,24 +302,22 @@ int hashTypeDelete(robj *o, robj *field) {
}
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return deleted;
}
/* Return the number of elements in a hash. */
-unsigned long hashTypeLength(robj *o) {
+unsigned long hashTypeLength(const robj *o) {
unsigned long length = ULONG_MAX;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
length = ziplistLen(o->ptr) / 2;
- } else if (o->encoding == REDIS_ENCODING_HT) {
- length = dictSize((dict*)o->ptr);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ length = dictSize((const dict*)o->ptr);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return length;
}
@@ -278,30 +326,27 @@ hashTypeIterator *hashTypeInitIterator(robj *subject) {
hi->subject = subject;
hi->encoding = subject->encoding;
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
hi->fptr = NULL;
hi->vptr = NULL;
- } else if (hi->encoding == REDIS_ENCODING_HT) {
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
hi->di = dictGetIterator(subject->ptr);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return hi;
}
void hashTypeReleaseIterator(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_HT) {
+ if (hi->encoding == OBJ_ENCODING_HT)
dictReleaseIterator(hi->di);
- }
-
zfree(hi);
}
-/* Move to the next entry in the hash. Return REDIS_OK when the next entry
- * could be found and REDIS_ERR when the iterator reaches the end. */
+/* Move to the next entry in the hash. Return C_OK when the next entry
+ * could be found and C_ERR when the iterator reaches the end. */
int hashTypeNext(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl;
unsigned char *fptr, *vptr;
@@ -311,28 +356,28 @@ int hashTypeNext(hashTypeIterator *hi) {
if (fptr == NULL) {
/* Initialize cursor */
- redisAssert(vptr == NULL);
+ serverAssert(vptr == NULL);
fptr = ziplistIndex(zl, 0);
} else {
/* Advance cursor */
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
fptr = ziplistNext(zl, vptr);
}
- if (fptr == NULL) return REDIS_ERR;
+ if (fptr == NULL) return C_ERR;
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
/* fptr, vptr now point to the first or next pair */
hi->fptr = fptr;
hi->vptr = vptr;
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR;
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ if ((hi->de = dictNext(hi->di)) == NULL) return C_ERR;
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return REDIS_OK;
+ return C_OK;
}
/* Get the field or value at iterator cursor, for an iterator on a hash value
@@ -344,62 +389,72 @@ void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
{
int ret;
- redisAssert(hi->encoding == REDIS_ENCODING_ZIPLIST);
+ serverAssert(hi->encoding == OBJ_ENCODING_ZIPLIST);
- if (what & REDIS_HASH_KEY) {
+ if (what & OBJ_HASH_KEY) {
ret = ziplistGet(hi->fptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
} else {
ret = ziplistGet(hi->vptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
}
}
/* Get the field or value at iterator cursor, for an iterator on a hash value
- * encoded as a ziplist. Prototype is similar to `hashTypeGetFromHashTable`. */
-void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst) {
- redisAssert(hi->encoding == REDIS_ENCODING_HT);
+ * encoded as a hash table. Prototype is similar to
+ * `hashTypeGetFromHashTable`. */
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what) {
+ serverAssert(hi->encoding == OBJ_ENCODING_HT);
- if (what & REDIS_HASH_KEY) {
- *dst = dictGetKey(hi->de);
+ if (what & OBJ_HASH_KEY) {
+ return dictGetKey(hi->de);
} else {
- *dst = dictGetVal(hi->de);
+ return dictGetVal(hi->de);
}
}
-/* A non copy-on-write friendly but higher level version of hashTypeCurrent*()
- * that returns an object with incremented refcount (or a new object). It is up
- * to the caller to decrRefCount() the object if no reference is retained. */
-robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) {
- robj *dst;
-
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr = NULL;
- unsigned int vlen = UINT_MAX;
- long long vll = LLONG_MAX;
-
- hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
- dst = createStringObject((char*)vstr, vlen);
- } else {
- dst = createStringObjectFromLongLong(vll);
- }
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- hashTypeCurrentFromHashTable(hi, what, &dst);
- incrRefCount(dst);
+/* Higher level function of hashTypeCurrent*() that returns the hash value
+ * at current iterator position.
+ *
+ * The returned element is returned by reference in either *vstr and *vlen if
+ * it's returned in string form, or stored in *vll if it's returned as
+ * a number.
+ *
+ * If *vll is populated *vstr is set to NULL, so the caller
+ * can always check the function return by checking the return value
+ * type checking if vstr == NULL. */
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
+ *vstr = NULL;
+ hashTypeCurrentFromZiplist(hi, what, vstr, vlen, vll);
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds ele = hashTypeCurrentFromHashTable(hi, what);
+ *vstr = (unsigned char*) ele;
+ *vlen = sdslen(ele);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return dst;
}
-robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
+/* Return the key or value at the current iterator position as a new
+ * SDS string. */
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vll;
+
+ hashTypeCurrentObject(hi,what,&vstr,&vlen,&vll);
+ if (vstr) return sdsnewlen(vstr,vlen);
+ return sdsfromlonglong(vll);
+}
+
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key) {
robj *o = lookupKeyWrite(c->db,key);
if (o == NULL) {
o = createHashObject();
dbAdd(c->db,key,o);
} else {
- if (o->type != REDIS_HASH) {
+ if (o->type != OBJ_HASH) {
addReply(c,shared.wrongtypeerr);
return NULL;
}
@@ -408,12 +463,12 @@ robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
}
void hashTypeConvertZiplist(robj *o, int enc) {
- redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST);
+ serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST);
- if (enc == REDIS_ENCODING_ZIPLIST) {
+ if (enc == OBJ_ENCODING_ZIPLIST) {
/* Nothing to do... */
- } else if (enc == REDIS_ENCODING_HT) {
+ } else if (enc == OBJ_ENCODING_HT) {
hashTypeIterator *hi;
dict *dict;
int ret;
@@ -421,39 +476,34 @@ void hashTypeConvertZiplist(robj *o, int enc) {
hi = hashTypeInitIterator(o);
dict = dictCreate(&hashDictType, NULL);
- while (hashTypeNext(hi) != REDIS_ERR) {
- robj *field, *value;
+ while (hashTypeNext(hi) != C_ERR) {
+ sds key, value;
- field = hashTypeCurrentObject(hi, REDIS_HASH_KEY);
- field = tryObjectEncoding(field);
- value = hashTypeCurrentObject(hi, REDIS_HASH_VALUE);
- value = tryObjectEncoding(value);
- ret = dictAdd(dict, field, value);
+ key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ ret = dictAdd(dict, key, value);
if (ret != DICT_OK) {
- redisLogHexDump(REDIS_WARNING,"ziplist with dup elements dump",
+ serverLogHexDump(LL_WARNING,"ziplist with dup elements dump",
o->ptr,ziplistBlobLen(o->ptr));
- redisAssert(ret == DICT_OK);
+ serverPanic("Ziplist corruption detected");
}
}
-
hashTypeReleaseIterator(hi);
zfree(o->ptr);
-
- o->encoding = REDIS_ENCODING_HT;
+ o->encoding = OBJ_ENCODING_HT;
o->ptr = dict;
-
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
void hashTypeConvert(robj *o, int enc) {
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
hashTypeConvertZiplist(o, enc);
- } else if (o->encoding == REDIS_ENCODING_HT) {
- redisPanic("Not implemented");
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ serverPanic("Not implemented");
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
@@ -461,39 +511,24 @@ void hashTypeConvert(robj *o, int enc) {
* Hash type commands
*----------------------------------------------------------------------------*/
-void hsetCommand(redisClient *c) {
- int update;
- robj *o;
-
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- hashTypeTryConversion(o,c->argv,2,3);
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- update = hashTypeSet(o,c->argv[2],c->argv[3]);
- addReply(c, update ? shared.czero : shared.cone);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
- server.dirty++;
-}
-
-void hsetnxCommand(redisClient *c) {
+void hsetnxCommand(client *c) {
robj *o;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
hashTypeTryConversion(o,c->argv,2,3);
- if (hashTypeExists(o, c->argv[2])) {
+ if (hashTypeExists(o, c->argv[2]->ptr)) {
addReply(c, shared.czero);
} else {
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- hashTypeSet(o,c->argv[2],c->argv[3]);
+ hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY);
addReply(c, shared.cone);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty++;
}
}
-void hmsetCommand(redisClient *c) {
- int i;
+void hsetCommand(client *c) {
+ int i, created = 0;
robj *o;
if ((c->argc % 2) == 1) {
@@ -503,29 +538,40 @@ void hmsetCommand(redisClient *c) {
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
hashTypeTryConversion(o,c->argv,2,c->argc-1);
- for (i = 2; i < c->argc; i += 2) {
- hashTypeTryObjectEncoding(o,&c->argv[i], &c->argv[i+1]);
- hashTypeSet(o,c->argv[i],c->argv[i+1]);
+
+ for (i = 2; i < c->argc; i += 2)
+ created += !hashTypeSet(o,c->argv[i]->ptr,c->argv[i+1]->ptr,HASH_SET_COPY);
+
+ /* HMSET (deprecated) and HSET return value is different. */
+ char *cmdname = c->argv[0]->ptr;
+ if (cmdname[1] == 's' || cmdname[1] == 'S') {
+ /* HSET */
+ addReplyLongLong(c, created);
+ } else {
+ /* HMSET */
+ addReply(c, shared.ok);
}
- addReply(c, shared.ok);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty++;
}
-void hincrbyCommand(redisClient *c) {
+void hincrbyCommand(client *c) {
long long value, incr, oldvalue;
- robj *o, *current, *new;
+ robj *o;
+ sds new;
+ unsigned char *vstr;
+ unsigned int vlen;
- if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- if ((current = hashTypeGetObject(o,c->argv[2])) != NULL) {
- if (getLongLongFromObjectOrReply(c,current,&value,
- "hash value is not an integer") != REDIS_OK) {
- decrRefCount(current);
- return;
- }
- decrRefCount(current);
+ if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&value) == C_OK) {
+ if (vstr) {
+ if (string2ll((char*)vstr,vlen,&value) == 0) {
+ addReplyError(c,"hash value is not an integer");
+ return;
+ }
+ } /* Else hashTypeGetValue() already stored it into &value */
} else {
value = 0;
}
@@ -537,53 +583,61 @@ void hincrbyCommand(redisClient *c) {
return;
}
value += incr;
- new = createStringObjectFromLongLong(value);
- hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
- hashTypeSet(o,c->argv[2],new);
- decrRefCount(new);
+ new = sdsfromlonglong(value);
+ hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
addReplyLongLong(c,value);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hincrby",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id);
server.dirty++;
}
-void hincrbyfloatCommand(redisClient *c) {
- double long value, incr;
- robj *o, *current, *new, *aux;
+void hincrbyfloatCommand(client *c) {
+ long double value, incr;
+ long long ll;
+ robj *o;
+ sds new;
+ unsigned char *vstr;
+ unsigned int vlen;
- if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
+ if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- if ((current = hashTypeGetObject(o,c->argv[2])) != NULL) {
- if (getLongDoubleFromObjectOrReply(c,current,&value,
- "hash value is not a valid float") != REDIS_OK) {
- decrRefCount(current);
- return;
+ if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&ll) == C_OK) {
+ if (vstr) {
+ if (string2ld((char*)vstr,vlen,&value) == 0) {
+ addReplyError(c,"hash value is not a float");
+ return;
+ }
+ } else {
+ value = (long double)ll;
}
- decrRefCount(current);
} else {
value = 0;
}
value += incr;
- new = createStringObjectFromLongDouble(value);
- hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
- hashTypeSet(o,c->argv[2],new);
- addReplyBulk(c,new);
+
+ char buf[256];
+ int len = ld2string(buf,sizeof(buf),value,1);
+ new = sdsnewlen(buf,len);
+ hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
+ addReplyBulkCBuffer(c,buf,len);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id);
server.dirty++;
/* Always replicate HINCRBYFLOAT as an HSET command with the final value
* in order to make sure that differences in float pricision or formatting
* will not create differences in replicas or after an AOF restart. */
+ robj *aux, *newobj;
aux = createStringObject("HSET",4);
+ newobj = createRawStringObject(buf,len);
rewriteClientCommandArgument(c,0,aux);
decrRefCount(aux);
- rewriteClientCommandArgument(c,3,new);
- decrRefCount(new);
+ rewriteClientCommandArgument(c,3,newobj);
+ decrRefCount(newobj);
}
-static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
+static void addHashFieldToReply(client *c, robj *o, sds field) {
int ret;
if (o == NULL) {
@@ -591,7 +645,7 @@ static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
return;
}
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
@@ -607,57 +661,53 @@ static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
}
}
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- ret = hashTypeGetFromHashTable(o, field, &value);
- if (ret < 0) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeGetFromHashTable(o, field);
+ if (value == NULL)
addReply(c, shared.nullbulk);
- } else {
- addReplyBulk(c, value);
- }
-
+ else
+ addReplyBulkCBuffer(c, value, sdslen(value));
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
-void hgetCommand(redisClient *c) {
+void hgetCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
- addHashFieldToReply(c, o, c->argv[2]);
+ addHashFieldToReply(c, o, c->argv[2]->ptr);
}
-void hmgetCommand(redisClient *c) {
+void hmgetCommand(client *c) {
robj *o;
int i;
/* Don't abort when the key cannot be found. Non-existing keys are empty
* hashes, where HMGET should respond with a series of null bulks. */
o = lookupKeyRead(c->db, c->argv[1]);
- if (o != NULL && o->type != REDIS_HASH) {
+ if (o != NULL && o->type != OBJ_HASH) {
addReply(c, shared.wrongtypeerr);
return;
}
addReplyMultiBulkLen(c, c->argc-2);
for (i = 2; i < c->argc; i++) {
- addHashFieldToReply(c, o, c->argv[i]);
+ addHashFieldToReply(c, o, c->argv[i]->ptr);
}
}
-void hdelCommand(redisClient *c) {
+void hdelCommand(client *c) {
robj *o;
int j, deleted = 0, keyremoved = 0;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
for (j = 2; j < c->argc; j++) {
- if (hashTypeDelete(o,c->argv[j])) {
+ if (hashTypeDelete(o,c->argv[j]->ptr)) {
deleted++;
if (hashTypeLength(o) == 0) {
dbDelete(c->db,c->argv[1]);
@@ -668,104 +718,108 @@ void hdelCommand(redisClient *c) {
}
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
-void hlenCommand(redisClient *c) {
+void hlenCommand(client *c) {
robj *o;
+
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
addReplyLongLong(c,hashTypeLength(o));
}
-static void addHashIteratorCursorToReply(redisClient *c, hashTypeIterator *hi, int what) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+void hstrlenCommand(client *c) {
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,OBJ_HASH)) return;
+ addReplyLongLong(c,hashTypeGetValueLength(o,c->argv[2]->ptr));
+}
+
+static void addHashIteratorCursorToReply(client *c, hashTypeIterator *hi, int what) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
+ if (vstr)
addReplyBulkCBuffer(c, vstr, vlen);
- } else {
+ else
addReplyBulkLongLong(c, vll);
- }
-
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- hashTypeCurrentFromHashTable(hi, what, &value);
- addReplyBulk(c, value);
-
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeCurrentFromHashTable(hi, what);
+ addReplyBulkCBuffer(c, value, sdslen(value));
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
-void genericHgetallCommand(redisClient *c, int flags) {
+void genericHgetallCommand(client *c, int flags) {
robj *o;
hashTypeIterator *hi;
int multiplier = 0;
int length, count = 0;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_HASH)) return;
+ || checkType(c,o,OBJ_HASH)) return;
- if (flags & REDIS_HASH_KEY) multiplier++;
- if (flags & REDIS_HASH_VALUE) multiplier++;
+ if (flags & OBJ_HASH_KEY) multiplier++;
+ if (flags & OBJ_HASH_VALUE) multiplier++;
length = hashTypeLength(o) * multiplier;
addReplyMultiBulkLen(c, length);
hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
- if (flags & REDIS_HASH_KEY) {
- addHashIteratorCursorToReply(c, hi, REDIS_HASH_KEY);
+ while (hashTypeNext(hi) != C_ERR) {
+ if (flags & OBJ_HASH_KEY) {
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
count++;
}
- if (flags & REDIS_HASH_VALUE) {
- addHashIteratorCursorToReply(c, hi, REDIS_HASH_VALUE);
+ if (flags & OBJ_HASH_VALUE) {
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
count++;
}
}
hashTypeReleaseIterator(hi);
- redisAssert(count == length);
+ serverAssert(count == length);
}
-void hkeysCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY);
+void hkeysCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_KEY);
}
-void hvalsCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_VALUE);
+void hvalsCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_VALUE);
}
-void hgetallCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY|REDIS_HASH_VALUE);
+void hgetallCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_KEY|OBJ_HASH_VALUE);
}
-void hexistsCommand(redisClient *c) {
+void hexistsCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
- addReply(c, hashTypeExists(o,c->argv[2]) ? shared.cone : shared.czero);
+ addReply(c, hashTypeExists(o,c->argv[2]->ptr) ? shared.cone : shared.czero);
}
-void hscanCommand(redisClient *c) {
+void hscanCommand(client *c) {
robj *o;
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
scanGenericCommand(c,o,cursor);
}
diff --git a/src/t_list.c b/src/t_list.c
index 7c79185fd..a0a30998d 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -27,116 +27,82 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* List API
*----------------------------------------------------------------------------*/
-/* Check the argument length to see if it requires us to convert the ziplist
- * to a real list. Only check raw-encoded objects because integer encoded
- * objects are never too long. */
-void listTypeTryConversion(robj *subject, robj *value) {
- if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
- if (sdsEncodedObject(value) &&
- sdslen(value->ptr) > server.list_max_ziplist_value)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-}
-
/* The function pushes an element to the specified list object 'subject',
* at head or tail position as specified by 'where'.
*
* There is no need for the caller to increment the refcount of 'value' as
* the function takes care of it if needed. */
void listTypePush(robj *subject, robj *value, int where) {
- /* Check if we need to convert the ziplist */
- listTypeTryConversion(subject,value);
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+ int pos = (where == LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
value = getDecodedObject(value);
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
+ size_t len = sdslen(value->ptr);
+ quicklistPush(subject->ptr, value->ptr, len, pos);
decrRefCount(value);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_HEAD) {
- listAddNodeHead(subject->ptr,value);
- } else {
- listAddNodeTail(subject->ptr,value);
- }
- incrRefCount(value);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
+void *listPopSaver(unsigned char *data, unsigned int sz) {
+ return createStringObject((char*)data,sz);
+}
+
robj *listTypePop(robj *subject, int where) {
+ long long vlong;
robj *value = NULL;
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- int pos = (where == REDIS_HEAD) ? 0 : -1;
- p = ziplistIndex(subject->ptr,pos);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
+
+ int ql_where = where == LIST_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+ if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
+ NULL, &vlong, listPopSaver)) {
+ if (!value)
value = createStringObjectFromLongLong(vlong);
- }
- /* We only need to delete an element when it exists */
- subject->ptr = ziplistDelete(subject->ptr,&p);
- }
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = subject->ptr;
- listNode *ln;
- if (where == REDIS_HEAD) {
- ln = listFirst(list);
- } else {
- ln = listLast(list);
- }
- if (ln != NULL) {
- value = listNodeValue(ln);
- incrRefCount(value);
- listDelNode(list,ln);
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return value;
}
-unsigned long listTypeLength(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- return ziplistLen(subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- return listLength((list*)subject->ptr);
+unsigned long listTypeLength(const robj *subject) {
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+ return quicklistCount(subject->ptr);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Initialize an iterator at the specified index. */
-listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction) {
+listTypeIterator *listTypeInitIterator(robj *subject, long index,
+ unsigned char direction) {
listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
li->subject = subject;
li->encoding = subject->encoding;
li->direction = direction;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- li->zi = ziplistIndex(subject->ptr,index);
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- li->ln = listIndex(subject->ptr,index);
+ li->iter = NULL;
+ /* LIST_HEAD means start at TAIL and move *towards* head.
+ * LIST_TAIL means start at HEAD and move *towards tail. */
+ int iter_direction =
+ direction == LIST_HEAD ? AL_START_TAIL : AL_START_HEAD;
+ if (li->encoding == OBJ_ENCODING_QUICKLIST) {
+ li->iter = quicklistGetIteratorAtIdx(li->subject->ptr,
+ iter_direction, index);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return li;
}
/* Clean up the iterator. */
void listTypeReleaseIterator(listTypeIterator *li) {
+ zfree(li->iter);
zfree(li);
}
@@ -145,146 +111,82 @@ void listTypeReleaseIterator(listTypeIterator *li) {
* entry is in fact an entry, 0 otherwise. */
int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Protect from converting when iterating */
- redisAssert(li->subject->encoding == li->encoding);
+ serverAssert(li->subject->encoding == li->encoding);
entry->li = li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- entry->zi = li->zi;
- if (entry->zi != NULL) {
- if (li->direction == REDIS_TAIL)
- li->zi = ziplistNext(li->subject->ptr,li->zi);
- else
- li->zi = ziplistPrev(li->subject->ptr,li->zi);
- return 1;
- }
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- entry->ln = li->ln;
- if (entry->ln != NULL) {
- if (li->direction == REDIS_TAIL)
- li->ln = li->ln->next;
- else
- li->ln = li->ln->prev;
- return 1;
- }
+ if (li->encoding == OBJ_ENCODING_QUICKLIST) {
+ return quicklistNext(li->iter, &entry->entry);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return 0;
}
/* Return entry or NULL at the current position of the iterator. */
robj *listTypeGet(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
robj *value = NULL;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- redisAssert(entry->zi != NULL);
- if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+ if (entry->entry.value) {
+ value = createStringObject((char *)entry->entry.value,
+ entry->entry.sz);
+ } else {
+ value = createStringObjectFromLongLong(entry->entry.longval);
}
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- redisAssert(entry->ln != NULL);
- value = listNodeValue(entry->ln);
- incrRefCount(value);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return value;
}
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
- robj *subject = entry->li->subject;
- if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
value = getDecodedObject(value);
- if (where == REDIS_TAIL) {
- unsigned char *next = ziplistNext(subject->ptr,entry->zi);
-
- /* When we insert after the current element, but the current element
- * is the tail of the list, we need to do a push. */
- if (next == NULL) {
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
- } else {
- subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
- }
- } else {
- subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
+ sds str = value->ptr;
+ size_t len = sdslen(str);
+ if (where == LIST_TAIL) {
+ quicklistInsertAfter((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
+ } else if (where == LIST_HEAD) {
+ quicklistInsertBefore((quicklist *)entry->entry.quicklist,
+ &entry->entry, str, len);
}
decrRefCount(value);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_TAIL) {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
- } else {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
- }
- incrRefCount(value);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Compare the given object with the entry at the current position. */
int listTypeEqual(listTypeEntry *entry, robj *o) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- redisAssertWithInfo(NULL,o,sdsEncodedObject(o));
- return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- return equalStringObjects(o,listNodeValue(entry->ln));
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+ serverAssertWithInfo(NULL,o,sdsEncodedObject(o));
+ return quicklistCompare(entry->entry.zi,o->ptr,sdslen(o->ptr));
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Delete the element pointed to. */
-void listTypeDelete(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = entry->zi;
- li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
-
- /* Update position of the iterator depending on the direction */
- if (li->direction == REDIS_TAIL)
- li->zi = p;
- else
- li->zi = ziplistPrev(li->subject->ptr,p);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *next;
- if (li->direction == REDIS_TAIL)
- next = entry->ln->next;
- else
- next = entry->ln->prev;
- listDelNode(li->subject->ptr,entry->ln);
- li->ln = next;
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry) {
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklistDelEntry(iter->iter, &entry->entry);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
+/* Create a quicklist from a single ziplist */
void listTypeConvert(robj *subject, int enc) {
- listTypeIterator *li;
- listTypeEntry entry;
- redisAssertWithInfo(NULL,subject,subject->type == REDIS_LIST);
-
- if (enc == REDIS_ENCODING_LINKEDLIST) {
- list *l = listCreate();
- listSetFreeMethod(l,decrRefCountVoid);
-
- /* listTypeGet returns a robj with incremented refcount */
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
- listTypeReleaseIterator(li);
-
- subject->encoding = REDIS_ENCODING_LINKEDLIST;
- zfree(subject->ptr);
- subject->ptr = l;
+ serverAssertWithInfo(NULL,subject,subject->type==OBJ_LIST);
+ serverAssertWithInfo(NULL,subject,subject->encoding==OBJ_ENCODING_ZIPLIST);
+
+ if (enc == OBJ_ENCODING_QUICKLIST) {
+ size_t zlen = server.list_max_ziplist_size;
+ int depth = server.list_compress_depth;
+ subject->ptr = quicklistCreateFromZiplist(zlen, depth, subject->ptr);
+ subject->encoding = OBJ_ENCODING_QUICKLIST;
} else {
- redisPanic("Unsupported list conversion");
+ serverPanic("Unsupported list conversion");
}
}
@@ -292,220 +194,191 @@ void listTypeConvert(robj *subject, int enc) {
* List Commands
*----------------------------------------------------------------------------*/
-void pushGenericCommand(redisClient *c, int where) {
- int j, waiting = 0, pushed = 0;
+void pushGenericCommand(client *c, int where) {
+ int j, pushed = 0;
robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
- if (lobj && lobj->type != REDIS_LIST) {
+ if (lobj && lobj->type != OBJ_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
for (j = 2; j < c->argc; j++) {
- c->argv[j] = tryObjectEncoding(c->argv[j]);
if (!lobj) {
- lobj = createZiplistObject();
+ lobj = createQuicklistObject();
+ quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,c->argv[1],lobj);
}
listTypePush(lobj,c->argv[j],where);
pushed++;
}
- addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0));
+ addReplyLongLong(c, (lobj ? listTypeLength(lobj) : 0));
if (pushed) {
- char *event = (where == REDIS_HEAD) ? "lpush" : "rpush";
+ char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
}
server.dirty += pushed;
}
-void lpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_HEAD);
+void lpushCommand(client *c) {
+ pushGenericCommand(c,LIST_HEAD);
}
-void rpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_TAIL);
+void rpushCommand(client *c) {
+ pushGenericCommand(c,LIST_TAIL);
}
-void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
+void pushxGenericCommand(client *c, int where) {
+ int j, pushed = 0;
robj *subject;
- listTypeIterator *iter;
- listTypeEntry entry;
- int inserted = 0;
-
- if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,subject,REDIS_LIST)) return;
-
- if (refval != NULL) {
- /* We're not sure if this value can be inserted yet, but we cannot
- * convert the list inside the iterator. We don't want to loop over
- * the list twice (once to see if the value can be inserted and once
- * to do the actual insert), so we assume this value can be inserted
- * and convert the ziplist to a regular list if necessary. */
- listTypeTryConversion(subject,val);
-
- /* Seek refval from head to tail */
- iter = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(iter,&entry)) {
- if (listTypeEqual(&entry,refval)) {
- listTypeInsert(&entry,val,where);
- inserted = 1;
- break;
- }
- }
- listTypeReleaseIterator(iter);
- if (inserted) {
- /* Check if the length exceeds the ziplist length threshold. */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"linsert",
- c->argv[1],c->db->id);
- server.dirty++;
- } else {
- /* Notify client of a failed insert */
- addReply(c,shared.cnegone);
- return;
- }
- } else {
- char *event = (where == REDIS_HEAD) ? "lpush" : "rpush";
+ if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,subject,OBJ_LIST)) return;
- listTypePush(subject,val,where);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
- server.dirty++;
+ for (j = 2; j < c->argc; j++) {
+ listTypePush(subject,c->argv[j],where);
+ pushed++;
}
addReplyLongLong(c,listTypeLength(subject));
+
+ if (pushed) {
+ char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
+ }
+ server.dirty += pushed;
}
-void lpushxCommand(redisClient *c) {
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
+void lpushxCommand(client *c) {
+ pushxGenericCommand(c,LIST_HEAD);
}
-void rpushxCommand(redisClient *c) {
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
+void rpushxCommand(client *c) {
+ pushxGenericCommand(c,LIST_TAIL);
}
-void linsertCommand(redisClient *c) {
- c->argv[4] = tryObjectEncoding(c->argv[4]);
+void linsertCommand(client *c) {
+ int where;
+ robj *subject;
+ listTypeIterator *iter;
+ listTypeEntry entry;
+ int inserted = 0;
+
if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
+ where = LIST_TAIL;
} else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
+ where = LIST_HEAD;
} else {
addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,subject,OBJ_LIST)) return;
+
+ /* Seek pivot from head to tail */
+ iter = listTypeInitIterator(subject,0,LIST_TAIL);
+ while (listTypeNext(iter,&entry)) {
+ if (listTypeEqual(&entry,c->argv[3])) {
+ listTypeInsert(&entry,c->argv[4],where);
+ inserted = 1;
+ break;
+ }
}
+ listTypeReleaseIterator(iter);
+
+ if (inserted) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
+ c->argv[1],c->db->id);
+ server.dirty++;
+ } else {
+ /* Notify client of a failed insert */
+ addReply(c,shared.cnegone);
+ return;
+ }
+
+ addReplyLongLong(c,listTypeLength(subject));
}
-void llenCommand(redisClient *c) {
+void llenCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
addReplyLongLong(c,listTypeLength(o));
}
-void lindexCommand(redisClient *c) {
+void lindexCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
long index;
robj *value = NULL;
- if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
return;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- p = ziplistIndex(o->ptr,index);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklistEntry entry;
+ if (quicklistIndex(o->ptr, index, &entry)) {
+ if (entry.value) {
+ value = createStringObject((char*)entry.value,entry.sz);
} else {
- value = createStringObjectFromLongLong(vlong);
+ value = createStringObjectFromLongLong(entry.longval);
}
addReplyBulk(c,value);
decrRefCount(value);
} else {
addReply(c,shared.nullbulk);
}
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln != NULL) {
- value = listNodeValue(ln);
- addReplyBulk(c,value);
- } else {
- addReply(c,shared.nullbulk);
- }
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
-void lsetCommand(redisClient *c) {
+void lsetCommand(client *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
long index;
- robj *value = (c->argv[3] = tryObjectEncoding(c->argv[3]));
+ robj *value = c->argv[3];
- if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
return;
- listTypeTryConversion(o,value);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p, *zl = o->ptr;
- p = ziplistIndex(zl,index);
- if (p == NULL) {
- addReply(c,shared.outofrangeerr);
- } else {
- o->ptr = ziplistDelete(o->ptr,&p);
- value = getDecodedObject(value);
- o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
- decrRefCount(value);
- addReply(c,shared.ok);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
- server.dirty++;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln == NULL) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ int replaced = quicklistReplaceAtIndex(ql, index,
+ value->ptr, sdslen(value->ptr));
+ if (!replaced) {
addReply(c,shared.outofrangeerr);
} else {
- decrRefCount((robj*)listNodeValue(ln));
- listNodeValue(ln) = value;
- incrRefCount(value);
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"lset",c->argv[1],c->db->id);
server.dirty++;
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
-void popGenericCommand(redisClient *c, int where) {
+void popGenericCommand(client *c, int where) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
robj *value = listTypePop(o,where);
if (value == NULL) {
addReply(c,shared.nullbulk);
} else {
- char *event = (where == REDIS_HEAD) ? "lpop" : "rpop";
+ char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
addReplyBulk(c,value);
decrRefCount(value);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
if (listTypeLength(o) == 0) {
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
c->argv[1],c->db->id);
dbDelete(c->db,c->argv[1]);
}
@@ -514,23 +387,23 @@ void popGenericCommand(redisClient *c, int where) {
}
}
-void lpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_HEAD);
+void lpopCommand(client *c) {
+ popGenericCommand(c,LIST_HEAD);
}
-void rpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_TAIL);
+void rpopCommand(client *c) {
+ popGenericCommand(c,LIST_TAIL);
}
-void lrangeCommand(redisClient *c) {
+void lrangeCommand(client *c) {
robj *o;
long start, end, llen, rangelen;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_LIST)) return;
+ || checkType(c,o,OBJ_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
@@ -549,49 +422,34 @@ void lrangeCommand(redisClient *c) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = ziplistIndex(o->ptr,start);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ listTypeIterator *iter = listTypeInitIterator(o, start, LIST_TAIL);
while(rangelen--) {
- ziplistGet(p,&vstr,&vlen,&vlong);
- if (vstr) {
- addReplyBulkCBuffer(c,vstr,vlen);
+ listTypeEntry entry;
+ listTypeNext(iter, &entry);
+ quicklistEntry *qe = &entry.entry;
+ if (qe->value) {
+ addReplyBulkCBuffer(c,qe->value,qe->sz);
} else {
- addReplyBulkLongLong(c,vlong);
+ addReplyBulkLongLong(c,qe->longval);
}
- p = ziplistNext(o->ptr,p);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln;
-
- /* If we are nearest to the end of the list, reach the element
- * starting from tail and going backward, as it is faster. */
- if (start > llen/2) start -= llen;
- ln = listIndex(o->ptr,start);
-
- while(rangelen--) {
- addReplyBulk(c,ln->value);
- ln = ln->next;
}
+ listTypeReleaseIterator(iter);
} else {
- redisPanic("List encoding is not LINKEDLIST nor ZIPLIST!");
+ serverPanic("List encoding is not QUICKLIST!");
}
}
-void ltrimCommand(redisClient *c) {
+void ltrimCommand(client *c) {
robj *o;
- long start, end, llen, j, ltrim, rtrim;
- list *list;
- listNode *ln;
+ long start, end, llen, ltrim, rtrim;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
- checkType(c,o,REDIS_LIST)) return;
+ checkType(c,o,OBJ_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
@@ -612,61 +470,47 @@ void ltrimCommand(redisClient *c) {
}
/* Remove list elements to perform the trim */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
- o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list = o->ptr;
- for (j = 0; j < ltrim; j++) {
- ln = listFirst(list);
- listDelNode(list,ln);
- }
- for (j = 0; j < rtrim; j++) {
- ln = listLast(list);
- listDelNode(list,ln);
- }
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklistDelRange(o->ptr,0,ltrim);
+ quicklistDelRange(o->ptr,-rtrim,rtrim);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
if (listTypeLength(o) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.ok);
}
-void lremCommand(redisClient *c) {
+void lremCommand(client *c) {
robj *subject, *obj;
- obj = c->argv[3] = tryObjectEncoding(c->argv[3]);
+ obj = c->argv[3];
long toremove;
long removed = 0;
- listTypeEntry entry;
- if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != C_OK))
return;
subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
- if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
-
- /* Make sure obj is raw when we're dealing with a ziplist */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- obj = getDecodedObject(obj);
+ if (subject == NULL || checkType(c,subject,OBJ_LIST)) return;
listTypeIterator *li;
if (toremove < 0) {
toremove = -toremove;
- li = listTypeInitIterator(subject,-1,REDIS_HEAD);
+ li = listTypeInitIterator(subject,-1,LIST_HEAD);
} else {
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
+ li = listTypeInitIterator(subject,0,LIST_TAIL);
}
+ listTypeEntry entry;
while (listTypeNext(li,&entry)) {
if (listTypeEqual(&entry,obj)) {
- listTypeDelete(&entry);
+ listTypeDelete(li, &entry);
server.dirty++;
removed++;
if (toremove && removed == toremove) break;
@@ -674,13 +518,17 @@ void lremCommand(redisClient *c) {
}
listTypeReleaseIterator(li);
- /* Clean up raw encoded object */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- decrRefCount(obj);
+ if (removed) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"lrem",c->argv[1],c->db->id);
+ }
+
+ if (listTypeLength(subject) == 0) {
+ dbDelete(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ }
- if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
addReplyLongLong(c,removed);
- if (removed) signalModifiedKey(c->db,c->argv[1]);
}
/* This is the semantic of this command:
@@ -699,23 +547,25 @@ void lremCommand(redisClient *c) {
* as well. This command was originally proposed by Ezra Zygmuntowicz.
*/
-void rpoplpushHandlePush(redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
+void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) {
/* Create the list if the key does not exist */
if (!dstobj) {
- dstobj = createZiplistObject();
+ dstobj = createQuicklistObject();
+ quicklistSetOptions(dstobj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
dbAdd(c->db,dstkey,dstobj);
}
signalModifiedKey(c->db,dstkey);
- listTypePush(dstobj,value,REDIS_HEAD);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lpush",dstkey,c->db->id);
+ listTypePush(dstobj,value,LIST_HEAD);
+ notifyKeyspaceEvent(NOTIFY_LIST,"lpush",dstkey,c->db->id);
/* Always send the pushed value to the client. */
addReplyBulk(c,value);
}
-void rpoplpushCommand(redisClient *c) {
+void rpoplpushCommand(client *c) {
robj *sobj, *value;
if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,sobj,REDIS_LIST)) return;
+ checkType(c,sobj,OBJ_LIST)) return;
if (listTypeLength(sobj) == 0) {
/* This may only happen after loading very old RDB files. Recent
@@ -725,8 +575,8 @@ void rpoplpushCommand(redisClient *c) {
robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
robj *touchedkey = c->argv[1];
- if (dobj && checkType(c,dobj,REDIS_LIST)) return;
- value = listTypePop(sobj,REDIS_TAIL);
+ if (dobj && checkType(c,dobj,OBJ_LIST)) return;
+ value = listTypePop(sobj,LIST_TAIL);
/* We saved touched key, and protect it, since rpoplpushHandlePush
* may change the client command argument vector (it does not
* currently). */
@@ -737,10 +587,10 @@ void rpoplpushCommand(redisClient *c) {
decrRefCount(value);
/* Delete the source list when it is empty */
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"rpop",touchedkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"rpop",touchedkey,c->db->id);
if (listTypeLength(sobj) == 0) {
dbDelete(c->db,touchedkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
touchedkey,c->db->id);
}
signalModifiedKey(c->db,touchedkey);
@@ -772,7 +622,7 @@ void rpoplpushCommand(redisClient *c) {
/* Set a client in blocking mode for the specified key, with the specified
* timeout */
-void blockForKeys(redisClient *c, robj **keys, int numkeys, mstime_t timeout, robj *target) {
+void blockForKeys(client *c, robj **keys, int numkeys, mstime_t timeout, robj *target) {
dictEntry *de;
list *l;
int j;
@@ -796,23 +646,23 @@ void blockForKeys(redisClient *c, robj **keys, int numkeys, mstime_t timeout, ro
l = listCreate();
retval = dictAdd(c->db->blocking_keys,keys[j],l);
incrRefCount(keys[j]);
- redisAssertWithInfo(c,keys[j],retval == DICT_OK);
+ serverAssertWithInfo(c,keys[j],retval == DICT_OK);
} else {
l = dictGetVal(de);
}
listAddNodeTail(l,c);
}
- blockClient(c,REDIS_BLOCKED_LIST);
+ blockClient(c,BLOCKED_LIST);
}
/* Unblock a client that's waiting in a blocking operation such as BLPOP.
* You should never call this function directly, but unblockClient() instead. */
-void unblockClientWaitingData(redisClient *c) {
+void unblockClientWaitingData(client *c) {
dictEntry *de;
dictIterator *di;
list *l;
- redisAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
+ serverAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
di = dictGetIterator(c->bpop.keys);
/* The client may wait for multiple keys, so unblock it for every key. */
while((de = dictNext(di)) != NULL) {
@@ -820,7 +670,7 @@ void unblockClientWaitingData(redisClient *c) {
/* Remove this client from the list of clients waiting for this key. */
l = dictFetchValue(c->db->blocking_keys,key);
- redisAssertWithInfo(c,key,l != NULL);
+ serverAssertWithInfo(c,key,l != NULL);
listDelNode(l,listSearchKey(l,c));
/* If the list is empty we need to remove it to avoid wasting memory */
if (listLength(l) == 0)
@@ -863,7 +713,7 @@ void signalListAsReady(redisDb *db, robj *key) {
* to avoid adding it multiple times into a list with a simple O(1)
* check. */
incrRefCount(key);
- redisAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
+ serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
}
/* This is a helper function for handleClientsBlockedOnLists(). It's work
@@ -876,27 +726,27 @@ void signalListAsReady(redisDb *db, robj *key) {
* 3) Propagate the resulting BRPOP, BLPOP and additional LPUSH if any into
* the AOF and replication channel.
*
- * The argument 'where' is REDIS_TAIL or REDIS_HEAD, and indicates if the
+ * The argument 'where' is LIST_TAIL or LIST_HEAD, and indicates if the
* 'value' element was popped fron the head (BLPOP) or tail (BRPOP) so that
* we can propagate the command properly.
*
- * The function returns REDIS_OK if we are able to serve the client, otherwise
- * REDIS_ERR is returned to signal the caller that the list POP operation
+ * The function returns C_OK if we are able to serve the client, otherwise
+ * C_ERR is returned to signal the caller that the list POP operation
* should be undone as the client was not served: This only happens for
* BRPOPLPUSH that fails to push the value to the destination key as it is
* of the wrong type. */
-int serveClientBlockedOnList(redisClient *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where)
+int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where)
{
robj *argv[3];
if (dstkey == NULL) {
/* Propagate the [LR]POP operation. */
- argv[0] = (where == REDIS_HEAD) ? shared.lpop :
+ argv[0] = (where == LIST_HEAD) ? shared.lpop :
shared.rpop;
argv[1] = key;
- propagate((where == REDIS_HEAD) ?
+ propagate((where == LIST_HEAD) ?
server.lpopCommand : server.rpopCommand,
- db->id,argv,2,REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+ db->id,argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
/* BRPOP/BLPOP */
addReplyMultiBulkLen(receiver,2);
@@ -907,15 +757,15 @@ int serveClientBlockedOnList(redisClient *receiver, robj *key, robj *dstkey, red
robj *dstobj =
lookupKeyWrite(receiver->db,dstkey);
if (!(dstobj &&
- checkType(receiver,dstobj,REDIS_LIST)))
+ checkType(receiver,dstobj,OBJ_LIST)))
{
/* Propagate the RPOP operation. */
argv[0] = shared.rpop;
argv[1] = key;
propagate(server.rpopCommand,
db->id,argv,2,
- REDIS_PROPAGATE_AOF|
- REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|
+ PROPAGATE_REPL);
rpoplpushHandlePush(receiver,dstkey,dstobj,
value);
/* Propagate the LPUSH operation. */
@@ -924,15 +774,15 @@ int serveClientBlockedOnList(redisClient *receiver, robj *key, robj *dstkey, red
argv[2] = value;
propagate(server.lpushCommand,
db->id,argv,3,
- REDIS_PROPAGATE_AOF|
- REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|
+ PROPAGATE_REPL);
} else {
/* BRPOPLPUSH failed because of wrong
* destination type. */
- return REDIS_ERR;
+ return C_ERR;
}
}
- return REDIS_OK;
+ return C_OK;
}
/* This function should be called by Redis every time a single command,
@@ -967,7 +817,7 @@ void handleClientsBlockedOnLists(void) {
/* If the key exists and it's a list, serve blocked clients
* with data. */
robj *o = lookupKeyWrite(rl->db,rl->key);
- if (o != NULL && o->type == REDIS_LIST) {
+ if (o != NULL && o->type == OBJ_LIST) {
dictEntry *de;
/* We serve clients in the same order they blocked for
@@ -979,11 +829,11 @@ void handleClientsBlockedOnLists(void) {
while(numclients--) {
listNode *clientnode = listFirst(clients);
- redisClient *receiver = clientnode->value;
+ client *receiver = clientnode->value;
robj *dstkey = receiver->bpop.target;
int where = (receiver->lastcmd &&
receiver->lastcmd->proc == blpopCommand) ?
- REDIS_HEAD : REDIS_TAIL;
+ LIST_HEAD : LIST_TAIL;
robj *value = listTypePop(o,where);
if (value) {
@@ -995,7 +845,7 @@ void handleClientsBlockedOnLists(void) {
if (serveClientBlockedOnList(receiver,
rl->key,dstkey,rl->db,value,
- where) == REDIS_ERR)
+ where) == C_ERR)
{
/* If we failed serving the client we need
* to also undo the POP operation. */
@@ -1010,7 +860,9 @@ void handleClientsBlockedOnLists(void) {
}
}
- if (listTypeLength(o) == 0) dbDelete(rl->db,rl->key);
+ if (listTypeLength(o) == 0) {
+ dbDelete(rl->db,rl->key);
+ }
/* We don't call signalModifiedKey() as it was already called
* when an element was pushed on the list. */
}
@@ -1025,36 +877,36 @@ void handleClientsBlockedOnLists(void) {
}
/* Blocking RPOP/LPOP */
-void blockingPopGenericCommand(redisClient *c, int where) {
+void blockingPopGenericCommand(client *c, int where) {
robj *o;
mstime_t timeout;
int j;
if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout,UNIT_SECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
for (j = 1; j < c->argc-1; j++) {
o = lookupKeyWrite(c->db,c->argv[j]);
if (o != NULL) {
- if (o->type != REDIS_LIST) {
+ if (o->type != OBJ_LIST) {
addReply(c,shared.wrongtypeerr);
return;
} else {
if (listTypeLength(o) != 0) {
/* Non empty list, this is like a non normal [LR]POP. */
- char *event = (where == REDIS_HEAD) ? "lpop" : "rpop";
+ char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
robj *value = listTypePop(o,where);
- redisAssert(value != NULL);
+ serverAssert(value != NULL);
addReplyMultiBulkLen(c,2);
addReplyBulk(c,c->argv[j]);
addReplyBulk(c,value);
decrRefCount(value);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,
+ notifyKeyspaceEvent(NOTIFY_LIST,event,
c->argv[j],c->db->id);
if (listTypeLength(o) == 0) {
dbDelete(c->db,c->argv[j]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
c->argv[j],c->db->id);
}
signalModifiedKey(c->db,c->argv[j]);
@@ -1062,7 +914,7 @@ void blockingPopGenericCommand(redisClient *c, int where) {
/* Replicate it as an [LR]POP instead of B[LR]POP. */
rewriteClientCommandVector(c,2,
- (where == REDIS_HEAD) ? shared.lpop : shared.rpop,
+ (where == LIST_HEAD) ? shared.lpop : shared.rpop,
c->argv[j]);
return;
}
@@ -1072,7 +924,7 @@ void blockingPopGenericCommand(redisClient *c, int where) {
/* If we are inside a MULTI/EXEC and the list is empty the only thing
* we can do is treating it as a timeout (even with timeout 0). */
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
addReply(c,shared.nullmultibulk);
return;
}
@@ -1081,24 +933,24 @@ void blockingPopGenericCommand(redisClient *c, int where) {
blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL);
}
-void blpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_HEAD);
+void blpopCommand(client *c) {
+ blockingPopGenericCommand(c,LIST_HEAD);
}
-void brpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_TAIL);
+void brpopCommand(client *c) {
+ blockingPopGenericCommand(c,LIST_TAIL);
}
-void brpoplpushCommand(redisClient *c) {
+void brpoplpushCommand(client *c) {
mstime_t timeout;
if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout,UNIT_SECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
robj *key = lookupKeyWrite(c->db, c->argv[1]);
if (key == NULL) {
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
/* Blocking against an empty list in a multi state
* returns immediately. */
addReply(c, shared.nullbulk);
@@ -1107,12 +959,12 @@ void brpoplpushCommand(redisClient *c) {
blockForKeys(c, c->argv + 1, 1, timeout, c->argv[2]);
}
} else {
- if (key->type != REDIS_LIST) {
+ if (key->type != OBJ_LIST) {
addReply(c, shared.wrongtypeerr);
} else {
/* The list exists and has elements, so
* the regular rpoplpushCommand is executed. */
- redisAssertWithInfo(c,key,listTypeLength(key) > 0);
+ serverAssertWithInfo(c,key,listTypeLength(key) > 0);
rpoplpushCommand(c);
}
}
diff --git a/src/t_set.c b/src/t_set.c
index c530d6923..d5a801e11 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -27,86 +27,93 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* Set Commands
*----------------------------------------------------------------------------*/
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
+ robj *dstkey, int op);
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */
-robj *setTypeCreate(robj *value) {
- if (isObjectRepresentableAsLongLong(value,NULL) == REDIS_OK)
+robj *setTypeCreate(sds value) {
+ if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
return createIntsetObject();
return createSetObject();
}
-int setTypeAdd(robj *subject, robj *value) {
+/* Add the specified value into a set.
+ *
+ * If the value was already member of the set, nothing is done and 0 is
+ * returned, otherwise the new element is added and 1 is returned. */
+int setTypeAdd(robj *subject, sds value) {
long long llval;
- if (subject->encoding == REDIS_ENCODING_HT) {
- if (dictAdd(subject->ptr,value,NULL) == DICT_OK) {
- incrRefCount(value);
+ if (subject->encoding == OBJ_ENCODING_HT) {
+ dict *ht = subject->ptr;
+ dictEntry *de = dictAddRaw(ht,value,NULL);
+ if (de) {
+ dictSetKey(ht,de,sdsdup(value));
+ dictSetVal(ht,de,NULL);
return 1;
}
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
uint8_t success = 0;
subject->ptr = intsetAdd(subject->ptr,llval,&success);
if (success) {
/* Convert to regular set when the intset contains
* too many entries. */
if (intsetLen(subject->ptr) > server.set_max_intset_entries)
- setTypeConvert(subject,REDIS_ENCODING_HT);
+ setTypeConvert(subject,OBJ_ENCODING_HT);
return 1;
}
} else {
/* Failed to get integer from object, convert to regular set. */
- setTypeConvert(subject,REDIS_ENCODING_HT);
+ setTypeConvert(subject,OBJ_ENCODING_HT);
/* The set *was* an intset and this value is not integer
* encodable, so dictAdd should always work. */
- redisAssertWithInfo(NULL,value,dictAdd(subject->ptr,value,NULL) == DICT_OK);
- incrRefCount(value);
+ serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
return 1;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
-int setTypeRemove(robj *setobj, robj *value) {
+int setTypeRemove(robj *setobj, sds value) {
long long llval;
- if (setobj->encoding == REDIS_ENCODING_HT) {
+ if (setobj->encoding == OBJ_ENCODING_HT) {
if (dictDelete(setobj->ptr,value) == DICT_OK) {
if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
return 1;
}
- } else if (setobj->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
int success;
setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
if (success) return 1;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
-int setTypeIsMember(robj *subject, robj *value) {
+int setTypeIsMember(robj *subject, sds value) {
long long llval;
- if (subject->encoding == REDIS_ENCODING_HT) {
+ if (subject->encoding == OBJ_ENCODING_HT) {
return dictFind((dict*)subject->ptr,value) != NULL;
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
return intsetFind((intset*)subject->ptr,llval);
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
@@ -115,18 +122,18 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
setTypeIterator *si = zmalloc(sizeof(setTypeIterator));
si->subject = subject;
si->encoding = subject->encoding;
- if (si->encoding == REDIS_ENCODING_HT) {
+ if (si->encoding == OBJ_ENCODING_HT) {
si->di = dictGetIterator(subject->ptr);
- } else if (si->encoding == REDIS_ENCODING_INTSET) {
+ } else if (si->encoding == OBJ_ENCODING_INTSET) {
si->ii = 0;
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return si;
}
void setTypeReleaseIterator(setTypeIterator *si) {
- if (si->encoding == REDIS_ENCODING_HT)
+ if (si->encoding == OBJ_ENCODING_HT)
dictReleaseIterator(si->di);
zfree(si);
}
@@ -134,55 +141,60 @@ void setTypeReleaseIterator(setTypeIterator *si) {
/* Move to the next entry in the set. Returns the object at the current
* position.
*
- * Since set elements can be internally be stored as redis objects or
+ * Since set elements can be internally be stored as SDS strings or
* simple arrays of integers, setTypeNext returns the encoding of the
* set object you are iterating, and will populate the appropriate pointer
- * (eobj) or (llobj) accordingly.
+ * (sdsele) or (llele) accordingly.
+ *
+ * Note that both the sdsele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the non
+ * used field with values which are easy to trap if misused.
*
- * When there are no longer elements -1 is returned.
- * Returned objects ref count is not incremented, so this function is
- * copy on write friendly. */
-int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele) {
- if (si->encoding == REDIS_ENCODING_HT) {
+ * When there are no longer elements -1 is returned. */
+int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
+ if (si->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictNext(si->di);
if (de == NULL) return -1;
- *objele = dictGetKey(de);
- } else if (si->encoding == REDIS_ENCODING_INTSET) {
+ *sdsele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
+ } else if (si->encoding == OBJ_ENCODING_INTSET) {
if (!intsetGet(si->subject->ptr,si->ii++,llele))
return -1;
+ *sdsele = NULL; /* Not needed. Defensive. */
+ } else {
+ serverPanic("Wrong set encoding in setTypeNext");
}
return si->encoding;
}
/* The not copy on write friendly version but easy to use version
- * of setTypeNext() is setTypeNextObject(), returning new objects
- * or incrementing the ref count of returned objects. So if you don't
- * retain a pointer to this object you should call decrRefCount() against it.
+ * of setTypeNext() is setTypeNextObject(), returning new SDS
+ * strings. So if you don't retain a pointer to this object you should call
+ * sdsfree() against it.
*
* This function is the way to go for write operations where COW is not
- * an issue as the result will be anyway of incrementing the ref count. */
-robj *setTypeNextObject(setTypeIterator *si) {
+ * an issue. */
+sds setTypeNextObject(setTypeIterator *si) {
int64_t intele;
- robj *objele;
+ sds sdsele;
int encoding;
- encoding = setTypeNext(si,&objele,&intele);
+ encoding = setTypeNext(si,&sdsele,&intele);
switch(encoding) {
case -1: return NULL;
- case REDIS_ENCODING_INTSET:
- return createStringObjectFromLongLong(intele);
- case REDIS_ENCODING_HT:
- incrRefCount(objele);
- return objele;
+ case OBJ_ENCODING_INTSET:
+ return sdsfromlonglong(intele);
+ case OBJ_ENCODING_HT:
+ return sdsdup(sdsele);
default:
- redisPanic("Unsupported encoding");
+ serverPanic("Unsupported encoding");
}
return NULL; /* just to suppress warnings */
}
/* Return random element from a non empty set.
* The returned element can be a int64_t value if the set is encoded
- * as an "intset" blob of integers, or a redis object if the set
+ * as an "intset" blob of integers, or an SDS string if the set
* is a regular set.
*
* The caller provides both pointers to be populated with the right
@@ -190,28 +202,30 @@ robj *setTypeNextObject(setTypeIterator *si) {
* field of the object and is used by the caller to check if the
* int64_t pointer or the redis object pointer was populated.
*
- * When an object is returned (the set was a real set) the ref count
- * of the object is not incremented so this function can be considered
- * copy on write friendly. */
-int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele) {
- if (setobj->encoding == REDIS_ENCODING_HT) {
+ * Note that both the sdsele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the non
+ * used field with values which are easy to trap if misused. */
+int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
+ if (setobj->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictGetRandomKey(setobj->ptr);
- *objele = dictGetKey(de);
- } else if (setobj->encoding == REDIS_ENCODING_INTSET) {
+ *sdsele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
+ } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
*llele = intsetRandom(setobj->ptr);
+ *sdsele = NULL; /* Not needed. Defensive. */
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return setobj->encoding;
}
-unsigned long setTypeSize(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_HT) {
- return dictSize((dict*)subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- return intsetLen((intset*)subject->ptr);
+unsigned long setTypeSize(const robj *subject) {
+ if (subject->encoding == OBJ_ENCODING_HT) {
+ return dictSize((const dict*)subject->ptr);
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ return intsetLen((const intset*)subject->ptr);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
}
@@ -220,69 +234,68 @@ unsigned long setTypeSize(robj *subject) {
* set. */
void setTypeConvert(robj *setobj, int enc) {
setTypeIterator *si;
- redisAssertWithInfo(NULL,setobj,setobj->type == REDIS_SET &&
- setobj->encoding == REDIS_ENCODING_INTSET);
+ serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
+ setobj->encoding == OBJ_ENCODING_INTSET);
- if (enc == REDIS_ENCODING_HT) {
+ if (enc == OBJ_ENCODING_HT) {
int64_t intele;
dict *d = dictCreate(&setDictType,NULL);
- robj *element;
+ sds element;
/* Presize the dict to avoid rehashing */
dictExpand(d,intsetLen(setobj->ptr));
/* To add the elements we extract integers and create redis objects */
si = setTypeInitIterator(setobj);
- while (setTypeNext(si,NULL,&intele) != -1) {
- element = createStringObjectFromLongLong(intele);
- redisAssertWithInfo(NULL,element,dictAdd(d,element,NULL) == DICT_OK);
+ while (setTypeNext(si,&element,&intele) != -1) {
+ element = sdsfromlonglong(intele);
+ serverAssert(dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
- setobj->encoding = REDIS_ENCODING_HT;
+ setobj->encoding = OBJ_ENCODING_HT;
zfree(setobj->ptr);
setobj->ptr = d;
} else {
- redisPanic("Unsupported set conversion");
+ serverPanic("Unsupported set conversion");
}
}
-void saddCommand(redisClient *c) {
+void saddCommand(client *c) {
robj *set;
int j, added = 0;
set = lookupKeyWrite(c->db,c->argv[1]);
if (set == NULL) {
- set = setTypeCreate(c->argv[2]);
+ set = setTypeCreate(c->argv[2]->ptr);
dbAdd(c->db,c->argv[1],set);
} else {
- if (set->type != REDIS_SET) {
+ if (set->type != OBJ_SET) {
addReply(c,shared.wrongtypeerr);
return;
}
}
for (j = 2; j < c->argc; j++) {
- c->argv[j] = tryObjectEncoding(c->argv[j]);
- if (setTypeAdd(set,c->argv[j])) added++;
+ if (setTypeAdd(set,c->argv[j]->ptr)) added++;
}
if (added) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
}
server.dirty += added;
addReplyLongLong(c,added);
}
-void sremCommand(redisClient *c) {
+void sremCommand(client *c) {
robj *set;
int j, deleted = 0, keyremoved = 0;
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
for (j = 2; j < c->argc; j++) {
- if (setTypeRemove(set,c->argv[j])) {
+ if (setTypeRemove(set,c->argv[j]->ptr)) {
deleted++;
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
@@ -293,20 +306,20 @@ void sremCommand(redisClient *c) {
}
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"srem",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
-void smoveCommand(redisClient *c) {
+void smoveCommand(client *c) {
robj *srcset, *dstset, *ele;
srcset = lookupKeyWrite(c->db,c->argv[1]);
dstset = lookupKeyWrite(c->db,c->argv[2]);
- ele = c->argv[3] = tryObjectEncoding(c->argv[3]);
+ ele = c->argv[3];
/* If the source key does not exist return 0 */
if (srcset == NULL) {
@@ -316,96 +329,278 @@ void smoveCommand(redisClient *c) {
/* If the source key has the wrong type, or the destination key
* is set and has the wrong type, return with an error. */
- if (checkType(c,srcset,REDIS_SET) ||
- (dstset && checkType(c,dstset,REDIS_SET))) return;
+ if (checkType(c,srcset,OBJ_SET) ||
+ (dstset && checkType(c,dstset,OBJ_SET))) return;
/* If srcset and dstset are equal, SMOVE is a no-op */
if (srcset == dstset) {
- addReply(c,shared.cone);
+ addReply(c,setTypeIsMember(srcset,ele->ptr) ?
+ shared.cone : shared.czero);
return;
}
/* If the element cannot be removed from the src set, return 0. */
- if (!setTypeRemove(srcset,ele)) {
+ if (!setTypeRemove(srcset,ele->ptr)) {
addReply(c,shared.czero);
return;
}
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"srem",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
/* Remove the src set from the database when empty */
if (setTypeSize(srcset) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
- signalModifiedKey(c->db,c->argv[1]);
- signalModifiedKey(c->db,c->argv[2]);
- server.dirty++;
/* Create the destination set when it doesn't exist */
if (!dstset) {
- dstset = setTypeCreate(ele);
+ dstset = setTypeCreate(ele->ptr);
dbAdd(c->db,c->argv[2],dstset);
}
+ signalModifiedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[2]);
+ server.dirty++;
+
/* An extra key has changed when ele was successfully added to dstset */
- if (setTypeAdd(dstset,ele)) {
+ if (setTypeAdd(dstset,ele->ptr)) {
server.dirty++;
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sadd",c->argv[2],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id);
}
addReply(c,shared.cone);
}
-void sismemberCommand(redisClient *c) {
+void sismemberCommand(client *c) {
robj *set;
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- if (setTypeIsMember(set,c->argv[2]))
+ if (setTypeIsMember(set,c->argv[2]->ptr))
addReply(c,shared.cone);
else
addReply(c,shared.czero);
}
-void scardCommand(redisClient *c) {
+void scardCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_SET)) return;
+ checkType(c,o,OBJ_SET)) return;
addReplyLongLong(c,setTypeSize(o));
}
-void spopCommand(redisClient *c) {
+/* Handle the "SPOP key <count>" variant. The normal version of the
+ * command is handled by the spopCommand() function itself. */
+
+/* How many times bigger should be the set compared to the remaining size
+ * for us to use the "create new set" strategy? Read later in the
+ * implementation for more info. */
+#define SPOP_MOVE_STRATEGY_MUL 5
+
+void spopWithCountCommand(client *c) {
+ long l;
+ unsigned long count, size;
+ robj *set;
+
+ /* Get the count argument */
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
+ if (l >= 0) {
+ count = (unsigned) l;
+ } else {
+ addReply(c,shared.outofrangeerr);
+ return;
+ }
+
+ /* Make sure a key with the name inputted exists, and that it's type is
+ * indeed a set. Otherwise, return nil */
+ if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
+ == NULL || checkType(c,set,OBJ_SET)) return;
+
+ /* If count is zero, serve an empty multibulk ASAP to avoid special
+ * cases later. */
+ if (count == 0) {
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+
+ size = setTypeSize(set);
+
+ /* Generate an SPOP keyspace notification */
+ notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
+ server.dirty += count;
+
+ /* CASE 1:
+ * The number of requested elements is greater than or equal to
+ * the number of elements inside the set: simply return the whole set. */
+ if (count >= size) {
+ /* We just return the entire set */
+ sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
+
+ /* Delete the set as it is now empty */
+ dbDelete(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+
+ /* Propagate this command as an DEL operation */
+ rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
+ server.dirty++;
+ return;
+ }
+
+ /* Case 2 and 3 require to replicate SPOP as a set of SREM commands.
+ * Prepare our replication argument vector. Also send the array length
+ * which is common to both the code paths. */
+ robj *propargv[3];
+ propargv[0] = createStringObject("SREM",4);
+ propargv[1] = c->argv[1];
+ addReplyMultiBulkLen(c,count);
+
+ /* Common iteration vars. */
+ sds sdsele;
+ robj *objele;
+ int encoding;
+ int64_t llele;
+ unsigned long remaining = size-count; /* Elements left after SPOP. */
+
+ /* If we are here, the number of requested elements is less than the
+ * number of elements inside the set. Also we are sure that count < size.
+ * Use two different strategies.
+ *
+ * CASE 2: The number of elements to return is small compared to the
+ * set size. We can just extract random elements and return them to
+ * the set. */
+ if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+ while(count--) {
+ /* Emit and remove. */
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ addReplyBulkLongLong(c,llele);
+ objele = createStringObjectFromLongLong(llele);
+ set->ptr = intsetRemove(set->ptr,llele,NULL);
+ } else {
+ addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
+ objele = createStringObject(sdsele,sdslen(sdsele));
+ setTypeRemove(set,sdsele);
+ }
+
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
+ }
+ } else {
+ /* CASE 3: The number of elements to return is very big, approaching
+ * the size of the set itself. After some time extracting random elements
+ * from such a set becomes computationally expensive, so we use
+ * a different strategy, we extract random elements that we don't
+ * want to return (the elements that will remain part of the set),
+ * creating a new set as we do this (that will be stored as the original
+ * set). Then we return the elements left in the original set and
+ * release it. */
+ robj *newset = NULL;
+
+ /* Create a new set with just the remaining elements. */
+ while(remaining--) {
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ sdsele = sdsfromlonglong(llele);
+ } else {
+ sdsele = sdsdup(sdsele);
+ }
+ if (!newset) newset = setTypeCreate(sdsele);
+ setTypeAdd(newset,sdsele);
+ setTypeRemove(set,sdsele);
+ sdsfree(sdsele);
+ }
+
+ /* Assign the new set as the key value. */
+ incrRefCount(set); /* Protect the old set value. */
+ dbOverwrite(c->db,c->argv[1],newset);
+
+ /* Tranfer the old set to the client and release it. */
+ setTypeIterator *si;
+ si = setTypeInitIterator(set);
+ while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) {
+ if (encoding == OBJ_ENCODING_INTSET) {
+ addReplyBulkLongLong(c,llele);
+ objele = createStringObjectFromLongLong(llele);
+ } else {
+ addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
+ objele = createStringObject(sdsele,sdslen(sdsele));
+ }
+
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
+ }
+ setTypeReleaseIterator(si);
+ decrRefCount(set);
+ }
+
+ /* Don't propagate the command itself even if we incremented the
+ * dirty counter. We don't want to propagate an SPOP command since
+ * we propagated the command as a set of SREMs operations using
+ * the alsoPropagate() API. */
+ decrRefCount(propargv[0]);
+ preventCommandPropagation(c);
+ signalModifiedKey(c->db,c->argv[1]);
+ server.dirty++;
+}
+
+void spopCommand(client *c) {
robj *set, *ele, *aux;
+ sds sdsele;
int64_t llele;
int encoding;
+ if (c->argc == 3) {
+ spopWithCountCommand(c);
+ return;
+ } else if (c->argc > 3) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Make sure a key with the name inputted exists, and that it's type is
+ * indeed a set */
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
+ /* Get a random element from the set */
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
+
+ /* Remove the element from the set */
+ if (encoding == OBJ_ENCODING_INTSET) {
ele = createStringObjectFromLongLong(llele);
set->ptr = intsetRemove(set->ptr,llele,NULL);
} else {
- incrRefCount(ele);
- setTypeRemove(set,ele);
+ ele = createStringObject(sdsele,sdslen(sdsele));
+ setTypeRemove(set,ele->ptr);
}
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
+
+ notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
/* Replicate/AOF this command as an SREM operation */
aux = createStringObject("SREM",4);
rewriteClientCommandVector(c,3,aux,c->argv[1],ele);
- decrRefCount(ele);
decrRefCount(aux);
+ /* Add the element to the reply */
addReplyBulk(c,ele);
+ decrRefCount(ele);
+
+ /* Delete the set if it's empty */
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
+
+ /* Set has been modified */
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
}
@@ -418,17 +613,18 @@ void spopCommand(redisClient *c) {
* implementation for more info. */
#define SRANDMEMBER_SUB_STRATEGY_MUL 3
-void srandmemberWithCountCommand(redisClient *c) {
+void srandmemberWithCountCommand(client *c) {
long l;
unsigned long count, size;
int uniq = 1;
- robj *set, *ele;
+ robj *set;
+ sds ele;
int64_t llele;
int encoding;
dict *d;
- if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
if (l >= 0) {
count = (unsigned) l;
} else {
@@ -439,7 +635,7 @@ void srandmemberWithCountCommand(redisClient *c) {
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
- == NULL || checkType(c,set,REDIS_SET)) return;
+ == NULL || checkType(c,set,OBJ_SET)) return;
size = setTypeSize(set);
/* If count is zero, serve it ASAP to avoid special cases later. */
@@ -456,10 +652,10 @@ void srandmemberWithCountCommand(redisClient *c) {
addReplyMultiBulkLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulk(c,ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
}
}
return;
@@ -469,12 +665,12 @@ void srandmemberWithCountCommand(redisClient *c) {
* The number of requested elements is greater than the number of
* elements inside the set: simply return the whole set. */
if (count >= size) {
- sunionDiffGenericCommand(c,c->argv+1,1,NULL,REDIS_OP_UNION);
+ sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
return;
}
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
- d = dictCreate(&setDictType,NULL);
+ d = dictCreate(&objectKeyPointerValueDictType,NULL);
/* CASE 3:
* The number of elements inside the set is not greater than
@@ -493,15 +689,15 @@ void srandmemberWithCountCommand(redisClient *c) {
while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval = DICT_ERR;
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else {
- retval = dictAdd(d,dupStringObject(ele),NULL);
+ retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
}
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
setTypeReleaseIterator(si);
- redisAssert(dictSize(d) == size);
+ serverAssert(dictSize(d) == size);
/* Remove random elements to reach the right count. */
while(size > count) {
@@ -519,21 +715,22 @@ void srandmemberWithCountCommand(redisClient *c) {
* to reach the specified count. */
else {
unsigned long added = 0;
+ robj *objele;
while(added < count) {
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
- ele = createStringObjectFromLongLong(llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ objele = createStringObjectFromLongLong(llele);
} else {
- ele = dupStringObject(ele);
+ objele = createStringObject(ele,sdslen(ele));
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
* in the result dictionary. */
- if (dictAdd(d,ele,NULL) == DICT_OK)
+ if (dictAdd(d,objele,NULL) == DICT_OK)
added++;
else
- decrRefCount(ele);
+ decrRefCount(objele);
}
}
@@ -551,8 +748,9 @@ void srandmemberWithCountCommand(redisClient *c) {
}
}
-void srandmemberCommand(redisClient *c) {
- robj *set, *ele;
+void srandmemberCommand(client *c) {
+ robj *set;
+ sds ele;
int64_t llele;
int encoding;
@@ -565,13 +763,13 @@ void srandmemberCommand(redisClient *c) {
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulk(c,ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
}
}
@@ -587,10 +785,12 @@ int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
return (o2 ? setTypeSize(o2) : 0) - (o1 ? setTypeSize(o1) : 0);
}
-void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum, robj *dstkey) {
+void sinterGenericCommand(client *c, robj **setkeys,
+ unsigned long setnum, robj *dstkey) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
- robj *eleobj, *dstset = NULL;
+ robj *dstset = NULL;
+ sds elesds;
int64_t intobj;
void *replylen = NULL;
unsigned long j, cardinality = 0;
@@ -613,7 +813,7 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum,
}
return;
}
- if (checkType(c,setobj,REDIS_SET)) {
+ if (checkType(c,setobj,OBJ_SET)) {
zfree(sets);
return;
}
@@ -640,38 +840,28 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum,
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
si = setTypeInitIterator(sets[0]);
- while((encoding = setTypeNext(si,&eleobj,&intobj)) != -1) {
+ while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
for (j = 1; j < setnum; j++) {
if (sets[j] == sets[0]) continue;
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
/* intset with intset is simple... and fast */
- if (sets[j]->encoding == REDIS_ENCODING_INTSET &&
+ if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
!intsetFind((intset*)sets[j]->ptr,intobj))
{
break;
/* in order to compare an integer with an object we
* have to use the generic function, creating an object
* for this */
- } else if (sets[j]->encoding == REDIS_ENCODING_HT) {
- eleobj = createStringObjectFromLongLong(intobj);
- if (!setTypeIsMember(sets[j],eleobj)) {
- decrRefCount(eleobj);
+ } else if (sets[j]->encoding == OBJ_ENCODING_HT) {
+ elesds = sdsfromlonglong(intobj);
+ if (!setTypeIsMember(sets[j],elesds)) {
+ sdsfree(elesds);
break;
}
- decrRefCount(eleobj);
+ sdsfree(elesds);
}
- } else if (encoding == REDIS_ENCODING_HT) {
- /* Optimization... if the source object is integer
- * encoded AND the target set is an intset, we can get
- * a much faster path. */
- if (eleobj->encoding == REDIS_ENCODING_INT &&
- sets[j]->encoding == REDIS_ENCODING_INTSET &&
- !intsetFind((intset*)sets[j]->ptr,(long)eleobj->ptr))
- {
- break;
- /* else... object to object check is easy as we use the
- * type agnostic API here. */
- } else if (!setTypeIsMember(sets[j],eleobj)) {
+ } else if (encoding == OBJ_ENCODING_HT) {
+ if (!setTypeIsMember(sets[j],elesds)) {
break;
}
}
@@ -680,18 +870,18 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum,
/* Only take action when all sets contain the member */
if (j == setnum) {
if (!dstkey) {
- if (encoding == REDIS_ENCODING_HT)
- addReplyBulk(c,eleobj);
+ if (encoding == OBJ_ENCODING_HT)
+ addReplyBulkCBuffer(c,elesds,sdslen(elesds));
else
addReplyBulkLongLong(c,intobj);
cardinality++;
} else {
- if (encoding == REDIS_ENCODING_INTSET) {
- eleobj = createStringObjectFromLongLong(intobj);
- setTypeAdd(dstset,eleobj);
- decrRefCount(eleobj);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ elesds = sdsfromlonglong(intobj);
+ setTypeAdd(dstset,elesds);
+ sdsfree(elesds);
} else {
- setTypeAdd(dstset,eleobj);
+ setTypeAdd(dstset,elesds);
}
}
}
@@ -705,13 +895,13 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum,
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sinterstore",
+ notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
@@ -722,22 +912,24 @@ void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long setnum,
zfree(sets);
}
-void sinterCommand(redisClient *c) {
+void sinterCommand(client *c) {
sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
}
-void sinterstoreCommand(redisClient *c) {
+void sinterstoreCommand(client *c) {
sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
}
-#define REDIS_OP_UNION 0
-#define REDIS_OP_DIFF 1
-#define REDIS_OP_INTER 2
+#define SET_OP_UNION 0
+#define SET_OP_DIFF 1
+#define SET_OP_INTER 2
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op) {
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
+ robj *dstkey, int op) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
- robj *ele, *dstset = NULL;
+ robj *dstset = NULL;
+ sds ele;
int j, cardinality = 0;
int diff_algo = 1;
@@ -749,7 +941,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
sets[j] = NULL;
continue;
}
- if (checkType(c,setobj,REDIS_SET)) {
+ if (checkType(c,setobj,OBJ_SET)) {
zfree(sets);
return;
}
@@ -765,7 +957,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
* the sets.
*
* We compute what is the best bet with the current input here. */
- if (op == REDIS_OP_DIFF && sets[0]) {
+ if (op == SET_OP_DIFF && sets[0]) {
long long algo_one_work = 0, algo_two_work = 0;
for (j = 0; j < setnum; j++) {
@@ -794,7 +986,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
* this set object will be the resulting object to set into the target key*/
dstset = createIntsetObject();
- if (op == REDIS_OP_UNION) {
+ if (op == SET_OP_UNION) {
/* Union is trivial, just add every element of every set to the
* temporary set. */
for (j = 0; j < setnum; j++) {
@@ -803,11 +995,11 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
si = setTypeInitIterator(sets[j]);
while((ele = setTypeNextObject(si)) != NULL) {
if (setTypeAdd(dstset,ele)) cardinality++;
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
}
- } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 1) {
+ } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
/* DIFF Algorithm 1:
*
* We perform the diff by iterating all the elements of the first set,
@@ -828,10 +1020,10 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
setTypeAdd(dstset,ele);
cardinality++;
}
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
- } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 2) {
+ } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
/* DIFF Algorithm 2:
*
* Add all the elements of the first set to the auxiliary set.
@@ -849,7 +1041,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
} else {
if (setTypeRemove(dstset,ele)) cardinality--;
}
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
@@ -864,8 +1056,8 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
addReplyMultiBulkLen(c,cardinality);
si = setTypeInitIterator(dstset);
while((ele = setTypeNextObject(si)) != NULL) {
- addReplyBulk(c,ele);
- decrRefCount(ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
decrRefCount(dstset);
@@ -876,14 +1068,14 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,
- op == REDIS_OP_UNION ? "sunionstore" : "sdiffstore",
+ notifyKeyspaceEvent(NOTIFY_SET,
+ op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
@@ -892,28 +1084,28 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *
zfree(sets);
}
-void sunionCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
+void sunionCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_UNION);
}
-void sunionstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
+void sunionstoreCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_UNION);
}
-void sdiffCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
+void sdiffCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_DIFF);
}
-void sdiffstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
+void sdiffstoreCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
}
-void sscanCommand(redisClient *c) {
+void sscanCommand(client *c) {
robj *set;
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
scanGenericCommand(c,set,cursor);
}
diff --git a/src/t_string.c b/src/t_string.c
index 41e4b3b71..75375f446 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -27,19 +27,19 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h> /* isnan(), isinf() */
/*-----------------------------------------------------------------------------
* String Commands
*----------------------------------------------------------------------------*/
-static int checkStringLength(redisClient *c, long long size) {
+static int checkStringLength(client *c, long long size) {
if (size > 512*1024*1024) {
addReplyError(c,"string exceeds maximum allowed size (512MB)");
- return REDIS_ERR;
+ return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* The setGenericCommand() function implements the SET operation with different
@@ -58,62 +58,74 @@ static int checkStringLength(redisClient *c, long long size) {
* If ok_reply is NULL "+OK" is used.
* If abort_reply is NULL, "$-1" is used. */
-#define REDIS_SET_NO_FLAGS 0
-#define REDIS_SET_NX (1<<0) /* Set if key not exists. */
-#define REDIS_SET_XX (1<<1) /* Set if key exists. */
+#define OBJ_SET_NO_FLAGS 0
+#define OBJ_SET_NX (1<<0) /* Set if key not exists. */
+#define OBJ_SET_XX (1<<1) /* Set if key exists. */
+#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given */
+#define OBJ_SET_PX (1<<3) /* Set if time in ms in given */
-void setGenericCommand(redisClient *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
+void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmness warning */
if (expire) {
- if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK)
return;
if (milliseconds <= 0) {
- addReplyError(c,"invalid expire time in SETEX");
+ addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name);
return;
}
if (unit == UNIT_SECONDS) milliseconds *= 1000;
}
- if ((flags & REDIS_SET_NX && lookupKeyWrite(c->db,key) != NULL) ||
- (flags & REDIS_SET_XX && lookupKeyWrite(c->db,key) == NULL))
+ if ((flags & OBJ_SET_NX && lookupKeyWrite(c->db,key) != NULL) ||
+ (flags & OBJ_SET_XX && lookupKeyWrite(c->db,key) == NULL))
{
addReply(c, abort_reply ? abort_reply : shared.nullbulk);
return;
}
setKey(c->db,key,val);
server.dirty++;
- if (expire) setExpire(c->db,key,mstime()+milliseconds);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",key,c->db->id);
- if (expire) notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,
+ if (expire) setExpire(c,c->db,key,mstime()+milliseconds);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
+ if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC,
"expire",key,c->db->id);
addReply(c, ok_reply ? ok_reply : shared.ok);
}
/* SET key value [NX] [XX] [EX <seconds>] [PX <milliseconds>] */
-void setCommand(redisClient *c) {
+void setCommand(client *c) {
int j;
robj *expire = NULL;
int unit = UNIT_SECONDS;
- int flags = REDIS_SET_NO_FLAGS;
+ int flags = OBJ_SET_NO_FLAGS;
for (j = 3; j < c->argc; j++) {
char *a = c->argv[j]->ptr;
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
if ((a[0] == 'n' || a[0] == 'N') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
- flags |= REDIS_SET_NX;
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_XX))
+ {
+ flags |= OBJ_SET_NX;
} else if ((a[0] == 'x' || a[0] == 'X') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
- flags |= REDIS_SET_XX;
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_NX))
+ {
+ flags |= OBJ_SET_XX;
} else if ((a[0] == 'e' || a[0] == 'E') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_PX) && next)
+ {
+ flags |= OBJ_SET_EX;
unit = UNIT_SECONDS;
expire = next;
j++;
} else if ((a[0] == 'p' || a[0] == 'P') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_EX) && next)
+ {
+ flags |= OBJ_SET_PX;
unit = UNIT_MILLISECONDS;
expire = next;
j++;
@@ -127,54 +139,54 @@ void setCommand(redisClient *c) {
setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
}
-void setnxCommand(redisClient *c) {
+void setnxCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
- setGenericCommand(c,REDIS_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
+ setGenericCommand(c,OBJ_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
}
-void setexCommand(redisClient *c) {
+void setexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,REDIS_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
}
-void psetexCommand(redisClient *c) {
+void psetexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,REDIS_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
}
-int getGenericCommand(redisClient *c) {
+int getGenericCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
- return REDIS_OK;
+ return C_OK;
- if (o->type != REDIS_STRING) {
+ if (o->type != OBJ_STRING) {
addReply(c,shared.wrongtypeerr);
- return REDIS_ERR;
+ return C_ERR;
} else {
addReplyBulk(c,o);
- return REDIS_OK;
+ return C_OK;
}
}
-void getCommand(redisClient *c) {
+void getCommand(client *c) {
getGenericCommand(c);
}
-void getsetCommand(redisClient *c) {
- if (getGenericCommand(c) == REDIS_ERR) return;
+void getsetCommand(client *c) {
+ if (getGenericCommand(c) == C_ERR) return;
c->argv[2] = tryObjectEncoding(c->argv[2]);
setKey(c->db,c->argv[1],c->argv[2]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[1],c->db->id);
server.dirty++;
}
-void setrangeCommand(redisClient *c) {
+void setrangeCommand(client *c) {
robj *o;
long offset;
sds value = c->argv[3]->ptr;
- if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != C_OK)
return;
if (offset < 0) {
@@ -191,16 +203,16 @@ void setrangeCommand(redisClient *c) {
}
/* Return when the resulting string exceeds allowed size */
- if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
+ if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
- o = createObject(REDIS_STRING,sdsempty());
+ o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+sdslen(value)));
dbAdd(c->db,c->argv[1],o);
} else {
size_t olen;
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
+ if (checkType(c,o,OBJ_STRING))
return;
/* Return existing string length when setting nothing */
@@ -211,7 +223,7 @@ void setrangeCommand(redisClient *c) {
}
/* Return when the resulting string exceeds allowed size */
- if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
+ if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
/* Create a copy when the object is shared or encoded. */
@@ -222,27 +234,27 @@ void setrangeCommand(redisClient *c) {
o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
memcpy((char*)o->ptr+offset,value,sdslen(value));
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,
+ notifyKeyspaceEvent(NOTIFY_STRING,
"setrange",c->argv[1],c->db->id);
server.dirty++;
}
addReplyLongLong(c,sdslen(o->ptr));
}
-void getrangeCommand(redisClient *c) {
+void getrangeCommand(client *c) {
robj *o;
- long start, end;
+ long long start, end;
char *str, llbuf[32];
size_t strlen;
- if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
- if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptybulk)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
str = llbuf;
strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
} else {
@@ -251,22 +263,26 @@ void getrangeCommand(redisClient *c) {
}
/* Convert negative indexes */
+ if (start < 0 && end < 0 && start > end) {
+ addReply(c,shared.emptybulk);
+ return;
+ }
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
if (end < 0) end = 0;
- if ((unsigned)end >= strlen) end = strlen-1;
+ if ((unsigned long long)end >= strlen) end = strlen-1;
/* Precondition: end >= 0 && end < strlen, so the only condition where
* nothing can be returned is: start > end. */
- if (start > end) {
+ if (start > end || strlen == 0) {
addReply(c,shared.emptybulk);
} else {
addReplyBulkCBuffer(c,(char*)str+start,end-start+1);
}
}
-void mgetCommand(redisClient *c) {
+void mgetCommand(client *c) {
int j;
addReplyMultiBulkLen(c,c->argc-1);
@@ -275,7 +291,7 @@ void mgetCommand(redisClient *c) {
if (o == NULL) {
addReply(c,shared.nullbulk);
} else {
- if (o->type != REDIS_STRING) {
+ if (o->type != OBJ_STRING) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,o);
@@ -284,7 +300,7 @@ void mgetCommand(redisClient *c) {
}
}
-void msetGenericCommand(redisClient *c, int nx) {
+void msetGenericCommand(client *c, int nx) {
int j, busykeys = 0;
if ((c->argc % 2) == 0) {
@@ -308,27 +324,27 @@ void msetGenericCommand(redisClient *c, int nx) {
for (j = 1; j < c->argc; j += 2) {
c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
setKey(c->db,c->argv[j],c->argv[j+1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",c->argv[j],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id);
}
server.dirty += (c->argc-1)/2;
addReply(c, nx ? shared.cone : shared.ok);
}
-void msetCommand(redisClient *c) {
+void msetCommand(client *c) {
msetGenericCommand(c,0);
}
-void msetnxCommand(redisClient *c) {
+void msetnxCommand(client *c) {
msetGenericCommand(c,1);
}
-void incrDecrCommand(redisClient *c, long long incr) {
+void incrDecrCommand(client *c, long long incr) {
long long value, oldvalue;
robj *o, *new;
o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL && checkType(c,o,REDIS_STRING)) return;
- if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ if (getLongLongFromObjectOrReply(c,o,&value,NULL) != C_OK) return;
oldvalue = value;
if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
@@ -337,49 +353,59 @@ void incrDecrCommand(redisClient *c, long long incr) {
return;
}
value += incr;
- new = createStringObjectFromLongLong(value);
- if (o)
- dbOverwrite(c->db,c->argv[1],new);
- else
- dbAdd(c->db,c->argv[1],new);
+
+ if (o && o->refcount == 1 && o->encoding == OBJ_ENCODING_INT &&
+ (value < 0 || value >= OBJ_SHARED_INTEGERS) &&
+ value >= LONG_MIN && value <= LONG_MAX)
+ {
+ new = o;
+ o->ptr = (void*)((long)value);
+ } else {
+ new = createStringObjectFromLongLong(value);
+ if (o) {
+ dbOverwrite(c->db,c->argv[1],new);
+ } else {
+ dbAdd(c->db,c->argv[1],new);
+ }
+ }
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
server.dirty++;
addReply(c,shared.colon);
addReply(c,new);
addReply(c,shared.crlf);
}
-void incrCommand(redisClient *c) {
+void incrCommand(client *c) {
incrDecrCommand(c,1);
}
-void decrCommand(redisClient *c) {
+void decrCommand(client *c) {
incrDecrCommand(c,-1);
}
-void incrbyCommand(redisClient *c) {
+void incrbyCommand(client *c) {
long long incr;
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,incr);
}
-void decrbyCommand(redisClient *c) {
+void decrbyCommand(client *c) {
long long incr;
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,-incr);
}
-void incrbyfloatCommand(redisClient *c) {
+void incrbyfloatCommand(client *c) {
long double incr, value;
robj *o, *new, *aux;
o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL && checkType(c,o,REDIS_STRING)) return;
- if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != REDIS_OK ||
- getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != REDIS_OK)
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != C_OK ||
+ getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != C_OK)
return;
value += incr;
@@ -387,13 +413,13 @@ void incrbyfloatCommand(redisClient *c) {
addReplyError(c,"increment would produce NaN or Infinity");
return;
}
- new = createStringObjectFromLongDouble(value);
+ new = createStringObjectFromLongDouble(value,1);
if (o)
dbOverwrite(c->db,c->argv[1],new);
else
dbAdd(c->db,c->argv[1],new);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
server.dirty++;
addReplyBulk(c,new);
@@ -406,7 +432,7 @@ void incrbyfloatCommand(redisClient *c) {
rewriteClientCommandArgument(c,2,new);
}
-void appendCommand(redisClient *c) {
+void appendCommand(client *c) {
size_t totlen;
robj *o, *append;
@@ -419,13 +445,13 @@ void appendCommand(redisClient *c) {
totlen = stringObjectLen(c->argv[2]);
} else {
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
+ if (checkType(c,o,OBJ_STRING))
return;
/* "append" is an argument, so always an sds */
append = c->argv[2];
totlen = stringObjectLen(o)+sdslen(append->ptr);
- if (checkStringLength(c,totlen) != REDIS_OK)
+ if (checkStringLength(c,totlen) != C_OK)
return;
/* Append the value */
@@ -434,14 +460,14 @@ void appendCommand(redisClient *c) {
totlen = sdslen(o->ptr);
}
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"append",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
server.dirty++;
addReplyLongLong(c,totlen);
}
-void strlenCommand(redisClient *c) {
+void strlenCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
addReplyLongLong(c,stringObjectLen(o));
}
diff --git a/src/t_zset.c b/src/t_zset.c
index 20ef62e27..f7f4c6eb2 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -38,9 +38,16 @@
*
* The elements are added to a hash table mapping Redis objects to scores.
* At the same time the elements are added to a skip list mapping scores
- * to Redis objects (so objects are sorted by scores in this "view"). */
-
-/* This skiplist implementation is almost a C translation of the original
+ * to Redis objects (so objects are sorted by scores in this "view").
+ *
+ * Note that the SDS string representing the element is the same in both
+ * the hash table and skiplist in order to save memory. What we do in order
+ * to manage the shared SDS string more easily is to free the SDS string
+ * only in zslFreeNode(). The dictionary has no value free method set.
+ * So we should always remove an element from the dictionary, and later from
+ * the skiplist.
+ *
+ * This skiplist implementation is almost a C translation of the original
* algorithm described by William Pugh in "Skip Lists: A Probabilistic
* Alternative to Balanced Trees", modified in three ways:
* a) this implementation allows for repeated scores.
@@ -49,19 +56,27 @@
* pointers being only at "level 1". This allows to traverse the list
* from tail to head, useful for ZREVRANGE. */
-#include "redis.h"
+#include "server.h"
#include <math.h>
-static int zslLexValueGteMin(robj *value, zlexrangespec *spec);
-static int zslLexValueLteMax(robj *value, zlexrangespec *spec);
+/*-----------------------------------------------------------------------------
+ * Skiplist implementation of the low level API
+ *----------------------------------------------------------------------------*/
-zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
- zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
+
+/* Create a skiplist node with the specified number of levels.
+ * The SDS string 'ele' is referenced by the node after the call. */
+zskiplistNode *zslCreateNode(int level, double score, sds ele) {
+ zskiplistNode *zn =
+ zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
zn->score = score;
- zn->obj = obj;
+ zn->ele = ele;
return zn;
}
+/* Create a new skiplist. */
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
@@ -79,11 +94,15 @@ zskiplist *zslCreate(void) {
return zsl;
}
+/* Free the specified skiplist node. The referenced SDS string representation
+ * of the element is freed too, unless node->ele is set to NULL before calling
+ * this function. */
void zslFreeNode(zskiplistNode *node) {
- decrRefCount(node->obj);
+ sdsfree(node->ele);
zfree(node);
}
+/* Free a whole skiplist. */
void zslFree(zskiplist *zsl) {
zskiplistNode *node = zsl->header->level[0].forward, *next;
@@ -107,29 +126,33 @@ int zslRandomLevel(void) {
return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}
-zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
+/* Insert a new node in the skiplist. Assumes the element does not already
+ * exist (up to the caller to enforce that). The skiplist takes ownership
+ * of the passed SDS string 'ele'. */
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
- redisAssert(!isnan(score));
+ serverAssert(!isnan(score));
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* store rank that is crossed to reach the insert position */
rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
while (x->level[i].forward &&
- (x->level[i].forward->score < score ||
- (x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,obj) < 0))) {
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ {
rank[i] += x->level[i].span;
x = x->level[i].forward;
}
update[i] = x;
}
- /* we assume the key is not already inside, since we allow duplicated
- * scores, and the re-insertion of score and redis object should never
- * happen since the caller of zslInsert() should test in the hash table
- * if the element is already inside or not. */
+ /* we assume the element is not already inside, since we allow duplicated
+ * scores, reinserting the same element should never happen since the
+ * caller of zslInsert() should test in the hash table if the element is
+ * already inside or not. */
level = zslRandomLevel();
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
@@ -139,7 +162,7 @@ zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
}
zsl->level = level;
}
- x = zslCreateNode(level,score,obj);
+ x = zslCreateNode(level,score,ele);
for (i = 0; i < level; i++) {
x->level[i].forward = update[i]->level[i].forward;
update[i]->level[i].forward = x;
@@ -184,38 +207,48 @@ void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
zsl->length--;
}
-/* Delete an element with matching score/object from the skiplist. */
-int zslDelete(zskiplist *zsl, double score, robj *obj) {
+/* Delete an element with matching score/element from the skiplist.
+ * The function returns 1 if the node was found and deleted, otherwise
+ * 0 is returned.
+ *
+ * If 'node' is NULL the deleted node is freed by zslFreeNode(), otherwise
+ * it is not freed (but just unlinked) and *node is set to the node pointer,
+ * so that it is possible for the caller to reuse the node (including the
+ * referenced SDS string at node->ele). */
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
- (x->level[i].forward->score < score ||
- (x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,obj) < 0)))
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ {
x = x->level[i].forward;
+ }
update[i] = x;
}
/* We may have multiple elements with the same score, what we need
* is to find the element with both the right score and object. */
x = x->level[0].forward;
- if (x && score == x->score && equalStringObjects(x->obj,obj)) {
+ if (x && score == x->score && sdscmp(x->ele,ele) == 0) {
zslDeleteNode(zsl, x, update);
- zslFreeNode(x);
+ if (!node)
+ zslFreeNode(x);
+ else
+ *node = x;
return 1;
- } else {
- return 0; /* not found */
}
return 0; /* not found */
}
-static int zslValueGteMin(double value, zrangespec *spec) {
+int zslValueGteMin(double value, zrangespec *spec) {
return spec->minex ? (value > spec->min) : (value >= spec->min);
}
-static int zslValueLteMax(double value, zrangespec *spec) {
+int zslValueLteMax(double value, zrangespec *spec) {
return spec->maxex ? (value < spec->max) : (value <= spec->max);
}
@@ -255,7 +288,7 @@ zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score <= max. */
if (!zslValueLteMax(x->score,range)) return NULL;
@@ -280,7 +313,7 @@ zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
}
/* This is an inner range, so this node cannot be NULL. */
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score >= min. */
if (!zslValueGteMin(x->score,range)) return NULL;
@@ -314,8 +347,8 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dic
{
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
+ dictDelete(dict,x->ele);
+ zslFreeNode(x); /* Here is where x->ele is actually released. */
removed++;
x = next;
}
@@ -331,7 +364,7 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
- !zslLexValueGteMin(x->level[i].forward->obj,range))
+ !zslLexValueGteMin(x->level[i].forward->ele,range))
x = x->level[i].forward;
update[i] = x;
}
@@ -340,11 +373,11 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di
x = x->level[0].forward;
/* Delete nodes while in range. */
- while (x && zslLexValueLteMax(x->obj,range)) {
+ while (x && zslLexValueLteMax(x->ele,range)) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
+ dictDelete(dict,x->ele);
+ zslFreeNode(x); /* Here is where x->ele is actually released. */
removed++;
x = next;
}
@@ -372,7 +405,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned
while (x && traversed <= end) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
+ dictDelete(dict,x->ele);
zslFreeNode(x);
removed++;
traversed++;
@@ -385,7 +418,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned
* Returns 0 when the element cannot be found, rank otherwise.
* Note that the rank is 1-based due to the span of zsl->header to the
* first element. */
-unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
+unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
zskiplistNode *x;
unsigned long rank = 0;
int i;
@@ -395,13 +428,13 @@ unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,o) <= 0))) {
+ sdscmp(x->level[i].forward->ele,ele) <= 0))) {
rank += x->level[i].span;
x = x->level[i].forward;
}
/* x might be equal to zsl->header, so test if obj is non-NULL */
- if (x->obj && equalStringObjects(x->obj,o)) {
+ if (x->ele && sdscmp(x->ele,ele) == 0) {
return rank;
}
}
@@ -437,32 +470,32 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
* by the "(" character, it's considered "open". For instance
* ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
* ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
- if (min->encoding == REDIS_ENCODING_INT) {
+ if (min->encoding == OBJ_ENCODING_INT) {
spec->min = (long)min->ptr;
} else {
if (((char*)min->ptr)[0] == '(') {
spec->min = strtod((char*)min->ptr+1,&eptr);
- if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
spec->minex = 1;
} else {
spec->min = strtod((char*)min->ptr,&eptr);
- if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
}
}
- if (max->encoding == REDIS_ENCODING_INT) {
+ if (max->encoding == OBJ_ENCODING_INT) {
spec->max = (long)max->ptr;
} else {
if (((char*)max->ptr)[0] == '(') {
spec->max = strtod((char*)max->ptr+1,&eptr);
- if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
spec->maxex = 1;
} else {
spec->max = strtod((char*)max->ptr,&eptr);
- if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
}
}
- return REDIS_OK;
+ return C_OK;
}
/* ------------------------ Lexicographic ranges ---------------------------- */
@@ -475,90 +508,88 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
*
* If the string is valid the *dest pointer is set to the redis object
* that will be used for the comparision, and ex will be set to 0 or 1
- * respectively if the item is exclusive or inclusive. REDIS_OK will be
+ * respectively if the item is exclusive or inclusive. C_OK will be
* returned.
*
- * If the string is not a valid range REDIS_ERR is returned, and the value
+ * If the string is not a valid range C_ERR is returned, and the value
* of *dest and *ex is undefined. */
-int zslParseLexRangeItem(robj *item, robj **dest, int *ex) {
+int zslParseLexRangeItem(robj *item, sds *dest, int *ex) {
char *c = item->ptr;
switch(c[0]) {
case '+':
- if (c[1] != '\0') return REDIS_ERR;
+ if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.maxstring;
- incrRefCount(shared.maxstring);
- return REDIS_OK;
+ return C_OK;
case '-':
- if (c[1] != '\0') return REDIS_ERR;
+ if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.minstring;
- incrRefCount(shared.minstring);
- return REDIS_OK;
+ return C_OK;
case '(':
*ex = 1;
- *dest = createStringObject(c+1,sdslen(c)-1);
- return REDIS_OK;
+ *dest = sdsnewlen(c+1,sdslen(c)-1);
+ return C_OK;
case '[':
*ex = 0;
- *dest = createStringObject(c+1,sdslen(c)-1);
- return REDIS_OK;
+ *dest = sdsnewlen(c+1,sdslen(c)-1);
+ return C_OK;
default:
- return REDIS_ERR;
+ return C_ERR;
}
}
-/* Populate the rangespec according to the objects min and max.
+/* Free a lex range structure, must be called only after zslParseLexRange()
+ * populated the structure with success (C_OK returned). */
+void zslFreeLexRange(zlexrangespec *spec) {
+ if (spec->min != shared.minstring &&
+ spec->min != shared.maxstring) sdsfree(spec->min);
+ if (spec->max != shared.minstring &&
+ spec->max != shared.maxstring) sdsfree(spec->max);
+}
+
+/* Populate the lex rangespec according to the objects min and max.
*
- * Return REDIS_OK on success. On error REDIS_ERR is returned.
+ * Return C_OK on success. On error C_ERR is returned.
* When OK is returned the structure must be freed with zslFreeLexRange(),
* otherwise no release is needed. */
-static int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
/* The range can't be valid if objects are integer encoded.
* Every item must start with ( or [. */
- if (min->encoding == REDIS_ENCODING_INT ||
- max->encoding == REDIS_ENCODING_INT) return REDIS_ERR;
+ if (min->encoding == OBJ_ENCODING_INT ||
+ max->encoding == OBJ_ENCODING_INT) return C_ERR;
spec->min = spec->max = NULL;
- if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == REDIS_ERR ||
- zslParseLexRangeItem(max, &spec->max, &spec->maxex) == REDIS_ERR) {
- if (spec->min) decrRefCount(spec->min);
- if (spec->max) decrRefCount(spec->max);
- return REDIS_ERR;
+ if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == C_ERR ||
+ zslParseLexRangeItem(max, &spec->max, &spec->maxex) == C_ERR) {
+ zslFreeLexRange(spec);
+ return C_ERR;
} else {
- return REDIS_OK;
+ return C_OK;
}
}
-/* Free a lex range structure, must be called only after zelParseLexRange()
- * populated the structure with success (REDIS_OK returned). */
-void zslFreeLexRange(zlexrangespec *spec) {
- decrRefCount(spec->min);
- decrRefCount(spec->max);
-}
-
-/* This is just a wrapper to compareStringObjects() that is able to
+/* This is just a wrapper to sdscmp() that is able to
* handle shared.minstring and shared.maxstring as the equivalent of
* -inf and +inf for strings */
-int compareStringObjectsForLexRange(robj *a, robj *b) {
- if (a == b) return 0; /* This makes sure that we handle inf,inf and
- -inf,-inf ASAP. One special case less. */
+int sdscmplex(sds a, sds b) {
+ if (a == b) return 0;
if (a == shared.minstring || b == shared.maxstring) return -1;
if (a == shared.maxstring || b == shared.minstring) return 1;
- return compareStringObjects(a,b);
+ return sdscmp(a,b);
}
-static int zslLexValueGteMin(robj *value, zlexrangespec *spec) {
+int zslLexValueGteMin(sds value, zlexrangespec *spec) {
return spec->minex ?
- (compareStringObjectsForLexRange(value,spec->min) > 0) :
- (compareStringObjectsForLexRange(value,spec->min) >= 0);
+ (sdscmplex(value,spec->min) > 0) :
+ (sdscmplex(value,spec->min) >= 0);
}
-static int zslLexValueLteMax(robj *value, zlexrangespec *spec) {
+int zslLexValueLteMax(sds value, zlexrangespec *spec) {
return spec->maxex ?
- (compareStringObjectsForLexRange(value,spec->max) < 0) :
- (compareStringObjectsForLexRange(value,spec->max) <= 0);
+ (sdscmplex(value,spec->max) < 0) :
+ (sdscmplex(value,spec->max) <= 0);
}
/* Returns if there is a part of the zset is in the lex range. */
@@ -566,15 +597,15 @@ int zslIsInLexRange(zskiplist *zsl, zlexrangespec *range) {
zskiplistNode *x;
/* Test for ranges that will always be empty. */
- if (compareStringObjectsForLexRange(range->min,range->max) > 1 ||
- (compareStringObjects(range->min,range->max) == 0 &&
+ if (sdscmplex(range->min,range->max) > 1 ||
+ (sdscmp(range->min,range->max) == 0 &&
(range->minex || range->maxex)))
return 0;
x = zsl->tail;
- if (x == NULL || !zslLexValueGteMin(x->obj,range))
+ if (x == NULL || !zslLexValueGteMin(x->ele,range))
return 0;
x = zsl->header->level[0].forward;
- if (x == NULL || !zslLexValueLteMax(x->obj,range))
+ if (x == NULL || !zslLexValueLteMax(x->ele,range))
return 0;
return 1;
}
@@ -592,16 +623,16 @@ zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) {
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *OUT* of range. */
while (x->level[i].forward &&
- !zslLexValueGteMin(x->level[i].forward->obj,range))
+ !zslLexValueGteMin(x->level[i].forward->ele,range))
x = x->level[i].forward;
}
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score <= max. */
- if (!zslLexValueLteMax(x->obj,range)) return NULL;
+ if (!zslLexValueLteMax(x->ele,range)) return NULL;
return x;
}
@@ -618,15 +649,15 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *IN* range. */
while (x->level[i].forward &&
- zslLexValueLteMax(x->level[i].forward->obj,range))
+ zslLexValueLteMax(x->level[i].forward->ele,range))
x = x->level[i].forward;
}
/* This is an inner range, so this node cannot be NULL. */
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score >= min. */
- if (!zslLexValueGteMin(x->obj,range)) return NULL;
+ if (!zslLexValueGteMin(x->ele,range)) return NULL;
return x;
}
@@ -641,8 +672,8 @@ double zzlGetScore(unsigned char *sptr) {
char buf[128];
double score;
- redisAssert(sptr != NULL);
- redisAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
+ serverAssert(sptr != NULL);
+ serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
memcpy(buf,vstr,vlen);
@@ -655,21 +686,19 @@ double zzlGetScore(unsigned char *sptr) {
return score;
}
-/* Return a ziplist element as a Redis string object.
- * This simple abstraction can be used to simplifies some code at the
- * cost of some performance. */
-robj *ziplistGetObject(unsigned char *sptr) {
+/* Return a ziplist element as an SDS string. */
+sds ziplistGetObject(unsigned char *sptr) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- redisAssert(sptr != NULL);
- redisAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
+ serverAssert(sptr != NULL);
+ serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
- return createStringObject((char*)vstr,vlen);
+ return sdsnewlen((char*)vstr,vlen);
} else {
- return createStringObjectFromLongLong(vlong);
+ return sdsfromlonglong(vlong);
}
}
@@ -681,7 +710,7 @@ int zzlCompareElements(unsigned char *eptr, unsigned char *cstr, unsigned int cl
unsigned char vbuf[32];
int minlen, cmp;
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL) {
/* Store string representation of long long in buf. */
vlen = ll2string((char*)vbuf,sizeof(vbuf),vlong);
@@ -702,12 +731,12 @@ unsigned int zzlLength(unsigned char *zl) {
* NULL when there is no next entry. */
void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
unsigned char *_eptr, *_sptr;
- redisAssert(*eptr != NULL && *sptr != NULL);
+ serverAssert(*eptr != NULL && *sptr != NULL);
_eptr = ziplistNext(zl,*sptr);
if (_eptr != NULL) {
_sptr = ziplistNext(zl,_eptr);
- redisAssert(_sptr != NULL);
+ serverAssert(_sptr != NULL);
} else {
/* No next entry. */
_sptr = NULL;
@@ -721,12 +750,12 @@ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
* set to NULL when there is no next entry. */
void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
unsigned char *_eptr, *_sptr;
- redisAssert(*eptr != NULL && *sptr != NULL);
+ serverAssert(*eptr != NULL && *sptr != NULL);
_sptr = ziplistPrev(zl,*eptr);
if (_sptr != NULL) {
_eptr = ziplistPrev(zl,_sptr);
- redisAssert(_eptr != NULL);
+ serverAssert(_eptr != NULL);
} else {
/* No previous entry. */
_eptr = NULL;
@@ -754,7 +783,7 @@ int zzlIsInRange(unsigned char *zl, zrangespec *range) {
return 0;
p = ziplistIndex(zl,1); /* First score. */
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
score = zzlGetScore(p);
if (!zslValueLteMax(score,range))
return 0;
@@ -773,7 +802,7 @@ unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range) {
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
score = zzlGetScore(sptr);
if (zslValueGteMin(score,range)) {
@@ -801,7 +830,7 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
score = zzlGetScore(sptr);
if (zslValueLteMax(score,range)) {
@@ -815,7 +844,7 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
* When this returns NULL, we know there also is no element. */
sptr = ziplistPrev(zl,eptr);
if (sptr != NULL)
- redisAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
+ serverAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
else
eptr = NULL;
}
@@ -823,17 +852,17 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
return NULL;
}
-static int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec) {
- robj *value = ziplistGetObject(p);
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec) {
+ sds value = ziplistGetObject(p);
int res = zslLexValueGteMin(value,spec);
- decrRefCount(value);
+ sdsfree(value);
return res;
}
-static int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec) {
- robj *value = ziplistGetObject(p);
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec) {
+ sds value = ziplistGetObject(p);
int res = zslLexValueLteMax(value,spec);
- decrRefCount(value);
+ sdsfree(value);
return res;
}
@@ -843,8 +872,8 @@ int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) {
unsigned char *p;
/* Test for ranges that will always be empty. */
- if (compareStringObjectsForLexRange(range->min,range->max) > 1 ||
- (compareStringObjects(range->min,range->max) == 0 &&
+ if (sdscmplex(range->min,range->max) > 1 ||
+ (sdscmp(range->min,range->max) == 0 &&
(range->minex || range->maxex)))
return 0;
@@ -854,7 +883,7 @@ int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) {
return 0;
p = ziplistIndex(zl,0); /* First element. */
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
if (!zzlLexValueLteMax(p,range))
return 0;
@@ -879,7 +908,7 @@ unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range) {
/* Move to next element. */
sptr = ziplistNext(zl,eptr); /* This element score. Skip it. */
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
eptr = ziplistNext(zl,sptr); /* Next element. */
}
@@ -906,7 +935,7 @@ unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) {
* When this returns NULL, we know there also is no element. */
sptr = ziplistPrev(zl,eptr);
if (sptr != NULL)
- redisAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
+ serverAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
else
eptr = NULL;
}
@@ -914,26 +943,22 @@ unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) {
return NULL;
}
-unsigned char *zzlFind(unsigned char *zl, robj *ele, double *score) {
+unsigned char *zzlFind(unsigned char *zl, sds ele, double *score) {
unsigned char *eptr = ziplistIndex(zl,0), *sptr;
- ele = getDecodedObject(ele);
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,ele,sptr != NULL);
+ serverAssert(sptr != NULL);
- if (ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr))) {
+ if (ziplistCompare(eptr,(unsigned char*)ele,sdslen(ele))) {
/* Matching element, pull out score. */
if (score != NULL) *score = zzlGetScore(sptr);
- decrRefCount(ele);
return eptr;
}
/* Move to next element. */
eptr = ziplistNext(zl,sptr);
}
-
- decrRefCount(ele);
return NULL;
}
@@ -948,41 +973,38 @@ unsigned char *zzlDelete(unsigned char *zl, unsigned char *eptr) {
return zl;
}
-unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, robj *ele, double score) {
+unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, sds ele, double score) {
unsigned char *sptr;
char scorebuf[128];
int scorelen;
size_t offset;
- redisAssertWithInfo(NULL,ele,sdsEncodedObject(ele));
scorelen = d2string(scorebuf,sizeof(scorebuf),score);
if (eptr == NULL) {
- zl = ziplistPush(zl,ele->ptr,sdslen(ele->ptr),ZIPLIST_TAIL);
+ zl = ziplistPush(zl,(unsigned char*)ele,sdslen(ele),ZIPLIST_TAIL);
zl = ziplistPush(zl,(unsigned char*)scorebuf,scorelen,ZIPLIST_TAIL);
} else {
/* Keep offset relative to zl, as it might be re-allocated. */
offset = eptr-zl;
- zl = ziplistInsert(zl,eptr,ele->ptr,sdslen(ele->ptr));
+ zl = ziplistInsert(zl,eptr,(unsigned char*)ele,sdslen(ele));
eptr = zl+offset;
/* Insert score after the element. */
- redisAssertWithInfo(NULL,ele,(sptr = ziplistNext(zl,eptr)) != NULL);
+ serverAssert((sptr = ziplistNext(zl,eptr)) != NULL);
zl = ziplistInsert(zl,sptr,(unsigned char*)scorebuf,scorelen);
}
-
return zl;
}
/* Insert (element,score) pair in ziplist. This function assumes the element is
* not yet present in the list. */
-unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
+unsigned char *zzlInsert(unsigned char *zl, sds ele, double score) {
unsigned char *eptr = ziplistIndex(zl,0), *sptr;
double s;
- ele = getDecodedObject(ele);
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,ele,sptr != NULL);
+ serverAssert(sptr != NULL);
s = zzlGetScore(sptr);
if (s > score) {
@@ -993,7 +1015,7 @@ unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
break;
} else if (s == score) {
/* Ensure lexicographical ordering for elements. */
- if (zzlCompareElements(eptr,ele->ptr,sdslen(ele->ptr)) > 0) {
+ if (zzlCompareElements(eptr,(unsigned char*)ele,sdslen(ele)) > 0) {
zl = zzlInsertAt(zl,eptr,ele,score);
break;
}
@@ -1006,8 +1028,6 @@ unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
/* Push on tail of list when it was not yet inserted. */
if (eptr == NULL)
zl = zzlInsertAt(zl,NULL,ele,score);
-
- decrRefCount(ele);
return zl;
}
@@ -1080,14 +1100,14 @@ unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsig
* Common sorted set API
*----------------------------------------------------------------------------*/
-unsigned int zsetLength(robj *zobj) {
+unsigned int zsetLength(const robj *zobj) {
int length = -1;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
length = zzlLength(zobj->ptr);
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- length = ((zset*)zobj->ptr)->zsl->length;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ length = ((const zset*)zobj->ptr)->zsl->length;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
return length;
}
@@ -1095,52 +1115,50 @@ unsigned int zsetLength(robj *zobj) {
void zsetConvert(robj *zobj, int encoding) {
zset *zs;
zskiplistNode *node, *next;
- robj *ele;
+ sds ele;
double score;
if (zobj->encoding == encoding) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- if (encoding != REDIS_ENCODING_SKIPLIST)
- redisPanic("Unknown target encoding");
+ if (encoding != OBJ_ENCODING_SKIPLIST)
+ serverPanic("Unknown target encoding");
zs = zmalloc(sizeof(*zs));
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
eptr = ziplistIndex(zl,0);
- redisAssertWithInfo(NULL,zobj,eptr != NULL);
+ serverAssertWithInfo(NULL,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,zobj,sptr != NULL);
+ serverAssertWithInfo(NULL,zobj,sptr != NULL);
while (eptr != NULL) {
score = zzlGetScore(sptr);
- redisAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL)
- ele = createStringObjectFromLongLong(vlong);
+ ele = sdsfromlonglong(vlong);
else
- ele = createStringObject((char*)vstr,vlen);
+ ele = sdsnewlen((char*)vstr,vlen);
- /* Has incremented refcount since it was just created. */
node = zslInsert(zs->zsl,score,ele);
- redisAssertWithInfo(NULL,zobj,dictAdd(zs->dict,ele,&node->score) == DICT_OK);
- incrRefCount(ele); /* Added to dictionary. */
+ serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
zzlNext(zl,&eptr,&sptr);
}
zfree(zobj->ptr);
zobj->ptr = zs;
- zobj->encoding = REDIS_ENCODING_SKIPLIST;
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ zobj->encoding = OBJ_ENCODING_SKIPLIST;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
unsigned char *zl = ziplistNew();
- if (encoding != REDIS_ENCODING_ZIPLIST)
- redisPanic("Unknown target encoding");
+ if (encoding != OBJ_ENCODING_ZIPLIST)
+ serverPanic("Unknown target encoding");
/* Approach similar to zslFree(), since we want to free the skiplist at
* the same time as creating the ziplist. */
@@ -1151,10 +1169,7 @@ void zsetConvert(robj *zobj, int encoding) {
zfree(zs->zsl);
while (node) {
- ele = getDecodedObject(node->obj);
- zl = zzlInsertAt(zl,NULL,ele,node->score);
- decrRefCount(ele);
-
+ zl = zzlInsertAt(zl,NULL,node->ele,node->score);
next = node->level[0].forward;
zslFreeNode(node);
node = next;
@@ -1162,9 +1177,304 @@ void zsetConvert(robj *zobj, int encoding) {
zfree(zs);
zobj->ptr = zl;
- zobj->encoding = REDIS_ENCODING_ZIPLIST;
+ zobj->encoding = OBJ_ENCODING_ZIPLIST;
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+}
+
+/* Convert the sorted set object into a ziplist if it is not already a ziplist
+ * and if the number of elements and the maximum element size is within the
+ * expected ranges. */
+void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) return;
+ zset *zset = zobj->ptr;
+
+ if (zset->zsl->length <= server.zset_max_ziplist_entries &&
+ maxelelen <= server.zset_max_ziplist_value)
+ zsetConvert(zobj,OBJ_ENCODING_ZIPLIST);
+}
+
+/* Return (by reference) the score of the specified member of the sorted set
+ * storing it into *score. If the element does not exist C_ERR is returned
+ * otherwise C_OK is returned and *score is correctly populated.
+ * If 'zobj' or 'member' is NULL, C_ERR is returned. */
+int zsetScore(robj *zobj, sds member, double *score) {
+ if (!zobj || !member) return C_ERR;
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ if (zzlFind(zobj->ptr, member, score) == NULL) return C_ERR;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de = dictFind(zs->dict, member);
+ if (de == NULL) return C_ERR;
+ *score = *(double*)dictGetVal(de);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return C_OK;
+}
+
+/* Add a new element or update the score of an existing element in a sorted
+ * set, regardless of its encoding.
+ *
+ * The set of flags change the command behavior. They are passed with an integer
+ * pointer since the function will clear the flags and populate them with
+ * other flags to indicate different conditions.
+ *
+ * The input flags are the following:
+ *
+ * ZADD_INCR: Increment the current element score by 'score' instead of updating
+ * the current element score. If the element does not exist, we
+ * assume 0 as previous score.
+ * ZADD_NX: Perform the operation only if the element does not exist.
+ * ZADD_XX: Perform the operation only if the element already exist.
+ *
+ * When ZADD_INCR is used, the new score of the element is stored in
+ * '*newscore' if 'newscore' is not NULL.
+ *
+ * The returned flags are the following:
+ *
+ * ZADD_NAN: The resulting score is not a number.
+ * ZADD_ADDED: The element was added (not present before the call).
+ * ZADD_UPDATED: The element score was updated.
+ * ZADD_NOP: No operation was performed because of NX or XX.
+ *
+ * Return value:
+ *
+ * The function returns 1 on success, and sets the appropriate flags
+ * ADDED or UPDATED to signal what happened during the operation (note that
+ * none could be set if we re-added an element using the same score it used
+ * to have, or in the case a zero increment is used).
+ *
+ * The function returns 0 on error, currently only when the increment
+ * produces a NAN condition, or when the 'score' value is NAN since the
+ * start.
+ *
+ * The command as a side effect of adding a new element may convert the sorted
+ * set internal encoding from ziplist to hashtable+skiplist.
+ *
+ * Memory management of 'ele':
+ *
+ * The function does not take ownership of the 'ele' SDS string, but copies
+ * it if needed. */
+int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) {
+ /* Turn options into simple to check vars. */
+ int incr = (*flags & ZADD_INCR) != 0;
+ int nx = (*flags & ZADD_NX) != 0;
+ int xx = (*flags & ZADD_XX) != 0;
+ *flags = 0; /* We'll return our response flags. */
+ double curscore;
+
+ /* NaN as input is an error regardless of all the other parameters. */
+ if (isnan(score)) {
+ *flags = ZADD_NAN;
+ return 0;
+ }
+
+ /* Update the sorted set according to its encoding. */
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *eptr;
+
+ if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
+ /* NX? Return, same element already exists. */
+ if (nx) {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+
+ /* Prepare the score for the increment if needed. */
+ if (incr) {
+ score += curscore;
+ if (isnan(score)) {
+ *flags |= ZADD_NAN;
+ return 0;
+ }
+ if (newscore) *newscore = score;
+ }
+
+ /* Remove and re-insert when score changed. */
+ if (score != curscore) {
+ zobj->ptr = zzlDelete(zobj->ptr,eptr);
+ zobj->ptr = zzlInsert(zobj->ptr,ele,score);
+ *flags |= ZADD_UPDATED;
+ }
+ return 1;
+ } else if (!xx) {
+ /* Optimize: check if the element is too large or the list
+ * becomes too long *before* executing zzlInsert. */
+ zobj->ptr = zzlInsert(zobj->ptr,ele,score);
+ if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries)
+ zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
+ if (sdslen(ele) > server.zset_max_ziplist_value)
+ zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
+ if (newscore) *newscore = score;
+ *flags |= ZADD_ADDED;
+ return 1;
+ } else {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplistNode *znode;
+ dictEntry *de;
+
+ de = dictFind(zs->dict,ele);
+ if (de != NULL) {
+ /* NX? Return, same element already exists. */
+ if (nx) {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+ curscore = *(double*)dictGetVal(de);
+
+ /* Prepare the score for the increment if needed. */
+ if (incr) {
+ score += curscore;
+ if (isnan(score)) {
+ *flags |= ZADD_NAN;
+ return 0;
+ }
+ if (newscore) *newscore = score;
+ }
+
+ /* Remove and re-insert when score changes. */
+ if (score != curscore) {
+ zskiplistNode *node;
+ serverAssert(zslDelete(zs->zsl,curscore,ele,&node));
+ znode = zslInsert(zs->zsl,score,node->ele);
+ /* We reused the node->ele SDS string, free the node now
+ * since zslInsert created a new one. */
+ node->ele = NULL;
+ zslFreeNode(node);
+                /* Note that we did not remove the original element from
+ * the hash table representing the sorted set, so we just
+ * update the score. */
+ dictGetVal(de) = &znode->score; /* Update score ptr. */
+ *flags |= ZADD_UPDATED;
+ }
+ return 1;
+ } else if (!xx) {
+ ele = sdsdup(ele);
+ znode = zslInsert(zs->zsl,score,ele);
+ serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
+ *flags |= ZADD_ADDED;
+ if (newscore) *newscore = score;
+ return 1;
+ } else {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return 0; /* Never reached. */
+}
+
+/* Delete the element 'ele' from the sorted set, returning 1 if the element
+ * existed and was deleted, 0 otherwise (the element was not there). */
+int zsetDel(robj *zobj, sds ele) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *eptr;
+
+ if ((eptr = zzlFind(zobj->ptr,ele,NULL)) != NULL) {
+ zobj->ptr = zzlDelete(zobj->ptr,eptr);
+ return 1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de;
+ double score;
+
+ de = dictUnlink(zs->dict,ele);
+ if (de != NULL) {
+ /* Get the score in order to delete from the skiplist later. */
+ score = *(double*)dictGetVal(de);
+
+ /* Delete from the hash table and later from the skiplist.
+ * Note that the order is important: deleting from the skiplist
+ * actually releases the SDS string representing the element,
+ * which is shared between the skiplist and the hash table, so
+ * we need to delete from the skiplist as the final step. */
+ dictFreeUnlinkedEntry(zs->dict,de);
+
+ /* Delete from skiplist. */
+ int retval = zslDelete(zs->zsl,score,ele,NULL);
+ serverAssert(retval);
+
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ return 1;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return 0; /* No such element found. */
+}
+
+/* Given a sorted set object returns the 0-based rank of the object or
+ * -1 if the object does not exist.
+ *
+ * For rank we mean the position of the element in the sorted collection
+ * of elements. So the first element has rank 0, the second rank 1, and so
+ * forth up to length-1 elements.
+ *
+ * If 'reverse' is false, the rank is returned considering as first element
+ * the one with the lowest score. Otherwise if 'reverse' is non-zero
+ * the rank is computed considering as element with rank 0 the one with
+ * the highest score. */
+long zsetRank(robj *zobj, sds ele, int reverse) {
+ unsigned long llen;
+ unsigned long rank;
+
+ llen = zsetLength(zobj);
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+
+ eptr = ziplistIndex(zl,0);
+ serverAssert(eptr != NULL);
+ sptr = ziplistNext(zl,eptr);
+ serverAssert(sptr != NULL);
+
+ rank = 1;
+ while(eptr != NULL) {
+ if (ziplistCompare(eptr,(unsigned char*)ele,sdslen(ele)))
+ break;
+ rank++;
+ zzlNext(zl,&eptr,&sptr);
+ }
+
+ if (eptr != NULL) {
+ if (reverse)
+ return llen-rank;
+ else
+ return rank-1;
+ } else {
+ return -1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ dictEntry *de;
+ double score;
+
+ de = dictFind(zs->dict,ele);
+ if (de != NULL) {
+ score = *(double*)dictGetVal(de);
+ rank = zslGetRank(zsl,score,ele);
+ /* Existing elements always have a rank. */
+ serverAssert(rank != 0);
+ if (reverse)
+ return llen-rank;
+ else
+ return rank-1;
+ } else {
+ return -1;
+ }
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
}
@@ -1173,35 +1483,78 @@ void zsetConvert(robj *zobj, int encoding) {
*----------------------------------------------------------------------------*/
/* This generic command implements both ZADD and ZINCRBY. */
-void zaddGenericCommand(redisClient *c, int incr) {
+void zaddGenericCommand(client *c, int flags) {
static char *nanerr = "resulting score is not a number (NaN)";
robj *key = c->argv[1];
- robj *ele;
robj *zobj;
- robj *curobj;
- double score = 0, *scores = NULL, curscore = 0.0;
- int j, elements = (c->argc-2)/2;
- int added = 0, updated = 0;
-
- if (c->argc % 2) {
+ sds ele;
+ double score = 0, *scores = NULL;
+ int j, elements;
+ int scoreidx = 0;
+ /* The following vars are used in order to track what the command actually
+ * did during the execution, to reply to the client and to trigger the
+ * notification of keyspace change. */
+ int added = 0; /* Number of new elements added. */
+ int updated = 0; /* Number of elements with updated score. */
+ int processed = 0; /* Number of elements processed, may remain zero with
+ options like XX. */
+
+ /* Parse options. At the end 'scoreidx' is set to the argument position
+ * of the score of the first score-element pair. */
+ scoreidx = 2;
+ while(scoreidx < c->argc) {
+ char *opt = c->argv[scoreidx]->ptr;
+ if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
+ else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
+ else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
+ else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
+ else break;
+ scoreidx++;
+ }
+
+ /* Turn options into simple to check vars. */
+ int incr = (flags & ZADD_INCR) != 0;
+ int nx = (flags & ZADD_NX) != 0;
+ int xx = (flags & ZADD_XX) != 0;
+ int ch = (flags & ZADD_CH) != 0;
+
+ /* After the options, we expect to have an even number of args, since
+ * we expect any number of score-element pairs. */
+ elements = c->argc-scoreidx;
+ if (elements % 2 || !elements) {
addReply(c,shared.syntaxerr);
return;
}
+ elements /= 2; /* Now this holds the number of score-element pairs. */
+
+ /* Check for incompatible options. */
+ if (nx && xx) {
+ addReplyError(c,
+ "XX and NX options at the same time are not compatible");
+ return;
+ }
+
+ if (incr && elements > 1) {
+ addReplyError(c,
+ "INCR option supports a single increment-element pair");
+ return;
+ }
/* Start parsing all the scores, we need to emit any syntax error
* before executing additions to the sorted set, as the command should
* either execute fully or nothing at all. */
scores = zmalloc(sizeof(double)*elements);
for (j = 0; j < elements; j++) {
- if (getDoubleFromObjectOrReply(c,c->argv[2+j*2],&scores[j],NULL)
- != REDIS_OK) goto cleanup;
+ if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
+ != C_OK) goto cleanup;
}
/* Lookup the key and create the sorted set if does not exist. */
zobj = lookupKeyWrite(c->db,key);
if (zobj == NULL) {
+ if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
if (server.zset_max_ziplist_entries == 0 ||
- server.zset_max_ziplist_value < sdslen(c->argv[3]->ptr))
+ server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
{
zobj = createZsetObject();
} else {
@@ -1209,167 +1562,78 @@ void zaddGenericCommand(redisClient *c, int incr) {
}
dbAdd(c->db,key,zobj);
} else {
- if (zobj->type != REDIS_ZSET) {
+ if (zobj->type != OBJ_ZSET) {
addReply(c,shared.wrongtypeerr);
goto cleanup;
}
}
for (j = 0; j < elements; j++) {
+ double newscore;
score = scores[j];
+ int retflags = flags;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *eptr;
-
- /* Prefer non-encoded element when dealing with ziplists. */
- ele = c->argv[3+j*2];
- if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
- if (incr) {
- score += curscore;
- if (isnan(score)) {
- addReplyError(c,nanerr);
- goto cleanup;
- }
- }
-
- /* Remove and re-insert when score changed. */
- if (score != curscore) {
- zobj->ptr = zzlDelete(zobj->ptr,eptr);
- zobj->ptr = zzlInsert(zobj->ptr,ele,score);
- server.dirty++;
- updated++;
- }
- } else {
- /* Optimize: check if the element is too large or the list
- * becomes too long *before* executing zzlInsert. */
- zobj->ptr = zzlInsert(zobj->ptr,ele,score);
- if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries)
- zsetConvert(zobj,REDIS_ENCODING_SKIPLIST);
- if (sdslen(ele->ptr) > server.zset_max_ziplist_value)
- zsetConvert(zobj,REDIS_ENCODING_SKIPLIST);
- server.dirty++;
- added++;
- }
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- zskiplistNode *znode;
- dictEntry *de;
-
- ele = c->argv[3+j*2] = tryObjectEncoding(c->argv[3+j*2]);
- de = dictFind(zs->dict,ele);
- if (de != NULL) {
- curobj = dictGetKey(de);
- curscore = *(double*)dictGetVal(de);
-
- if (incr) {
- score += curscore;
- if (isnan(score)) {
- addReplyError(c,nanerr);
- /* Don't need to check if the sorted set is empty
- * because we know it has at least one element. */
- goto cleanup;
- }
- }
-
- /* Remove and re-insert when score changed. We can safely
- * delete the key object from the skiplist, since the
- * dictionary still has a reference to it. */
- if (score != curscore) {
- redisAssertWithInfo(c,curobj,zslDelete(zs->zsl,curscore,curobj));
- znode = zslInsert(zs->zsl,score,curobj);
- incrRefCount(curobj); /* Re-inserted in skiplist. */
- dictGetVal(de) = &znode->score; /* Update score ptr. */
- server.dirty++;
- updated++;
- }
- } else {
- znode = zslInsert(zs->zsl,score,ele);
- incrRefCount(ele); /* Inserted in skiplist. */
- redisAssertWithInfo(c,NULL,dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
- incrRefCount(ele); /* Added to dictionary. */
- server.dirty++;
- added++;
- }
- } else {
- redisPanic("Unknown sorted set encoding");
+ ele = c->argv[scoreidx+1+j*2]->ptr;
+ int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
+ if (retval == 0) {
+ addReplyError(c,nanerr);
+ goto cleanup;
}
+ if (retflags & ZADD_ADDED) added++;
+ if (retflags & ZADD_UPDATED) updated++;
+ if (!(retflags & ZADD_NOP)) processed++;
+ score = newscore;
+ }
+ server.dirty += (added+updated);
+
+reply_to_client:
+ if (incr) { /* ZINCRBY or INCR option. */
+ if (processed)
+ addReplyDouble(c,score);
+ else
+ addReply(c,shared.nullbulk);
+ } else { /* ZADD. */
+ addReplyLongLong(c,ch ? added+updated : added);
}
- if (incr) /* ZINCRBY */
- addReplyDouble(c,score);
- else /* ZADD */
- addReplyLongLong(c,added);
cleanup:
zfree(scores);
if (added || updated) {
signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,
+ notifyKeyspaceEvent(NOTIFY_ZSET,
incr ? "zincr" : "zadd", key, c->db->id);
}
}
-void zaddCommand(redisClient *c) {
- zaddGenericCommand(c,0);
+void zaddCommand(client *c) {
+ zaddGenericCommand(c,ZADD_NONE);
}
-void zincrbyCommand(redisClient *c) {
- zaddGenericCommand(c,1);
+void zincrbyCommand(client *c) {
+ zaddGenericCommand(c,ZADD_INCR);
}
-void zremCommand(redisClient *c) {
+void zremCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
int deleted = 0, keyremoved = 0, j;
if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *eptr;
-
- for (j = 2; j < c->argc; j++) {
- if ((eptr = zzlFind(zobj->ptr,c->argv[j],NULL)) != NULL) {
- deleted++;
- zobj->ptr = zzlDelete(zobj->ptr,eptr);
- if (zzlLength(zobj->ptr) == 0) {
- dbDelete(c->db,key);
- keyremoved = 1;
- break;
- }
- }
- }
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- dictEntry *de;
- double score;
-
- for (j = 2; j < c->argc; j++) {
- de = dictFind(zs->dict,c->argv[j]);
- if (de != NULL) {
- deleted++;
-
- /* Delete from the skiplist */
- score = *(double*)dictGetVal(de);
- redisAssertWithInfo(c,c->argv[j],zslDelete(zs->zsl,score,c->argv[j]));
-
- /* Delete from the hash table */
- dictDelete(zs->dict,c->argv[j]);
- if (htNeedsResize(zs->dict)) dictResize(zs->dict);
- if (dictSize(zs->dict) == 0) {
- dbDelete(c->db,key);
- keyremoved = 1;
- break;
- }
- }
+ for (j = 2; j < c->argc; j++) {
+ if (zsetDel(zobj,c->argv[j]->ptr)) deleted++;
+ if (zsetLength(zobj) == 0) {
+ dbDelete(c->db,key);
+ keyremoved = 1;
+ break;
}
- } else {
- redisPanic("Unknown sorted set encoding");
}
if (deleted) {
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,"zrem",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
signalModifiedKey(c->db,key);
server.dirty += deleted;
}
@@ -1380,27 +1644,27 @@ void zremCommand(redisClient *c) {
#define ZRANGE_RANK 0
#define ZRANGE_SCORE 1
#define ZRANGE_LEX 2
-void zremrangeGenericCommand(redisClient *c, int rangetype) {
+void zremrangeGenericCommand(client *c, int rangetype) {
robj *key = c->argv[1];
robj *zobj;
int keyremoved = 0;
- unsigned long deleted;
+ unsigned long deleted = 0;
zrangespec range;
zlexrangespec lexrange;
long start, end, llen;
/* Step 1: Parse the range. */
if (rangetype == ZRANGE_RANK) {
- if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK))
return;
} else if (rangetype == ZRANGE_SCORE) {
- if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
} else if (rangetype == ZRANGE_LEX) {
- if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
@@ -1408,7 +1672,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
/* Step 2: Lookup & range sanity checks if needed. */
if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) goto cleanup;
+ checkType(c,zobj,OBJ_ZSET)) goto cleanup;
if (rangetype == ZRANGE_RANK) {
/* Sanitize indexes. */
@@ -1427,7 +1691,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
}
/* Step 3: Perform the range deletion operation. */
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
switch(rangetype) {
case ZRANGE_RANK:
zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted);
@@ -1443,7 +1707,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
dbDelete(c->db,key);
keyremoved = 1;
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
switch(rangetype) {
case ZRANGE_RANK:
@@ -1462,16 +1726,16 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
keyremoved = 1;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
/* Step 4: Notifications and reply. */
if (deleted) {
char *event[3] = {"zremrangebyrank","zremrangebyscore","zremrangebylex"};
signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,event[rangetype],key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_ZSET,event[rangetype],key,c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
}
server.dirty += deleted;
addReplyLongLong(c,deleted);
@@ -1480,15 +1744,15 @@ cleanup:
if (rangetype == ZRANGE_LEX) zslFreeLexRange(&lexrange);
}
-void zremrangebyrankCommand(redisClient *c) {
+void zremrangebyrankCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_RANK);
}
-void zremrangebyscoreCommand(redisClient *c) {
+void zremrangebyscoreCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_SCORE);
}
-void zremrangebylexCommand(redisClient *c) {
+void zremrangebylexCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_LEX);
}
@@ -1533,7 +1797,7 @@ typedef struct {
* we already checked that "ell" holds a long long, or tried to convert another
* representation into a long long value. When this was successful,
* OPVAL_VALID_LL is set as well. */
-#define OPVAL_DIRTY_ROBJ 1
+#define OPVAL_DIRTY_SDS 1
#define OPVAL_DIRTY_LL 2
#define OPVAL_VALID_LL 4
@@ -1541,7 +1805,7 @@ typedef struct {
typedef struct {
int flags;
unsigned char _buf[32]; /* Private buffer. */
- robj *ele;
+ sds ele;
unsigned char *estr;
unsigned int elen;
long long ell;
@@ -1555,35 +1819,35 @@ void zuiInitIterator(zsetopsrc *op) {
if (op->subject == NULL)
return;
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
it->is.is = op->subject->ptr;
it->is.ii = 0;
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
it->ht.dict = op->subject->ptr;
it->ht.di = dictGetIterator(op->subject->ptr);
it->ht.de = dictNext(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
it->zl.zl = op->subject->ptr;
it->zl.eptr = ziplistIndex(it->zl.zl,0);
if (it->zl.eptr != NULL) {
it->zl.sptr = ziplistNext(it->zl.zl,it->zl.eptr);
- redisAssert(it->zl.sptr != NULL);
+ serverAssert(it->zl.sptr != NULL);
}
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
it->sl.zs = op->subject->ptr;
it->sl.node = it->sl.zs->zsl->header->level[0].forward;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1591,26 +1855,26 @@ void zuiClearIterator(zsetopsrc *op) {
if (op->subject == NULL)
return;
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
- REDIS_NOTUSED(it); /* skip */
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
+ UNUSED(it); /* skip */
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dictReleaseIterator(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
- REDIS_NOTUSED(it); /* skip */
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
- REDIS_NOTUSED(it); /* skip */
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
+ UNUSED(it); /* skip */
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+ UNUSED(it); /* skip */
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1618,26 +1882,26 @@ int zuiLength(zsetopsrc *op) {
if (op->subject == NULL)
return 0;
- if (op->type == REDIS_SET) {
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->type == OBJ_SET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
return intsetLen(op->subject->ptr);
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dict *ht = op->subject->ptr;
return dictSize(ht);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ } else if (op->type == OBJ_ZSET) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
return zzlLength(op->subject->ptr);
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = op->subject->ptr;
return zs->zsl->length;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1648,14 +1912,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
if (op->subject == NULL)
return 0;
- if (val->flags & OPVAL_DIRTY_ROBJ)
- decrRefCount(val->ele);
+ if (val->flags & OPVAL_DIRTY_SDS)
+ sdsfree(val->ele);
memset(val,0,sizeof(zsetopval));
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
int64_t ell;
if (!intsetGet(it->is.is,it->is.ii,&ell))
@@ -1665,7 +1929,7 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->is.ii++;
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
if (it->ht.de == NULL)
return 0;
val->ele = dictGetKey(it->ht.de);
@@ -1674,32 +1938,32 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->ht.de = dictNext(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
/* No need to check both, but better be explicit. */
if (it->zl.eptr == NULL || it->zl.sptr == NULL)
return 0;
- redisAssert(ziplistGet(it->zl.eptr,&val->estr,&val->elen,&val->ell));
+ serverAssert(ziplistGet(it->zl.eptr,&val->estr,&val->elen,&val->ell));
val->score = zzlGetScore(it->zl.sptr);
/* Move to next element. */
zzlNext(it->zl.zl,&it->zl.eptr,&it->zl.sptr);
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
if (it->sl.node == NULL)
return 0;
- val->ele = it->sl.node->obj;
+ val->ele = it->sl.node->ele;
val->score = it->sl.node->score;
/* Move to next element. */
it->sl.node = it->sl.node->level[0].forward;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
return 1;
}
@@ -1709,15 +1973,8 @@ int zuiLongLongFromValue(zsetopval *val) {
val->flags |= OPVAL_DIRTY_LL;
if (val->ele != NULL) {
- if (val->ele->encoding == REDIS_ENCODING_INT) {
- val->ell = (long)val->ele->ptr;
+ if (string2ll(val->ele,sdslen(val->ele),&val->ell))
val->flags |= OPVAL_VALID_LL;
- } else if (sdsEncodedObject(val->ele)) {
- if (string2ll(val->ele->ptr,sdslen(val->ele->ptr),&val->ell))
- val->flags |= OPVAL_VALID_LL;
- } else {
- redisPanic("Unsupported element encoding");
- }
} else if (val->estr != NULL) {
if (string2ll((char*)val->estr,val->elen,&val->ell))
val->flags |= OPVAL_VALID_LL;
@@ -1729,30 +1986,41 @@ int zuiLongLongFromValue(zsetopval *val) {
return val->flags & OPVAL_VALID_LL;
}
-robj *zuiObjectFromValue(zsetopval *val) {
+sds zuiSdsFromValue(zsetopval *val) {
if (val->ele == NULL) {
if (val->estr != NULL) {
- val->ele = createStringObject((char*)val->estr,val->elen);
+ val->ele = sdsnewlen((char*)val->estr,val->elen);
} else {
- val->ele = createStringObjectFromLongLong(val->ell);
+ val->ele = sdsfromlonglong(val->ell);
}
- val->flags |= OPVAL_DIRTY_ROBJ;
+ val->flags |= OPVAL_DIRTY_SDS;
}
return val->ele;
}
+/* This is different from zuiSdsFromValue since returns a new SDS string
+ * which is up to the caller to free. */
+sds zuiNewSdsFromValue(zsetopval *val) {
+ if (val->flags & OPVAL_DIRTY_SDS) {
+ /* We have already one to return! */
+ sds ele = val->ele;
+ val->flags &= ~OPVAL_DIRTY_SDS;
+ val->ele = NULL;
+ return ele;
+ } else if (val->ele) {
+ return sdsdup(val->ele);
+ } else if (val->estr) {
+ return sdsnewlen((char*)val->estr,val->elen);
+ } else {
+ return sdsfromlonglong(val->ell);
+ }
+}
+
int zuiBufferFromValue(zsetopval *val) {
if (val->estr == NULL) {
if (val->ele != NULL) {
- if (val->ele->encoding == REDIS_ENCODING_INT) {
- val->elen = ll2string((char*)val->_buf,sizeof(val->_buf),(long)val->ele->ptr);
- val->estr = val->_buf;
- } else if (sdsEncodedObject(val->ele)) {
- val->elen = sdslen(val->ele->ptr);
- val->estr = val->ele->ptr;
- } else {
- redisPanic("Unsupported element encoding");
- }
+ val->elen = sdslen(val->ele);
+ val->estr = (unsigned char*)val->ele;
} else {
val->elen = ll2string((char*)val->_buf,sizeof(val->_buf),val->ell);
val->estr = val->_buf;
@@ -1767,8 +2035,8 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
if (op->subject == NULL)
return 0;
- if (op->type == REDIS_SET) {
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->type == OBJ_SET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
if (zuiLongLongFromValue(val) &&
intsetFind(op->subject->ptr,val->ell))
{
@@ -1777,9 +2045,9 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
} else {
return 0;
}
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dict *ht = op->subject->ptr;
- zuiObjectFromValue(val);
+ zuiSdsFromValue(val);
if (dictFind(ht,val->ele) != NULL) {
*score = 1.0;
return 1;
@@ -1787,19 +2055,19 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
return 0;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
- zuiObjectFromValue(val);
+ } else if (op->type == OBJ_ZSET) {
+ zuiSdsFromValue(val);
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
if (zzlFind(op->subject->ptr,val->ele,score) != NULL) {
/* Score is already set by zzlFind. */
return 1;
} else {
return 0;
}
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = op->subject->ptr;
dictEntry *de;
if ((de = dictFind(zs->dict,val->ele)) != NULL) {
@@ -1809,10 +2077,10 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
return 0;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1838,17 +2106,29 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat
*target = val > *target ? val : *target;
} else {
/* safety net */
- redisPanic("Unknown ZUNION/INTER aggregate type");
+ serverPanic("Unknown ZUNION/INTER aggregate type");
}
}
-void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
+uint64_t dictSdsHash(const void *key);
+int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
+
+dictType setAccumulatorDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+void zunionInterGenericCommand(client *c, robj *dstkey, int op) {
int i, j;
long setnum;
int aggregate = REDIS_AGGR_SUM;
zsetopsrc *src;
zsetopval zval;
- robj *tmp;
+ sds tmp;
unsigned int maxelelen = 0;
robj *dstobj;
zset *dstzset;
@@ -1856,7 +2136,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
int touched = 0;
/* expect setnum input keys to be given */
- if ((getLongFromObjectOrReply(c, c->argv[2], &setnum, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &setnum, NULL) != C_OK))
return;
if (setnum < 1) {
@@ -1876,7 +2156,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
for (i = 0, j = 3; i < setnum; i++, j++) {
robj *obj = lookupKeyWrite(c->db,c->argv[j]);
if (obj != NULL) {
- if (obj->type != REDIS_ZSET && obj->type != REDIS_SET) {
+ if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
zfree(src);
addReply(c,shared.wrongtypeerr);
return;
@@ -1898,17 +2178,21 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
int remaining = c->argc - j;
while (remaining) {
- if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) {
+ if (remaining >= (setnum + 1) &&
+ !strcasecmp(c->argv[j]->ptr,"weights"))
+ {
j++; remaining--;
for (i = 0; i < setnum; i++, j++, remaining--) {
if (getDoubleFromObjectOrReply(c,c->argv[j],&src[i].weight,
- "weight value is not a float") != REDIS_OK)
+ "weight value is not a float") != C_OK)
{
zfree(src);
return;
}
}
- } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) {
+ } else if (remaining >= 2 &&
+ !strcasecmp(c->argv[j]->ptr,"aggregate"))
+ {
j++; remaining--;
if (!strcasecmp(c->argv[j]->ptr,"sum")) {
aggregate = REDIS_AGGR_SUM;
@@ -1938,7 +2222,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
dstzset = dstobj->ptr;
memset(&zval, 0, sizeof(zval));
- if (op == REDIS_OP_INTER) {
+ if (op == SET_OP_INTER) {
/* Skip everything if the smallest input is empty. */
if (zuiLength(&src[0]) > 0) {
/* Precondition: as src[0] is non-empty and the inputs are ordered
@@ -1966,24 +2250,18 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
/* Only continue when present in every input. */
if (j == setnum) {
- tmp = zuiObjectFromValue(&zval);
+ tmp = zuiNewSdsFromValue(&zval);
znode = zslInsert(dstzset->zsl,score,tmp);
- incrRefCount(tmp); /* added to skiplist */
dictAdd(dstzset->dict,tmp,&znode->score);
- incrRefCount(tmp); /* added to dictionary */
-
- if (sdsEncodedObject(tmp)) {
- if (sdslen(tmp->ptr) > maxelelen)
- maxelelen = sdslen(tmp->ptr);
- }
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
}
}
zuiClearIterator(&src[0]);
}
- } else if (op == REDIS_OP_UNION) {
- dict *accumulator = dictCreate(&setDictType,NULL);
+ } else if (op == SET_OP_UNION) {
+ dict *accumulator = dictCreate(&setAccumulatorDictType,NULL);
dictIterator *di;
- dictEntry *de;
+ dictEntry *de, *existing;
double score;
if (setnum) {
@@ -2004,20 +2282,16 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
if (isnan(score)) score = 0;
/* Search for this element in the accumulating dictionary. */
- de = dictFind(accumulator,zuiObjectFromValue(&zval));
+ de = dictAddRaw(accumulator,zuiSdsFromValue(&zval),&existing);
/* If we don't have it, we need to create a new entry. */
- if (de == NULL) {
- tmp = zuiObjectFromValue(&zval);
+ if (!existing) {
+ tmp = zuiNewSdsFromValue(&zval);
/* Remember the longest single element encountered,
* to understand if it's possible to convert to ziplist
* at the end. */
- if (sdsEncodedObject(tmp)) {
- if (sdslen(tmp->ptr) > maxelelen)
- maxelelen = sdslen(tmp->ptr);
- }
- /* Add the element with its initial score. */
- de = dictAddRaw(accumulator,tmp);
- incrRefCount(tmp);
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
+ /* Update the element with its initial score. */
+ dictSetKey(accumulator, de, tmp);
dictSetDoubleVal(de,score);
} else {
/* Update the score with the score of the new instance
@@ -2026,7 +2300,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
* Here we access directly the dictEntry double
* value inside the union as it is a big speedup
* compared to using the getDouble/setDouble API. */
- zunionInterAggregate(&de->v.d,score,aggregate);
+ zunionInterAggregate(&existing->v.d,score,aggregate);
}
}
zuiClearIterator(&src[i]);
@@ -2041,57 +2315,49 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
dictExpand(dstzset->dict,dictSize(accumulator));
while((de = dictNext(di)) != NULL) {
- robj *ele = dictGetKey(de);
+ sds ele = dictGetKey(de);
score = dictGetDoubleVal(de);
znode = zslInsert(dstzset->zsl,score,ele);
- incrRefCount(ele); /* added to skiplist */
dictAdd(dstzset->dict,ele,&znode->score);
- incrRefCount(ele); /* added to dictionary */
}
dictReleaseIterator(di);
-
- /* We can free the accumulator dictionary now. */
dictRelease(accumulator);
} else {
- redisPanic("Unknown operator");
+ serverPanic("Unknown operator");
}
- if (dbDelete(c->db,dstkey)) {
- signalModifiedKey(c->db,dstkey);
+ if (dbDelete(c->db,dstkey))
touched = 1;
- server.dirty++;
- }
if (dstzset->zsl->length) {
- /* Convert to ziplist when in limits. */
- if (dstzset->zsl->length <= server.zset_max_ziplist_entries &&
- maxelelen <= server.zset_max_ziplist_value)
- zsetConvert(dstobj,REDIS_ENCODING_ZIPLIST);
-
+ zsetConvertToZiplistIfNeeded(dstobj,maxelelen);
dbAdd(c->db,dstkey,dstobj);
addReplyLongLong(c,zsetLength(dstobj));
- if (!touched) signalModifiedKey(c->db,dstkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,
- (op == REDIS_OP_UNION) ? "zunionstore" : "zinterstore",
+ signalModifiedKey(c->db,dstkey);
+ notifyKeyspaceEvent(NOTIFY_ZSET,
+ (op == SET_OP_UNION) ? "zunionstore" : "zinterstore",
dstkey,c->db->id);
server.dirty++;
} else {
decrRefCount(dstobj);
addReply(c,shared.czero);
- if (touched)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",dstkey,c->db->id);
+ if (touched) {
+ signalModifiedKey(c->db,dstkey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id);
+ server.dirty++;
+ }
}
zfree(src);
}
-void zunionstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
+void zunionstoreCommand(client *c) {
+ zunionInterGenericCommand(c,c->argv[1], SET_OP_UNION);
}
-void zinterstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
+void zinterstoreCommand(client *c) {
+ zunionInterGenericCommand(c,c->argv[1], SET_OP_INTER);
}
-void zrangeGenericCommand(redisClient *c, int reverse) {
+void zrangeGenericCommand(client *c, int reverse) {
robj *key = c->argv[1];
robj *zobj;
int withscores = 0;
@@ -2100,8 +2366,8 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
int llen;
int rangelen;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
withscores = 1;
@@ -2111,7 +2377,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
}
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL
- || checkType(c,zobj,REDIS_ZSET)) return;
+ || checkType(c,zobj,OBJ_ZSET)) return;
/* Sanitize indexes. */
llen = zsetLength(zobj);
@@ -2131,7 +2397,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c, withscores ? (rangelen*2) : rangelen);
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2143,12 +2409,12 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
else
eptr = ziplistIndex(zl,2*start);
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
while (rangelen--) {
- redisAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL)
addReplyBulkLongLong(c,vlong);
else
@@ -2163,11 +2429,11 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
- robj *ele;
+ sds ele;
/* Check if starting point is trivial, before doing log(N) lookup. */
if (reverse) {
@@ -2181,28 +2447,28 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
}
while(rangelen--) {
- redisAssertWithInfo(c,zobj,ln != NULL);
- ele = ln->obj;
- addReplyBulk(c,ele);
+ serverAssertWithInfo(c,zobj,ln != NULL);
+ ele = ln->ele;
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
if (withscores)
addReplyDouble(c,ln->score);
ln = reverse ? ln->backward : ln->level[0].forward;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
}
-void zrangeCommand(redisClient *c) {
+void zrangeCommand(client *c) {
zrangeGenericCommand(c,0);
}
-void zrevrangeCommand(redisClient *c) {
+void zrevrangeCommand(client *c) {
zrangeGenericCommand(c,1);
}
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */
-void genericZrangebyscoreCommand(redisClient *c, int reverse) {
+void genericZrangebyscoreCommand(client *c, int reverse) {
zrangespec range;
robj *key = c->argv[1];
robj *zobj;
@@ -2221,7 +2487,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
minidx = 2; maxidx = 3;
}
- if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
@@ -2237,8 +2503,13 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
pos++; remaining--;
withscores = 1;
} else if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL)
+ != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL)
+ != C_OK))
+ {
+ return;
+ }
pos += 3; remaining -= 3;
} else {
addReply(c,shared.syntaxerr);
@@ -2249,9 +2520,9 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
/* Ok, lookup the key and get the range */
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2273,7 +2544,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
/* Get score pointer for the first element. */
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
/* We don't know in advance how many matching elements there are in the
@@ -2302,7 +2573,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
/* We know the element exists, so ziplistGet should always succeed */
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
rangelen++;
if (vstr == NULL) {
@@ -2322,7 +2593,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
@@ -2364,7 +2635,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
rangelen++;
- addReplyBulk(c,ln->obj);
+ addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
if (withscores) {
addReplyDouble(c,ln->score);
@@ -2378,7 +2649,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
if (withscores) {
@@ -2388,31 +2659,31 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
setDeferredMultiBulkLength(c, replylen, rangelen);
}
-void zrangebyscoreCommand(redisClient *c) {
+void zrangebyscoreCommand(client *c) {
genericZrangebyscoreCommand(c,0);
}
-void zrevrangebyscoreCommand(redisClient *c) {
+void zrevrangebyscoreCommand(client *c) {
genericZrangebyscoreCommand(c,1);
}
-void zcountCommand(redisClient *c) {
+void zcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zrangespec range;
int count = 0;
/* Parse the range arguments */
- if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
/* Lookup the sorted set */
if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
- checkType(c, zobj, REDIS_ZSET)) return;
+ checkType(c, zobj, OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
double score;
@@ -2429,7 +2700,7 @@ void zcountCommand(redisClient *c) {
/* First element is in range */
sptr = ziplistNext(zl,eptr);
score = zzlGetScore(sptr);
- redisAssertWithInfo(c,zobj,zslValueLteMax(score,&range));
+ serverAssertWithInfo(c,zobj,zslValueLteMax(score,&range));
/* Iterate over elements in range */
while (eptr) {
@@ -2443,7 +2714,7 @@ void zcountCommand(redisClient *c) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *zn;
@@ -2454,7 +2725,7 @@ void zcountCommand(redisClient *c) {
/* Use rank of first element, if any, to determine preliminary count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count = (zsl->length - (rank - 1));
/* Find last element in range */
@@ -2462,38 +2733,38 @@ void zcountCommand(redisClient *c) {
/* Use rank of last element, if any, to determine the actual count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count -= (zsl->length - rank);
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
addReplyLongLong(c, count);
}
-void zlexcountCommand(redisClient *c) {
+void zlexcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zlexrangespec range;
int count = 0;
/* Parse the range arguments */
- if (zslParseLexRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
/* Lookup the sorted set */
if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
- checkType(c, zobj, REDIS_ZSET))
+ checkType(c, zobj, OBJ_ZSET))
{
zslFreeLexRange(&range);
return;
}
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
@@ -2509,7 +2780,7 @@ void zlexcountCommand(redisClient *c) {
/* First element is in range */
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range));
+ serverAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range));
/* Iterate over elements in range */
while (eptr) {
@@ -2521,7 +2792,7 @@ void zlexcountCommand(redisClient *c) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *zn;
@@ -2532,7 +2803,7 @@ void zlexcountCommand(redisClient *c) {
/* Use rank of first element, if any, to determine preliminary count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count = (zsl->length - (rank - 1));
/* Find last element in range */
@@ -2540,12 +2811,12 @@ void zlexcountCommand(redisClient *c) {
/* Use rank of last element, if any, to determine the actual count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count -= (zsl->length - rank);
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
zslFreeLexRange(&range);
@@ -2553,7 +2824,7 @@ void zlexcountCommand(redisClient *c) {
}
/* This command implements ZRANGEBYLEX, ZREVRANGEBYLEX. */
-void genericZrangebylexCommand(redisClient *c, int reverse) {
+void genericZrangebylexCommand(client *c, int reverse) {
zlexrangespec range;
robj *key = c->argv[1];
robj *zobj;
@@ -2571,7 +2842,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
minidx = 2; maxidx = 3;
}
- if (zslParseLexRange(c->argv[minidx],c->argv[maxidx],&range) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
@@ -2584,8 +2855,8 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
while (remaining) {
if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != C_OK)) return;
pos += 3; remaining -= 3;
} else {
zslFreeLexRange(&range);
@@ -2597,13 +2868,13 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
/* Ok, lookup the key and get the range */
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET))
+ checkType(c,zobj,OBJ_ZSET))
{
zslFreeLexRange(&range);
return;
}
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2625,7 +2896,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
}
/* Get score pointer for the first element. */
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
/* We don't know in advance how many matching elements there are in the
@@ -2653,7 +2924,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
/* We know the element exists, so ziplistGet should always
* succeed. */
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
rangelen++;
if (vstr == NULL) {
@@ -2669,7 +2940,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
@@ -2706,13 +2977,13 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
while (ln && limit--) {
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zslLexValueGteMin(ln->obj,&range)) break;
+ if (!zslLexValueGteMin(ln->ele,&range)) break;
} else {
- if (!zslLexValueLteMax(ln->obj,&range)) break;
+ if (!zslLexValueLteMax(ln->ele,&range)) break;
}
rangelen++;
- addReplyBulk(c,ln->obj);
+ addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
/* Move to next node */
if (reverse) {
@@ -2722,137 +2993,78 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
zslFreeLexRange(&range);
setDeferredMultiBulkLength(c, replylen, rangelen);
}
-void zrangebylexCommand(redisClient *c) {
+void zrangebylexCommand(client *c) {
genericZrangebylexCommand(c,0);
}
-void zrevrangebylexCommand(redisClient *c) {
+void zrevrangebylexCommand(client *c) {
genericZrangebylexCommand(c,1);
}
-void zcardCommand(redisClient *c) {
+void zcardCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
if ((zobj = lookupKeyReadOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
addReplyLongLong(c,zsetLength(zobj));
}
-void zscoreCommand(redisClient *c) {
+void zscoreCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
double score;
if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- if (zzlFind(zobj->ptr,c->argv[2],&score) != NULL)
- addReplyDouble(c,score);
- else
- addReply(c,shared.nullbulk);
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- dictEntry *de;
-
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- de = dictFind(zs->dict,c->argv[2]);
- if (de != NULL) {
- score = *(double*)dictGetVal(de);
- addReplyDouble(c,score);
- } else {
- addReply(c,shared.nullbulk);
- }
+ if (zsetScore(zobj,c->argv[2]->ptr,&score) == C_ERR) {
+ addReply(c,shared.nullbulk);
} else {
- redisPanic("Unknown sorted set encoding");
+ addReplyDouble(c,score);
}
}
-void zrankGenericCommand(redisClient *c, int reverse) {
+void zrankGenericCommand(client *c, int reverse) {
robj *key = c->argv[1];
robj *ele = c->argv[2];
robj *zobj;
- unsigned long llen;
- unsigned long rank;
+ long rank;
if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
- llen = zsetLength(zobj);
-
- redisAssertWithInfo(c,ele,sdsEncodedObject(ele));
+ checkType(c,zobj,OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = zobj->ptr;
- unsigned char *eptr, *sptr;
-
- eptr = ziplistIndex(zl,0);
- redisAssertWithInfo(c,zobj,eptr != NULL);
- sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(c,zobj,sptr != NULL);
-
- rank = 1;
- while(eptr != NULL) {
- if (ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr)))
- break;
- rank++;
- zzlNext(zl,&eptr,&sptr);
- }
-
- if (eptr != NULL) {
- if (reverse)
- addReplyLongLong(c,llen-rank);
- else
- addReplyLongLong(c,rank-1);
- } else {
- addReply(c,shared.nullbulk);
- }
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- zskiplist *zsl = zs->zsl;
- dictEntry *de;
- double score;
-
- ele = c->argv[2] = tryObjectEncoding(c->argv[2]);
- de = dictFind(zs->dict,ele);
- if (de != NULL) {
- score = *(double*)dictGetVal(de);
- rank = zslGetRank(zsl,score,ele);
- redisAssertWithInfo(c,ele,rank); /* Existing elements always have a rank. */
- if (reverse)
- addReplyLongLong(c,llen-rank);
- else
- addReplyLongLong(c,rank-1);
- } else {
- addReply(c,shared.nullbulk);
- }
+ serverAssertWithInfo(c,ele,sdsEncodedObject(ele));
+ rank = zsetRank(zobj,ele->ptr,reverse);
+ if (rank >= 0) {
+ addReplyLongLong(c,rank);
} else {
- redisPanic("Unknown sorted set encoding");
+ addReply(c,shared.nullbulk);
}
}
-void zrankCommand(redisClient *c) {
+void zrankCommand(client *c) {
zrankGenericCommand(c, 0);
}
-void zrevrankCommand(redisClient *c) {
+void zrevrankCommand(client *c) {
zrankGenericCommand(c, 1);
}
-void zscanCommand(redisClient *c) {
+void zscanCommand(client *c) {
robj *o;
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,o,REDIS_ZSET)) return;
+ checkType(c,o,OBJ_ZSET)) return;
scanGenericCommand(c,o,cursor);
}
diff --git a/src/util.c b/src/util.c
index a0bb2b84c..8d68f0bb1 100644
--- a/src/util.c
+++ b/src/util.c
@@ -38,8 +38,10 @@
#include <sys/time.h>
#include <float.h>
#include <stdint.h>
+#include <errno.h>
#include "util.h"
+#include "sha1.h"
/* Glob-style pattern matching. */
int stringmatchlen(const char *pattern, int patternLen,
@@ -169,11 +171,12 @@ int stringmatch(const char *pattern, const char *string, int nocase) {
}
/* Convert a string representing an amount of memory into the number of
- * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
+ * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
* (1024*1024*1024).
*
* On parsing error, if *err is not NULL, it's set to 1, otherwise it's
- * set to 0 */
+ * set to 0. On error the function return value is 0, regardless of the
+ * fact 'err' is NULL or not. */
long long memtoll(const char *p, int *err) {
const char *u;
char buf[128];
@@ -182,6 +185,7 @@ long long memtoll(const char *p, int *err) {
unsigned int digits;
if (err) *err = 0;
+
/* Search the first non digit character. */
u = p;
if (*u == '-') u++;
@@ -202,16 +206,26 @@ long long memtoll(const char *p, int *err) {
mul = 1024L*1024*1024;
} else {
if (err) *err = 1;
- mul = 1;
+ return 0;
}
+
+ /* Copy the digits into a buffer, we'll use strtoll() to convert
+ * the digit (without the unit) into a number. */
digits = u-p;
if (digits >= sizeof(buf)) {
if (err) *err = 1;
- return LLONG_MAX;
+ return 0;
}
memcpy(buf,p,digits);
buf[digits] = '\0';
- val = strtoll(buf,NULL,10);
+
+ char *endptr;
+ errno = 0;
+ val = strtoll(buf,&endptr,10);
+ if ((val == 0 && errno == EINVAL) || *endptr != '\0') {
+ if (err) *err = 1;
+ return 0;
+ }
return val*mul;
}
@@ -237,6 +251,18 @@ uint32_t digits10(uint64_t v) {
return 12 + digits10(v / 1000000000000UL);
}
+/* Like digits10() but for signed values. */
+uint32_t sdigits10(int64_t v) {
+ if (v < 0) {
+ /* Abs value of LLONG_MIN requires special handling. */
+ uint64_t uv = (v != LLONG_MIN) ?
+ (uint64_t)-v : ((uint64_t) LLONG_MAX)+1;
+ return digits10(uv)+1; /* +1 for the minus. */
+ } else {
+ return digits10(v);
+ }
+}
+
/* Convert a long long into a string. Returns the number of
* characters needed to represent the number.
* If the buffer is not big enough to store the string, 0 is returned.
@@ -248,7 +274,7 @@ uint32_t digits10(uint64_t v) {
*
* Modified in order to handle signed integers since the original code was
* designed for unsigned integers. */
-int ll2string(char* dst, size_t dstlen, long long svalue) {
+int ll2string(char *dst, size_t dstlen, long long svalue) {
static const char digits[201] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
@@ -261,7 +287,11 @@ int ll2string(char* dst, size_t dstlen, long long svalue) {
/* The main loop works with 64bit unsigned integers for simplicity, so
* we convert the number here and remember if it is negative. */
if (svalue < 0) {
- value = -svalue;
+ if (svalue != LLONG_MIN) {
+ value = -svalue;
+ } else {
+ value = ((unsigned long long) LLONG_MAX)+1;
+ }
negative = 1;
} else {
value = svalue;
@@ -300,7 +330,16 @@ int ll2string(char* dst, size_t dstlen, long long svalue) {
/* Convert a string into a long long. Returns 1 if the string could be parsed
* into a (non-overflowing) long long, 0 otherwise. The value will be set to
- * the parsed value when appropriate. */
+ * the parsed value when appropriate.
+ *
+ * Note that this function demands that the string strictly represents
+ * a long long: no spaces or other characters before or after the string
+ * representing the number are accepted, nor zeroes at the start if not
+ * for the string "0" representing the zero number.
+ *
+ * Because of its strictness, it is safe to use this function to check if
+ * you can convert a string into a long long, and obtain back the string
+ * from the number without any loss in the string representation. */
int string2ll(const char *s, size_t slen, long long *value) {
const char *p = s;
size_t plen = 0;
@@ -380,8 +419,40 @@ int string2l(const char *s, size_t slen, long *lval) {
return 1;
}
+/* Convert a string into a double. Returns 1 if the string could be parsed
+ * into a (non-overflowing) double, 0 otherwise. The value will be set to
+ * the parsed value when appropriate.
+ *
+ * Note that this function demands that the string strictly represents
+ * a double: no spaces or other characters before or after the string
+ * representing the number are accepted. */
+int string2ld(const char *s, size_t slen, long double *dp) {
+ char buf[256];
+ long double value;
+ char *eptr;
+
+ if (slen >= sizeof(buf)) return 0;
+ memcpy(buf,s,slen);
+ buf[slen] = '\0';
+
+ errno = 0;
+ value = strtold(buf, &eptr);
+ if (isspace(buf[0]) || eptr[0] != '\0' ||
+ (errno == ERANGE &&
+ (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
+ errno == EINVAL ||
+ isnan(value))
+ return 0;
+
+ if (dp) *dp = value;
+ return 1;
+}
+
/* Convert a double to a string representation. Returns the number of bytes
- * required. The representation should always be parsable by stdtod(3). */
+ * required. The representation should always be parsable by strtod(3).
+ * This function does not support human-friendly formatting like ld2string
+ * does. It is intented mainly to be used inside t_zset.c when writing scores
+ * into a ziplist representing a sorted set. */
int d2string(char *buf, size_t len, double value) {
if (isnan(value)) {
len = snprintf(buf,len,"nan");
@@ -419,16 +490,95 @@ int d2string(char *buf, size_t len, double value) {
return len;
}
+/* Convert a long double into a string. If humanfriendly is non-zero
+ * it does not use exponential format and trims trailing zeroes at the end,
+ * however this results in loss of precision. Otherwise exp format is used
+ * and the output of snprintf() is not modified.
+ *
+ * The function returns the length of the string or zero if there was not
+ * enough buffer room to store it. */
+int ld2string(char *buf, size_t len, long double value, int humanfriendly) {
+ size_t l;
+
+ if (isinf(value)) {
+ /* Libc in odd systems (Hi Solaris!) will format infinite in a
+ * different way, so better to handle it in an explicit way. */
+ if (len < 5) return 0; /* No room. 5 is "-inf\0" */
+ if (value > 0) {
+ memcpy(buf,"inf",3);
+ l = 3;
+ } else {
+ memcpy(buf,"-inf",4);
+ l = 4;
+ }
+ } else if (humanfriendly) {
+ /* We use 17 digits precision since with 128 bit floats that precision
+ * after rounding is able to represent most small decimal numbers in a
+ * way that is "non surprising" for the user (that is, most small
+ * decimal numbers will be represented in a way that when converted
+ * back into a string are exactly the same as what the user typed.) */
+ l = snprintf(buf,len,"%.17Lf", value);
+ if (l+1 > len) return 0; /* No room. */
+ /* Now remove trailing zeroes after the '.' */
+ if (strchr(buf,'.') != NULL) {
+ char *p = buf+l-1;
+ while(*p == '0') {
+ p--;
+ l--;
+ }
+ if (*p == '.') l--;
+ }
+ } else {
+ l = snprintf(buf,len,"%.17Lg", value);
+ if (l+1 > len) return 0; /* No room. */
+ }
+ buf[l] = '\0';
+ return l;
+}
+
/* Generate the Redis "Run ID", a SHA1-sized random number that identifies a
* given execution of Redis, so that if you are talking with an instance
* having run_id == A, and you reconnect and it has run_id == B, you can be
* sure that it is either a different instance or it was restarted. */
void getRandomHexChars(char *p, unsigned int len) {
- FILE *fp = fopen("/dev/urandom","r");
char *charset = "0123456789abcdef";
unsigned int j;
- if (fp == NULL || fread(p,len,1,fp) == 0) {
+ /* Global state. */
+ static int seed_initialized = 0;
+ static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */
+ static uint64_t counter = 0; /* The counter we hash with the seed. */
+
+ if (!seed_initialized) {
+ /* Initialize a seed and use SHA1 in counter mode, where we hash
+ * the same seed with a progressive counter. For the goals of this
+ * function we just need non-colliding strings, there are no
+ * cryptographic security needs. */
+ FILE *fp = fopen("/dev/urandom","r");
+ if (fp && fread(seed,sizeof(seed),1,fp) == 1)
+ seed_initialized = 1;
+ if (fp) fclose(fp);
+ }
+
+ if (seed_initialized) {
+ while(len) {
+ unsigned char digest[20];
+ SHA1_CTX ctx;
+ unsigned int copylen = len > 20 ? 20 : len;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, seed, sizeof(seed));
+ SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter));
+ SHA1Final(digest, &ctx);
+ counter++;
+
+ memcpy(p,digest,copylen);
+ /* Convert to hex digits. */
+ for (j = 0; j < copylen; j++) p[j] = charset[p[j] & 0x0F];
+ len -= copylen;
+ p += copylen;
+ }
+ } else {
/* If we can't read from /dev/urandom, do some reasonable effort
* in order to create some entropy, since this function is used to
* generate run_id and cluster instance IDs */
@@ -455,14 +605,12 @@ void getRandomHexChars(char *p, unsigned int len) {
x += sizeof(pid);
}
/* Finally xor it with rand() output, that was already seeded with
- * time() at startup. */
- for (j = 0; j < len; j++)
+ * time() at startup, and convert to hex digits. */
+ for (j = 0; j < len; j++) {
p[j] ^= rand();
+ p[j] = charset[p[j] & 0x0F];
+ }
}
- /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */
- for (j = 0; j < len; j++)
- p[j] = charset[p[j] & 0x0F];
- if (fp) fclose(fp);
}
/* Given the filename, return the absolute path as an SDS string, or NULL
@@ -525,10 +673,10 @@ int pathIsBaseName(char *path) {
return strchr(path,'/') == NULL && strchr(path,'\\') == NULL;
}
-#ifdef UTIL_TEST_MAIN
+#ifdef REDIS_TEST
#include <assert.h>
-void test_string2ll(void) {
+static void test_string2ll(void) {
char buf[32];
long long v;
@@ -583,7 +731,7 @@ void test_string2ll(void) {
assert(string2ll(buf,strlen(buf),&v) == 0);
}
-void test_string2l(void) {
+static void test_string2l(void) {
char buf[32];
long v;
@@ -632,9 +780,55 @@ void test_string2l(void) {
#endif
}
-int main(int argc, char **argv) {
+static void test_ll2string(void) {
+ char buf[32];
+ long long v;
+ int sz;
+
+ v = 0;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 1);
+ assert(!strcmp(buf, "0"));
+
+ v = -1;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "-1"));
+
+ v = 99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 2);
+ assert(!strcmp(buf, "99"));
+
+ v = -99;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 3);
+ assert(!strcmp(buf, "-99"));
+
+ v = -2147483648;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 11);
+ assert(!strcmp(buf, "-2147483648"));
+
+ v = LLONG_MIN;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 20);
+ assert(!strcmp(buf, "-9223372036854775808"));
+
+ v = LLONG_MAX;
+ sz = ll2string(buf, sizeof buf, v);
+ assert(sz == 19);
+ assert(!strcmp(buf, "9223372036854775807"));
+}
+
+#define UNUSED(x) (void)(x)
+int utilTest(int argc, char **argv) {
+ UNUSED(argc);
+ UNUSED(argv);
+
test_string2ll();
test_string2l();
+ test_ll2string();
return 0;
}
#endif
diff --git a/src/util.h b/src/util.h
index b3667cd6f..d7784495b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -30,16 +30,25 @@
#ifndef __REDIS_UTIL_H
#define __REDIS_UTIL_H
+#include <stdint.h>
#include "sds.h"
int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase);
int stringmatch(const char *p, const char *s, int nocase);
long long memtoll(const char *p, int *err);
+uint32_t digits10(uint64_t v);
+uint32_t sdigits10(int64_t v);
int ll2string(char *s, size_t len, long long value);
int string2ll(const char *s, size_t slen, long long *value);
int string2l(const char *s, size_t slen, long *value);
+int string2ld(const char *s, size_t slen, long double *dp);
int d2string(char *buf, size_t len, double value);
+int ld2string(char *buf, size_t len, long double value, int humanfriendly);
sds getAbsolutePath(char *filename);
int pathIsBaseName(char *path);
+#ifdef REDIS_TEST
+int utilTest(int argc, char **argv);
+#endif
+
#endif
diff --git a/src/version.h b/src/version.h
index 00cbae681..eb65e9bbd 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define REDIS_VERSION "2.9.999"
+#define REDIS_VERSION "999.999.999"
diff --git a/src/ziplist.c b/src/ziplist.c
index d78f8f5da..e407937ff 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -8,72 +8,150 @@
*
* ----------------------------------------------------------------------------
*
- * ZIPLIST OVERALL LAYOUT:
+ * ZIPLIST OVERALL LAYOUT
+ * ======================
+ *
* The general layout of the ziplist is as follows:
- * <zlbytes><zltail><zllen><entry><entry><zlend>
*
- * <zlbytes> is an unsigned integer to hold the number of bytes that the
- * ziplist occupies. This value needs to be stored to be able to resize the
- * entire structure without the need to traverse it first.
+ * <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>
+ *
+ * NOTE: all fields are stored in little endian, if not specified otherwise.
+ *
+ * <uint32_t zlbytes> is an unsigned integer to hold the number of bytes that
+ * the ziplist occupies, including the four bytes of the zlbytes field itself.
+ * This value needs to be stored to be able to resize the entire structure
+ * without the need to traverse it first.
*
- * <zltail> is the offset to the last entry in the list. This allows a pop
- * operation on the far side of the list without the need for full traversal.
+ * <uint32_t zltail> is the offset to the last entry in the list. This allows
+ * a pop operation on the far side of the list without the need for full
+ * traversal.
*
- * <zllen> is the number of entries.When this value is larger than 2**16-2,
- * we need to traverse the entire list to know how many items it holds.
+ * <uint16_t zllen> is the number of entries. When there are more than
+ * 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the
+ * entire list to know how many items it holds.
*
- * <zlend> is a single byte special value, equal to 255, which indicates the
- * end of the list.
+ * <uint8_t zlend> is a special entry representing the end of the ziplist.
+ * Is encoded as a single byte equal to 255. No other normal entry starts
+ * with a byte set to the value of 255.
*
- * ZIPLIST ENTRIES:
- * Every entry in the ziplist is prefixed by a header that contains two pieces
+ * ZIPLIST ENTRIES
+ * ===============
+ *
+ * Every entry in the ziplist is prefixed by metadata that contains two pieces
* of information. First, the length of the previous entry is stored to be
- * able to traverse the list from back to front. Second, the encoding with an
- * optional string length of the entry itself is stored.
+ * able to traverse the list from back to front. Second, the entry encoding is
+ * provided. It represents the entry type, integer or string, and in the case
+ * of strings it also represents the length of the string payload.
+ * So a complete entry is stored like this:
+ *
+ * <prevlen> <encoding> <entry-data>
+ *
+ * Sometimes the encoding represents the entry itself, like for small integers
+ * as we'll see later. In such a case the <entry-data> part is missing, and we
+ * could have just:
+ *
+ * <prevlen> <encoding>
+ *
+ * The length of the previous entry, <prevlen>, is encoded in the following way:
+ * If this length is smaller than 254 bytes, it will only consume a single
+ * byte representing the length as an unsigned 8 bit integer. When the length
+ * is greater than or equal to 254, it will consume 5 bytes. The first byte is
+ * set to 254 (FE) to indicate a larger value is following. The remaining 4
+ * bytes take the length of the previous entry as value.
+ *
+ * So practically an entry is encoded in the following way:
+ *
+ * <prevlen from 0 to 254> <encoding> <entry>
*
- * The length of the previous entry is encoded in the following way:
- * If this length is smaller than 254 bytes, it will only consume a single
- * byte that takes the length as value. When the length is greater than or
- * equal to 254, it will consume 5 bytes. The first byte is set to 254 to
- * indicate a larger value is following. The remaining 4 bytes take the
- * length of the previous entry as value.
+ * Or alternatively if the previous entry length is greater than or equal to
+ * the following encoding is used:
*
- * The other header field of the entry itself depends on the contents of the
- * entry. When the entry is a string, the first 2 bits of this header will hold
- * the type of encoding used to store the length of the string, followed by the
- * actual length of the string. When the entry is an integer the first 2 bits
- * are both set to 1. The following 2 bits are used to specify what kind of
- * integer will be stored after this header. An overview of the different
- * types and encodings is as follows:
+ * 0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry>
+ *
+ * The encoding field of the entry depends on the content of the
+ * entry. When the entry is a string, the first 2 bits of the encoding first
+ * byte will hold the type of encoding used to store the length of the string,
+ * followed by the actual length of the string. When the entry is an integer
+ * the first 2 bits are both set to 1. The following 2 bits are used to specify
+ * what kind of integer will be stored after this header. An overview of the
+ * different types and encodings is as follows. The first byte is always enough
+ * to determine the kind of entry.
*
* |00pppppp| - 1 byte
* String value with length less than or equal to 63 bytes (6 bits).
+ * "pppppp" represents the unsigned 6 bit length.
* |01pppppp|qqqqqqqq| - 2 bytes
* String value with length less than or equal to 16383 bytes (14 bits).
- * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
+ * IMPORTANT: The 14 bit number is stored in big endian.
+ * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
* String value with length greater than or equal to 16384 bytes.
- * |11000000| - 1 byte
+ * Only the 4 bytes following the first byte represents the length
+ * up to 2^32-1. The 6 lower bits of the first byte are not used and
+ * are set to zero.
+ * IMPORTANT: The 32 bit number is stored in big endian.
+ * |11000000| - 3 bytes
* Integer encoded as int16_t (2 bytes).
- * |11010000| - 1 byte
+ * |11010000| - 5 bytes
* Integer encoded as int32_t (4 bytes).
- * |11100000| - 1 byte
+ * |11100000| - 9 bytes
* Integer encoded as int64_t (8 bytes).
- * |11110000| - 1 byte
+ * |11110000| - 4 bytes
* Integer encoded as 24 bit signed (3 bytes).
- * |11111110| - 1 byte
+ * |11111110| - 2 bytes
* Integer encoded as 8 bit signed (1 byte).
* |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.
* Unsigned integer from 0 to 12. The encoded value is actually from
* 1 to 13 because 0000 and 1111 can not be used, so 1 should be
* subtracted from the encoded 4 bit value to obtain the right value.
- * |11111111| - End of ziplist.
+ * |11111111| - End of ziplist special entry.
+ *
+ * Like for the ziplist header, all the integers are represented in little
+ * endian byte order, even when this code is compiled in big endian systems.
+ *
+ * EXAMPLES OF ACTUAL ZIPLISTS
+ * ===========================
+ *
+ * The following is a ziplist containing the two elements representing
+ * the strings "2" and "5". It is composed of 15 bytes, that we visually
+ * split into sections:
+ *
+ * [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff]
+ * | | | | | |
+ * zlbytes zltail entries "2" "5" end
*
- * All the integers are represented in little endian byte order.
+ * The first 4 bytes represent the number 15, that is the number of bytes
+ * the whole ziplist is composed of. The second 4 bytes are the offset
+ * at which the last ziplist entry is found, that is 12, in fact the
+ * last entry, that is "5", is at offset 12 inside the ziplist.
+ * The next 16 bit integer represents the number of elements inside the
+ * ziplist, its value is 2 since there are just two elements inside.
+ * Finally "00 f3" is the first entry representing the number 2. It is
+ * composed of the previous entry length, which is zero because this is
+ * our first entry, and the byte F3 which corresponds to the encoding
+ * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the "F"
+ * higher order bits 1111, and subtract 1 from the "3", so the entry value
+ * is "2". The next entry has a prevlen of 02, since the first entry is
+ * composed of exactly two bytes. The entry itself, F6, is encoded exactly
+ * like the first entry, and 6-1 = 5, so the value of the entry is 5.
+ * Finally the special entry FF signals the end of the ziplist.
+ *
+ * Adding another element to the above string with the value "Hello World"
+ * allows us to show how the ziplist encodes small strings. We'll just show
+ * the hex dump of the entry itself. Imagine the bytes as following the
+ * entry that stores "5" in the ziplist above:
+ *
+ * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64]
+ *
+ * The first byte, 02, is the length of the previous entry. The next
+ * byte represents the encoding in the pattern |00pppppp| that means
+ * that the entry is a string of length <pppppp>, so 0B means that
+ * an 11 bytes string follows. From the third byte (48) to the last (64)
+ * there are just the ASCII characters for "Hello World".
*
* ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2009-2017, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -112,8 +190,13 @@
#include "endianconv.h"
#include "redisassert.h"
-#define ZIP_END 255
-#define ZIP_BIGLEN 254
+#define ZIP_END 255 /* Special "end of ziplist" entry. */
+#define ZIP_BIG_PREVLEN 254 /* Max number of bytes of the previous entry, for
+ the "prevlen" field prefixing each entry, to be
+ represented with just a single byte. Otherwise
+ it is represented as FE AA BB CC DD, where
+ AA BB CC DD are a 4 bytes unsigned integer
+ representing the previous entry len. */
/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0
@@ -126,66 +209,127 @@
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe
-/* 4 bit integer immediate encoding */
-#define ZIP_INT_IMM_MASK 0x0f
+
+/* 4 bit integer immediate encoding |1111xxxx| with xxxx between
+ * 0001 and 1101. */
+#define ZIP_INT_IMM_MASK 0x0f /* Mask to extract the 4 bits value. To add
+ one is needed to reconstruct the value. */
#define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd /* 11111101 */
-#define ZIP_INT_IMM_VAL(v) (v & ZIP_INT_IMM_MASK)
#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)
-/* Macro to determine type */
+/* Macro to determine if the entry is a string. String entries never start
+ * with "11" as most significant bits of the first byte. */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)
-/* Utility macros */
+/* Utility macros. */
+
+/* Return total bytes a ziplist is composed of. */
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))
+
+/* Return the offset of the last item inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
+
+/* Return the length of a ziplist, or UINT16_MAX if the length cannot be
+ * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
+
+/* The size of a ziplist header: two 32 bit integers for the total
+ * bytes count and last item offset. One 16 bit integer for the number
+ * of items field. */
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
+
+/* Size of the "end of ziplist" entry. Just one byte. */
+#define ZIPLIST_END_SIZE (sizeof(uint8_t))
+
+/* Return the pointer to the first entry of a ziplist. */
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
+
+/* Return the pointer to the last entry of a ziplist, using the
+ * last entry offset inside the ziplist header. */
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
+
+/* Return the pointer to the last byte of a ziplist, which is, the
+ * end of ziplist FF entry. */
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
-/* We know a positive increment can only be 1 because entries can only be
- * pushed one at a time. */
+/* Increment the number of items field in the ziplist header. Note that this
+ * macro should never overflow the unsigned 16 bit integer, since entries are
+ * always pushed one at a time. When UINT16_MAX is reached we want the count
+ * to stay there to signal that a full scan is needed to get the number of
+ * items inside the ziplist. */
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
if (ZIPLIST_LENGTH(zl) < UINT16_MAX) \
ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \
}
+/* We use this function to receive information about a ziplist entry.
+ * Note that this is not how the data is actually encoded, is just what we
+ * get filled by a function in order to operate more easily. */
typedef struct zlentry {
- unsigned int prevrawlensize, prevrawlen;
- unsigned int lensize, len;
- unsigned int headersize;
- unsigned char encoding;
- unsigned char *p;
+ unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
+ unsigned int prevrawlen; /* Previous entry len. */
+ unsigned int lensize; /* Bytes used to encode this entry type/len.
+ For example strings have a 1, 2 or 5 bytes
+ header. Integers always use a single byte.*/
+ unsigned int len; /* Bytes used to represent the actual entry.
+ For strings this is just the string length
+ while for integers it is 1, 2, 3, 4, 8 or
+ 0 (for 4 bit immediate) depending on the
+ number range. */
+ unsigned int headersize; /* prevrawlensize + lensize. */
+ unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on
+ the entry encoding. However for 4 bits
+ immediate integers this can assume a range
+ of values and must be range-checked. */
+ unsigned char *p; /* Pointer to the very start of the entry, that
+ is, this points to prev-entry-len field. */
} zlentry;
+#define ZIPLIST_ENTRY_ZERO(zle) { \
+ (zle)->prevrawlensize = (zle)->prevrawlen = 0; \
+ (zle)->lensize = (zle)->len = (zle)->headersize = 0; \
+ (zle)->encoding = 0; \
+ (zle)->p = NULL; \
+}
+
/* Extract the encoding from the byte pointed by 'ptr' and set it into
- * 'encoding'. */
+ * 'encoding' field of the zlentry structure. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \
(encoding) = (ptr[0]); \
if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)
-/* Return bytes needed to store integer encoded by 'encoding' */
-static unsigned int zipIntSize(unsigned char encoding) {
+/* Return bytes needed to store integer encoded by 'encoding'. */
+unsigned int zipIntSize(unsigned char encoding) {
switch(encoding) {
case ZIP_INT_8B: return 1;
case ZIP_INT_16B: return 2;
case ZIP_INT_24B: return 3;
case ZIP_INT_32B: return 4;
case ZIP_INT_64B: return 8;
- default: return 0; /* 4 bit immediate */
}
- assert(NULL);
+ if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
+ return 0; /* 4 bit immediate */
+ panic("Invalid integer encoding 0x%02X", encoding);
return 0;
}
-/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
- * the amount of bytes required to encode such a length. */
-static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
+/* Write the encoding header of the entry in 'p'. If p is NULL it just returns
+ * the amount of bytes required to encode such a length. Arguments:
+ *
+ * 'encoding' is the encoding we are using for the entry. It could be
+ * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
+ * for single-byte small immediate integers.
+ *
+ * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
+ * string that this entry represents.
+ *
+ * The function returns the number of bytes used by the encoding/length
+ * header stored in 'p'. */
+unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
unsigned char len = 1, buf[5];
if (ZIP_IS_STR(encoding)) {
@@ -214,15 +358,16 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
buf[0] = encoding;
}
- /* Store this length at p */
+ /* Store this length at p. */
memcpy(p,buf,len);
return len;
}
-/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the
- * entries encoding, the 'lensize' variable will hold the number of bytes
- * required to encode the entries length, and the 'len' variable will hold the
- * entries length. */
+/* Decode the entry encoding type and data length (string length for strings,
+ * number of bytes used for the integer for integer entries) encoded in 'ptr'.
+ * The 'encoding' variable will hold the entry encoding, the 'lensize'
+ * variable will hold the number of bytes required to encode the entry
+ * length, and the 'len' variable will hold the entry length. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \
ZIP_ENTRY_ENCODING((ptr), (encoding)); \
if ((encoding) < ZIP_STR_MASK) { \
@@ -232,14 +377,14 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
} else if ((encoding) == ZIP_STR_14B) { \
(lensize) = 2; \
(len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \
- } else if (encoding == ZIP_STR_32B) { \
+ } else if ((encoding) == ZIP_STR_32B) { \
(lensize) = 5; \
(len) = ((ptr)[1] << 24) | \
((ptr)[2] << 16) | \
((ptr)[3] << 8) | \
((ptr)[4]); \
} else { \
- assert(NULL); \
+ panic("Invalid string encoding 0x%02X", (encoding)); \
} \
} else { \
(lensize) = 1; \
@@ -247,45 +392,49 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
} \
} while(0);
+/* Encode the length of the previous entry and write it to "p". This only
+ * uses the larger encoding (required in __ziplistCascadeUpdate). */
+int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
+ if (p != NULL) {
+ p[0] = ZIP_BIG_PREVLEN;
+ memcpy(p+1,&len,sizeof(len));
+ memrev32ifbe(p+1);
+ }
+ return 1+sizeof(len);
+}
+
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
-static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) {
+unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
- return (len < ZIP_BIGLEN) ? 1 : sizeof(len)+1;
+ return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
} else {
- if (len < ZIP_BIGLEN) {
+ if (len < ZIP_BIG_PREVLEN) {
p[0] = len;
return 1;
} else {
- p[0] = ZIP_BIGLEN;
- memcpy(p+1,&len,sizeof(len));
- memrev32ifbe(p+1);
- return 1+sizeof(len);
+ return zipStorePrevEntryLengthLarge(p,len);
}
}
}
-/* Encode the length of the previous entry and write it to "p". This only
- * uses the larger encoding (required in __ziplistCascadeUpdate). */
-static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
- if (p == NULL) return;
- p[0] = ZIP_BIGLEN;
- memcpy(p+1,&len,sizeof(len));
- memrev32ifbe(p+1);
-}
-
-/* Decode the number of bytes required to store the length of the previous
- * element, from the perspective of the entry pointed to by 'ptr'. */
+/* Return the number of bytes used to encode the length of the previous
+ * entry. The length is returned by setting the var 'prevlensize'. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \
- if ((ptr)[0] < ZIP_BIGLEN) { \
+ if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
(prevlensize) = 1; \
} else { \
(prevlensize) = 5; \
} \
} while(0);
-/* Decode the length of the previous element, from the perspective of the entry
- * pointed to by 'ptr'. */
+/* Return the length of the previous element, and the number of bytes that
+ * are used in order to encode the previous element length.
+ * 'ptr' must point to the prevlen prefix of an entry (that encodes the
+ * length of the previous entry in order to navigate the elements backward).
+ * The length of the previous entry is stored in 'prevlen', the number of
+ * bytes needed to encode the previous entry length are stored in
+ * 'prevlensize'. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \
ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \
if ((prevlensize) == 1) { \
@@ -297,16 +446,29 @@ static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
} \
} while(0);
-/* Return the difference in number of bytes needed to store the length of the
- * previous element 'len', in the entry pointed to by 'p'. */
-static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
+/* Given a pointer 'p' to the prevlen info that prefixes an entry, this
+ * function returns the difference in number of bytes needed to encode
+ * the prevlen if the previous entry changes of size.
+ *
+ * So if A is the number of bytes used right now to encode the 'prevlen'
+ * field.
+ *
+ * And B is the number of bytes that are needed in order to encode the
+ * 'prevlen' if the previous element will be updated to one of size 'len'.
+ *
+ * Then the function returns B - A
+ *
+ * So the function returns a positive number if more space is needed,
+ * a negative number if less space is needed, or zero if the same space
+ * is needed. */
+int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
unsigned int prevlensize;
ZIP_DECODE_PREVLENSIZE(p, prevlensize);
- return zipPrevEncodeLength(NULL, len) - prevlensize;
+ return zipStorePrevEntryLength(NULL, len) - prevlensize;
}
/* Return the total number of bytes used by the entry pointed to by 'p'. */
-static unsigned int zipRawEntryLength(unsigned char *p) {
+unsigned int zipRawEntryLength(unsigned char *p) {
unsigned int prevlensize, encoding, lensize, len;
ZIP_DECODE_PREVLENSIZE(p, prevlensize);
ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);
@@ -315,7 +477,7 @@ static unsigned int zipRawEntryLength(unsigned char *p) {
/* Check if string pointed to by 'entry' can be encoded as an integer.
* Stores the integer value in 'v' and its encoding in 'encoding'. */
-static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
+int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
long long value;
if (entrylen >= 32 || entrylen == 0) return 0;
@@ -342,7 +504,7 @@ static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long
}
/* Store integer 'value' at 'p', encoded as 'encoding' */
-static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
+void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64;
@@ -372,7 +534,7 @@ static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encodi
}
/* Read integer encoded as 'encoding' from 'p' */
-static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
+int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64, ret = 0;
@@ -404,14 +566,12 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
}
/* Return a struct with all information about an entry. */
-static zlentry zipEntry(unsigned char *p) {
- zlentry e;
-
- ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen);
- ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len);
- e.headersize = e.prevrawlensize + e.lensize;
- e.p = p;
- return e;
+void zipEntry(unsigned char *p, zlentry *e) {
+
+ ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
+ ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
+ e->headersize = e->prevrawlensize + e->lensize;
+ e->p = p;
}
/* Create a new empty ziplist. */
@@ -426,7 +586,7 @@ unsigned char *ziplistNew(void) {
}
/* Resize the ziplist. */
-static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
+unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
zl = zrealloc(zl,len);
ZIPLIST_BYTES(zl) = intrev32ifbe(len);
zl[len-1] = ZIP_END;
@@ -441,8 +601,8 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
* causes a realloc and memmove). However, encoding the prevlen may require
* that this entry is grown as well. This effect may cascade throughout
* the ziplist when there are consecutive entries with a size close to
- * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every
- * consecutive entry.
+ * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
+ * every consecutive entry.
*
* Note that this effect can also happen in reverse, where the bytes required
* to encode the prevlen field can shrink. This effect is deliberately ignored,
@@ -453,20 +613,20 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
*
* The pointer "p" points to the first entry that does NOT need to be
* updated, i.e. consecutive fields MAY need an update. */
-static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
+unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
size_t offset, noffset, extra;
unsigned char *np;
zlentry cur, next;
while (p[0] != ZIP_END) {
- cur = zipEntry(p);
+ zipEntry(p, &cur);
rawlen = cur.headersize + cur.len;
- rawlensize = zipPrevEncodeLength(NULL,rawlen);
+ rawlensize = zipStorePrevEntryLength(NULL,rawlen);
/* Abort if there is no next entry. */
if (p[rawlen] == ZIP_END) break;
- next = zipEntry(p+rawlen);
+ zipEntry(p+rawlen, &next);
/* Abort when "prevlen" has not changed. */
if (next.prevrawlen == rawlen) break;
@@ -493,7 +653,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
memmove(np+rawlensize,
np+next.prevrawlensize,
curlen-noffset-next.prevrawlensize-1);
- zipPrevEncodeLength(np,rawlen);
+ zipStorePrevEntryLength(np,rawlen);
/* Advance the cursor */
p += rawlen;
@@ -502,9 +662,9 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
if (next.prevrawlensize > rawlensize) {
/* This would result in shrinking, which we want to avoid.
* So, set "rawlen" in the available bytes. */
- zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
+ zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
} else {
- zipPrevEncodeLength(p+rawlen,rawlen);
+ zipStorePrevEntryLength(p+rawlen,rawlen);
}
/* Stop here, as the raw length of "next" has not changed. */
@@ -515,19 +675,19 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
}
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
-static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
+unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
size_t offset;
int nextdiff = 0;
zlentry first, tail;
- first = zipEntry(p);
+ zipEntry(p, &first);
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
}
- totlen = p-first.p;
+ totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
if (totlen > 0) {
if (p[0] != ZIP_END) {
/* Storing `prevrawlen` in this entry may increase or decrease the
@@ -535,8 +695,13 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
* There always is room to store this, because it was previously
* stored by an entry that is now being deleted. */
nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
+
+ /* Note that there is always space when p jumps backward: if
+ * the new previous entry is large, one of the deleted elements
+ * had a 5 bytes prevlen header, so there is for sure at least
+ * 5 bytes free and we need just 4. */
p -= nextdiff;
- zipPrevEncodeLength(p,first.prevrawlen);
+ zipStorePrevEntryLength(p,first.prevrawlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
@@ -545,7 +710,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p);
+ zipEntry(p, &tail);
if (p[tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -575,20 +740,20 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
}
/* Insert item at "p". */
-static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
- size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, prevlen = 0;
+unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
+ size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
+ unsigned int prevlensize, prevlen = 0;
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
- zlentry entry, tail;
+ zlentry tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
- entry = zipEntry(p);
- prevlen = entry.prevrawlen;
+ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
@@ -601,19 +766,24 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
- /* 'encoding' is untouched, however zipEncodeLength will use the
+ /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
* string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
- reqlen += zipPrevEncodeLength(NULL,prevlen);
- reqlen += zipEncodeLength(NULL,encoding,slen);
+ reqlen += zipStorePrevEntryLength(NULL,prevlen);
+ reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
+ int forcelarge = 0;
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
+ if (nextdiff == -4 && reqlen < 4) {
+ nextdiff = 0;
+ forcelarge = 1;
+ }
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
@@ -626,7 +796,10 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
- zipPrevEncodeLength(p+reqlen,reqlen);
+ if (forcelarge)
+ zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
+ else
+ zipStorePrevEntryLength(p+reqlen,reqlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
@@ -635,7 +808,7 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
/* When the tail contains more than one entry, we need to take
* "nextdiff" in account as well. Otherwise, a change in the
* size of prevlen doesn't have an effect on the *tail* offset. */
- tail = zipEntry(p+reqlen);
+ zipEntry(p+reqlen, &tail);
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
@@ -654,8 +827,8 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
}
/* Write the entry */
- p += zipPrevEncodeLength(p,prevlen);
- p += zipEncodeLength(p,encoding,slen);
+ p += zipStorePrevEntryLength(p,prevlen);
+ p += zipStoreEntryEncoding(p,encoding,slen);
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
@@ -665,6 +838,121 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
return zl;
}
+/* Merge ziplists 'first' and 'second' by appending 'second' to 'first'.
+ *
+ * NOTE: The larger ziplist is reallocated to contain the new merged ziplist.
+ * Either 'first' or 'second' can be used for the result. The parameter not
+ * used will be free'd and set to NULL.
+ *
+ * After calling this function, the input parameters are no longer valid since
+ * they are changed and free'd in-place.
+ *
+ * The result ziplist is the contents of 'first' followed by 'second'.
+ *
+ * On failure: returns NULL if the merge is impossible.
 * On success: returns the merged ziplist (which is the expanded version of either
+ * 'first' or 'second', also frees the other unused input ziplist, and sets the
+ * input ziplist argument equal to newly reallocated ziplist return value. */
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second) {
+ /* If any params are null, we can't merge, so NULL. */
+ if (first == NULL || *first == NULL || second == NULL || *second == NULL)
+ return NULL;
+
+ /* Can't merge same list into itself. */
+ if (*first == *second)
+ return NULL;
+
+ size_t first_bytes = intrev32ifbe(ZIPLIST_BYTES(*first));
+ size_t first_len = intrev16ifbe(ZIPLIST_LENGTH(*first));
+
+ size_t second_bytes = intrev32ifbe(ZIPLIST_BYTES(*second));
+ size_t second_len = intrev16ifbe(ZIPLIST_LENGTH(*second));
+
+ int append;
+ unsigned char *source, *target;
+ size_t target_bytes, source_bytes;
+ /* Pick the largest ziplist so we can resize easily in-place.
+ * We must also track if we are now appending or prepending to
+ * the target ziplist. */
+ if (first_len >= second_len) {
+ /* retain first, append second to first. */
+ target = *first;
+ target_bytes = first_bytes;
+ source = *second;
+ source_bytes = second_bytes;
+ append = 1;
+ } else {
+ /* else, retain second, prepend first to second. */
+ target = *second;
+ target_bytes = second_bytes;
+ source = *first;
+ source_bytes = first_bytes;
+ append = 0;
+ }
+
+ /* Calculate final bytes (subtract one pair of metadata) */
+ size_t zlbytes = first_bytes + second_bytes -
+ ZIPLIST_HEADER_SIZE - ZIPLIST_END_SIZE;
+ size_t zllength = first_len + second_len;
+
+ /* Combined zl length should be limited within UINT16_MAX */
+ zllength = zllength < UINT16_MAX ? zllength : UINT16_MAX;
+
+ /* Save offset positions before we start ripping memory apart. */
+ size_t first_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*first));
+ size_t second_offset = intrev32ifbe(ZIPLIST_TAIL_OFFSET(*second));
+
+ /* Extend target to new zlbytes then append or prepend source. */
+ target = zrealloc(target, zlbytes);
+ if (append) {
+ /* append == appending to target */
+ /* Copy source after target (copying over original [END]):
+ * [TARGET - END, SOURCE - HEADER] */
+ memcpy(target + target_bytes - ZIPLIST_END_SIZE,
+ source + ZIPLIST_HEADER_SIZE,
+ source_bytes - ZIPLIST_HEADER_SIZE);
+ } else {
+ /* !append == prepending to target */
+ /* Move target *contents* exactly size of (source - [END]),
 * then copy source into vacated space (source - [END]):
+ * [SOURCE - END, TARGET - HEADER] */
+ memmove(target + source_bytes - ZIPLIST_END_SIZE,
+ target + ZIPLIST_HEADER_SIZE,
+ target_bytes - ZIPLIST_HEADER_SIZE);
+ memcpy(target, source, source_bytes - ZIPLIST_END_SIZE);
+ }
+
+ /* Update header metadata. */
+ ZIPLIST_BYTES(target) = intrev32ifbe(zlbytes);
+ ZIPLIST_LENGTH(target) = intrev16ifbe(zllength);
+ /* New tail offset is:
+ * + N bytes of first ziplist
+ * - 1 byte for [END] of first ziplist
+ * + M bytes for the offset of the original tail of the second ziplist
+ * - J bytes for HEADER because second_offset keeps no header. */
+ ZIPLIST_TAIL_OFFSET(target) = intrev32ifbe(
+ (first_bytes - ZIPLIST_END_SIZE) +
+ (second_offset - ZIPLIST_HEADER_SIZE));
+
+ /* __ziplistCascadeUpdate just fixes the prev length values until it finds a
+ * correct prev length value (then it assumes the rest of the list is okay).
+ * We tell CascadeUpdate to start at the first ziplist's tail element to fix
+ * the merge seam. */
+ target = __ziplistCascadeUpdate(target, target+first_offset);
+
+ /* Now free and NULL out what we didn't realloc */
+ if (append) {
+ zfree(*second);
+ *second = NULL;
+ *first = target;
+ } else {
+ zfree(*first);
+ *first = NULL;
+ *second = target;
+ }
+ return target;
+}
+
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
unsigned char *p;
p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
@@ -676,15 +964,15 @@ unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int sle
* doesn't contain an element at the provided index, NULL is returned. */
unsigned char *ziplistIndex(unsigned char *zl, int index) {
unsigned char *p;
- zlentry entry;
+ unsigned int prevlensize, prevlen = 0;
if (index < 0) {
index = (-index)-1;
p = ZIPLIST_ENTRY_TAIL(zl);
if (p[0] != ZIP_END) {
- entry = zipEntry(p);
- while (entry.prevrawlen > 0 && index--) {
- p -= entry.prevrawlen;
- entry = zipEntry(p);
+ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+ while (prevlen > 0 && index--) {
+ p -= prevlen;
+ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
}
}
} else {
@@ -722,7 +1010,7 @@ unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
/* Return pointer to previous entry in ziplist. */
unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
- zlentry entry;
+ unsigned int prevlensize, prevlen = 0;
/* Iterating backwards from ZIP_END should return the tail. When "p" is
* equal to the first element of the list, we're already at the head,
@@ -733,14 +1021,14 @@ unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p) {
} else if (p == ZIPLIST_ENTRY_HEAD(zl)) {
return NULL;
} else {
- entry = zipEntry(p);
- assert(entry.prevrawlen > 0);
- return p-entry.prevrawlen;
+ ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
+ assert(prevlen > 0);
+ return p-prevlen;
}
}
-/* Get entry pointed to by 'p' and store in either 'e' or 'v' depending
- * on the encoding of the entry. 'e' is always set to NULL to be able
+/* Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending
+ * on the encoding of the entry. '*sstr' is always set to NULL to be able
* to find out whether the string pointer or the integer value was set.
* Return 0 if 'p' points to the end of the ziplist, 1 otherwise. */
unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {
@@ -748,7 +1036,7 @@ unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *sl
if (p == NULL || p[0] == ZIP_END) return 0;
if (sstr) *sstr = NULL;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
if (sstr) {
*slen = entry.len;
@@ -783,19 +1071,20 @@ unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
}
/* Delete a range of entries from the ziplist. */
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num) {
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num) {
unsigned char *p = ziplistIndex(zl,index);
return (p == NULL) ? zl : __ziplistDelete(zl,p,num);
}
-/* Compare entry pointer to by 'p' with 'entry'. Return 1 if equal. */
+/* Compare entry pointed to by 'p' with 'sstr' of length 'slen'. */
+/* Return 1 if equal. */
unsigned int ziplistCompare(unsigned char *p, unsigned char *sstr, unsigned int slen) {
zlentry entry;
unsigned char sencoding;
long long zval, sval;
if (p[0] == ZIP_END) return 0;
- entry = zipEntry(p);
+ zipEntry(p, &entry);
if (ZIP_IS_STR(entry.encoding)) {
/* Raw compare */
if (entry.len == slen) {
@@ -905,25 +1194,24 @@ void ziplistRepr(unsigned char *zl) {
printf(
"{total bytes %d} "
- "{length %u}\n"
+ "{num entries %u}\n"
"{tail offset %u}\n",
intrev32ifbe(ZIPLIST_BYTES(zl)),
intrev16ifbe(ZIPLIST_LENGTH(zl)),
intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
p = ZIPLIST_ENTRY_HEAD(zl);
while(*p != ZIP_END) {
- entry = zipEntry(p);
+ zipEntry(p, &entry);
printf(
- "{"
- "addr 0x%08lx, "
- "index %2d, "
- "offset %5ld, "
- "rl: %5u, "
- "hs %2u, "
- "pl: %5u, "
- "pls: %2u, "
- "payload %5u"
- "} ",
+ "{\n"
+ "\taddr 0x%08lx,\n"
+ "\tindex %2d,\n"
+ "\toffset %5ld,\n"
+ "\thdr+entry len: %5u,\n"
+ "\thdr len%2u,\n"
+ "\tprevrawlen: %5u,\n"
+ "\tprevrawlensize: %2u,\n"
+ "\tpayload %5u\n",
(long unsigned)p,
index,
(unsigned long) (p-zl),
@@ -932,8 +1220,14 @@ void ziplistRepr(unsigned char *zl) {
entry.prevrawlen,
entry.prevrawlensize,
entry.len);
+ printf("\tbytes: ");
+ for (unsigned int i = 0; i < entry.headersize+entry.len; i++) {
+ printf("%02x|",p[i]);
+ }
+ printf("\n");
p += entry.headersize;
if (ZIP_IS_STR(entry.encoding)) {
+ printf("\t[str]");
if (entry.len > 40) {
if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
printf("...");
@@ -942,23 +1236,23 @@ void ziplistRepr(unsigned char *zl) {
fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
}
} else {
- printf("%lld", (long long) zipLoadInteger(p,entry.encoding));
+ printf("\t[int]%lld", (long long) zipLoadInteger(p,entry.encoding));
}
- printf("\n");
+ printf("\n}\n");
p += entry.len;
index++;
}
printf("{end}\n\n");
}
-#ifdef ZIPLIST_TEST_MAIN
+#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"
#include "sds.h"
#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); }
-unsigned char *createList() {
+static unsigned char *createList() {
unsigned char *zl = ziplistNew();
zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL);
zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL);
@@ -967,7 +1261,7 @@ unsigned char *createList() {
return zl;
}
-unsigned char *createIntList() {
+static unsigned char *createIntList() {
unsigned char *zl = ziplistNew();
char buf[32];
@@ -986,13 +1280,13 @@ unsigned char *createIntList() {
return zl;
}
-long long usec(void) {
+static long long usec(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
-void stress(int pos, int num, int maxsize, int dnum) {
+static void stress(int pos, int num, int maxsize, int dnum) {
int i,j,k;
unsigned char *zl;
char posstr[2][5] = { "HEAD", "TAIL" };
@@ -1015,7 +1309,7 @@ void stress(int pos, int num, int maxsize, int dnum) {
}
}
-void pop(unsigned char *zl, int where) {
+static unsigned char *pop(unsigned char *zl, int where) {
unsigned char *p, *vstr;
unsigned int vlen;
long long vlong;
@@ -1027,20 +1321,22 @@ void pop(unsigned char *zl, int where) {
else
printf("Pop tail: ");
- if (vstr)
+ if (vstr) {
if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite");
- else
+ }
+ else {
printf("%lld", vlong);
+ }
printf("\n");
- ziplistDeleteRange(zl,-1,1);
+ return ziplistDelete(zl,&p);
} else {
printf("ERROR: Could not pop\n");
exit(1);
}
}
-int randstring(char *target, unsigned int min, unsigned int max) {
+static int randstring(char *target, unsigned int min, unsigned int max) {
int p = 0;
int len = min+rand()%(max-min+1);
int minval, maxval;
@@ -1066,23 +1362,24 @@ int randstring(char *target, unsigned int min, unsigned int max) {
return len;
}
-void verify(unsigned char *zl, zlentry *e) {
- int i;
+static void verify(unsigned char *zl, zlentry *e) {
int len = ziplistLen(zl);
zlentry _e;
- for (i = 0; i < len; i++) {
+ ZIPLIST_ENTRY_ZERO(&_e);
+
+ for (int i = 0; i < len; i++) {
memset(&e[i], 0, sizeof(zlentry));
- e[i] = zipEntry(ziplistIndex(zl, i));
+ zipEntry(ziplistIndex(zl, i), &e[i]);
memset(&_e, 0, sizeof(zlentry));
- _e = zipEntry(ziplistIndex(zl, -len+i));
+ zipEntry(ziplistIndex(zl, -len+i), &_e);
assert(memcmp(&e[i], &_e, sizeof(zlentry)) == 0);
}
}
-int main(int argc, char **argv) {
+int ziplistTest(int argc, char **argv) {
unsigned char *zl, *p;
unsigned char *entry;
unsigned int elen;
@@ -1095,21 +1392,25 @@ int main(int argc, char **argv) {
zl = createIntList();
ziplistRepr(zl);
+ zfree(zl);
+
zl = createList();
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_HEAD);
+ zl = pop(zl,ZIPLIST_HEAD);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
- pop(zl,ZIPLIST_TAIL);
+ zl = pop(zl,ZIPLIST_TAIL);
ziplistRepr(zl);
+ zfree(zl);
+
printf("Get element at index 3:\n");
{
zl = createList();
@@ -1125,6 +1426,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index 4 (out of range):\n");
@@ -1138,6 +1440,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -1 (last element):\n");
@@ -1155,6 +1458,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -4 (first element):\n");
@@ -1172,6 +1476,7 @@ int main(int argc, char **argv) {
printf("%lld\n", value);
}
printf("\n");
+ zfree(zl);
}
printf("Get element at index -5 (reverse out of range):\n");
@@ -1185,6 +1490,7 @@ int main(int argc, char **argv) {
return 1;
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 0 to end:\n");
@@ -1202,6 +1508,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 1 to end:\n");
@@ -1219,6 +1526,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate list from 2 to end:\n");
@@ -1236,6 +1544,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate starting out of range:\n");
@@ -1248,6 +1557,7 @@ int main(int argc, char **argv) {
printf("ERROR\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front:\n");
@@ -1265,6 +1575,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Iterate from back to front, deleting all items:\n");
@@ -1283,6 +1594,7 @@ int main(int argc, char **argv) {
printf("\n");
}
printf("\n");
+ zfree(zl);
}
printf("Delete inclusive range 0,0:\n");
@@ -1290,6 +1602,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 0,1:\n");
@@ -1297,6 +1610,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 0, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete inclusive range 1,2:\n");
@@ -1304,6 +1618,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 2);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with start index out of range:\n");
@@ -1311,6 +1626,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 5, 1);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete with num overflow:\n");
@@ -1318,6 +1634,7 @@ int main(int argc, char **argv) {
zl = createList();
zl = ziplistDeleteRange(zl, 1, 5);
ziplistRepr(zl);
+ zfree(zl);
}
printf("Delete foo while iterating:\n");
@@ -1342,11 +1659,12 @@ int main(int argc, char **argv) {
}
printf("\n");
ziplistRepr(zl);
+ zfree(zl);
}
printf("Regression test for >255 byte strings:\n");
{
- char v1[257],v2[257];
+ char v1[257] = {0}, v2[257] = {0};
memset(v1,'x',256);
memset(v2,'y',256);
zl = ziplistNew();
@@ -1361,13 +1679,15 @@ int main(int argc, char **argv) {
assert(ziplistGet(p,&entry,&elen,&value));
assert(strncmp(v2,(char*)entry,elen) == 0);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Regression test deleting next to last entries:\n");
{
- char v[3][257];
- zlentry e[3];
- int i;
+ char v[3][257] = {{0}};
+ zlentry e[3] = {{.prevrawlensize = 0, .prevrawlen = 0, .lensize = 0,
+ .len = 0, .headersize = 0, .encoding = 0, .p = NULL}};
+ size_t i;
for (i = 0; i < (sizeof(v)/sizeof(v[0])); i++) {
memset(v[i], 'a' + i, sizeof(v[0]));
@@ -1398,6 +1718,7 @@ int main(int argc, char **argv) {
assert(e[1].prevrawlensize == 5);
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Create long list and check indices:\n");
@@ -1419,6 +1740,7 @@ int main(int argc, char **argv) {
assert(999-i == value);
}
printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Compare strings with ziplist entries:\n");
@@ -1444,6 +1766,82 @@ int main(int argc, char **argv) {
return 1;
}
printf("SUCCESS\n\n");
+ zfree(zl);
+ }
+
+ printf("Merge test:\n");
+ {
+ /* create list gives us: [hello, foo, quux, 1024] */
+ zl = createList();
+ unsigned char *zl2 = createList();
+
+ unsigned char *zl3 = ziplistNew();
+ unsigned char *zl4 = ziplistNew();
+
+ if (ziplistMerge(&zl4, &zl4)) {
+ printf("ERROR: Allowed merging of one ziplist into itself.\n");
+ return 1;
+ }
+
+ /* Merge two empty ziplists, get empty result back. */
+ zl4 = ziplistMerge(&zl3, &zl4);
+ ziplistRepr(zl4);
+ if (ziplistLen(zl4)) {
+ printf("ERROR: Merging two empty ziplists created entries.\n");
+ return 1;
+ }
+ zfree(zl4);
+
+ zl2 = ziplistMerge(&zl, &zl2);
+ /* merge gives us: [hello, foo, quux, 1024, hello, foo, quux, 1024] */
+ ziplistRepr(zl2);
+
+ if (ziplistLen(zl2) != 8) {
+ printf("ERROR: Merged length not 8, but: %u\n", ziplistLen(zl2));
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,0);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,3);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,4);
+ if (!ziplistCompare(p,(unsigned char*)"hello",5)) {
+ printf("ERROR: not \"hello\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"hella",5)) {
+ printf("ERROR: \"hella\"\n");
+ return 1;
+ }
+
+ p = ziplistIndex(zl2,7);
+ if (!ziplistCompare(p,(unsigned char*)"1024",4)) {
+ printf("ERROR: not \"1024\"\n");
+ return 1;
+ }
+ if (ziplistCompare(p,(unsigned char*)"1025",4)) {
+ printf("ERROR: \"1025\"\n");
+ return 1;
+ }
+ printf("SUCCESS\n\n");
+ zfree(zl);
}
printf("Stress with random payloads of different encoding:\n");
@@ -1463,7 +1861,7 @@ int main(int argc, char **argv) {
for (i = 0; i < 20000; i++) {
zl = ziplistNew();
ref = listCreate();
- listSetFreeMethod(ref,sdsfree);
+ listSetFreeMethod(ref,(void (*)(void*))sdsfree);
len = rand() % 256;
/* Create lists */
@@ -1531,5 +1929,4 @@ int main(int argc, char **argv) {
return 0;
}
-
#endif
diff --git a/src/ziplist.h b/src/ziplist.h
index b29c34167..964a47f6d 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -28,10 +28,14 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _ZIPLIST_H
+#define _ZIPLIST_H
+
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
unsigned char *ziplistNew(void);
+unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
unsigned char *ziplistIndex(unsigned char *zl, int index);
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
@@ -39,8 +43,15 @@ unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
-unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num);
+unsigned char *ziplistDeleteRange(unsigned char *zl, int index, unsigned int num);
unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
unsigned int ziplistLen(unsigned char *zl);
size_t ziplistBlobLen(unsigned char *zl);
+void ziplistRepr(unsigned char *zl);
+
+#ifdef REDIS_TEST
+int ziplistTest(int argc, char *argv[]);
+#endif
+
+#endif /* _ZIPLIST_H */
diff --git a/src/zipmap.c b/src/zipmap.c
index 803fedeec..22bfa1a46 100644
--- a/src/zipmap.c
+++ b/src/zipmap.c
@@ -51,10 +51,9 @@
* <len> is the length of the following string (key or value).
* <len> lengths are encoded in a single value or in a 5 bytes value.
* If the first byte value (as an unsigned 8 bit value) is between 0 and
- * 252, it's a single-byte length. If it is 253 then a four bytes unsigned
+ * 253, it's a single-byte length. If it is 254 then a four bytes unsigned
* integer follows (in the host byte ordering). A value of 255 is used to
- * signal the end of the hash. The special value 254 is used to mark
- * empty space that can be used to add new key/value pairs.
+ * signal the end of the hash.
*
* <free> is the number of free unused bytes after the string, resulting
* from modification of values associated to a key. For instance if "foo"
@@ -371,8 +370,8 @@ size_t zipmapBlobLen(unsigned char *zm) {
return totlen;
}
-#ifdef ZIPMAP_TEST_MAIN
-void zipmapRepr(unsigned char *p) {
+#ifdef REDIS_TEST
+static void zipmapRepr(unsigned char *p) {
unsigned int l;
printf("{status %u}",*p++);
@@ -405,9 +404,13 @@ void zipmapRepr(unsigned char *p) {
printf("\n");
}
-int main(void) {
+#define UNUSED(x) (void)(x)
+int zipmapTest(int argc, char *argv[]) {
unsigned char *zm;
+ UNUSED(argc);
+ UNUSED(argv);
+
zm = zipmapNew();
zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL);
diff --git a/src/zipmap.h b/src/zipmap.h
index 9cf1b2484..ac588f05a 100644
--- a/src/zipmap.h
+++ b/src/zipmap.h
@@ -46,4 +46,8 @@ unsigned int zipmapLen(unsigned char *zm);
size_t zipmapBlobLen(unsigned char *zm);
void zipmapRepr(unsigned char *p);
+#ifdef REDIS_TEST
+int zipmapTest(int argc, char *argv[]);
+#endif
+
#endif
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 11616e5ad..094dd80fa 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -43,6 +43,7 @@ void zlibc_free(void *ptr) {
#include <pthread.h>
#include "config.h"
#include "zmalloc.h"
+#include "atomicvar.h"
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
@@ -65,51 +66,23 @@ void zlibc_free(void *ptr) {
#define calloc(count,size) je_calloc(count,size)
#define realloc(ptr,size) je_realloc(ptr,size)
#define free(ptr) je_free(ptr)
-#endif
-
-#if defined(__ATOMIC_RELAXED)
-#define update_zmalloc_stat_add(__n) __atomic_add_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
-#define update_zmalloc_stat_sub(__n) __atomic_sub_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
-#elif defined(HAVE_ATOMIC)
-#define update_zmalloc_stat_add(__n) __sync_add_and_fetch(&used_memory, (__n))
-#define update_zmalloc_stat_sub(__n) __sync_sub_and_fetch(&used_memory, (__n))
-#else
-#define update_zmalloc_stat_add(__n) do { \
- pthread_mutex_lock(&used_memory_mutex); \
- used_memory += (__n); \
- pthread_mutex_unlock(&used_memory_mutex); \
-} while(0)
-
-#define update_zmalloc_stat_sub(__n) do { \
- pthread_mutex_lock(&used_memory_mutex); \
- used_memory -= (__n); \
- pthread_mutex_unlock(&used_memory_mutex); \
-} while(0)
-
+#define mallocx(size,flags) je_mallocx(size,flags)
+#define dallocx(ptr,flags) je_dallocx(ptr,flags)
#endif
#define update_zmalloc_stat_alloc(__n) do { \
size_t _n = (__n); \
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
- if (zmalloc_thread_safe) { \
- update_zmalloc_stat_add(_n); \
- } else { \
- used_memory += _n; \
- } \
+ atomicIncr(used_memory,__n); \
} while(0)
#define update_zmalloc_stat_free(__n) do { \
size_t _n = (__n); \
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
- if (zmalloc_thread_safe) { \
- update_zmalloc_stat_sub(_n); \
- } else { \
- used_memory -= _n; \
- } \
+ atomicDecr(used_memory,__n); \
} while(0)
static size_t used_memory = 0;
-static int zmalloc_thread_safe = 0;
pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER;
static void zmalloc_default_oom(size_t size) {
@@ -135,6 +108,24 @@ void *zmalloc(size_t size) {
#endif
}
+/* Allocation and free functions that bypass the thread cache
+ * and go straight to the allocator arena bins.
+ * Currently implemented only for jemalloc. Used for online defragmentation. */
+#ifdef HAVE_DEFRAG
+void *zmalloc_no_tcache(size_t size) {
+ void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);
+ if (!ptr) zmalloc_oom_handler(size);
+ update_zmalloc_stat_alloc(zmalloc_size(ptr));
+ return ptr;
+}
+
+void zfree_no_tcache(void *ptr) {
+ if (ptr == NULL) return;
+ update_zmalloc_stat_free(zmalloc_size(ptr));
+ dallocx(ptr, MALLOCX_TCACHE_NONE);
+}
+#endif
+
void *zcalloc(size_t size) {
void *ptr = calloc(1, size+PREFIX_SIZE);
@@ -220,27 +211,10 @@ char *zstrdup(const char *s) {
size_t zmalloc_used_memory(void) {
size_t um;
-
- if (zmalloc_thread_safe) {
-#if defined(__ATOMIC_RELAXED) || defined(HAVE_ATOMIC)
- um = update_zmalloc_stat_add(0);
-#else
- pthread_mutex_lock(&used_memory_mutex);
- um = used_memory;
- pthread_mutex_unlock(&used_memory_mutex);
-#endif
- }
- else {
- um = used_memory;
- }
-
+ atomicGet(used_memory,um);
return um;
}
-void zmalloc_enable_thread_safeness(void) {
- zmalloc_thread_safe = 1;
-}
-
void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
zmalloc_oom_handler = oom_handler;
}
@@ -328,27 +302,110 @@ float zmalloc_get_fragmentation_ratio(size_t rss) {
return (float)rss/zmalloc_used_memory();
}
+/* Get the sum of the specified field (converted form kb to bytes) in
+ * /proc/self/smaps. The field must be specified with trailing ":" as it
+ * appears in the smaps output.
+ *
+ * If a pid is specified, the information is extracted for such a pid,
+ * otherwise if pid is -1 the information reported is about the
+ * current process.
+ *
+ * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1);
+ */
#if defined(HAVE_PROC_SMAPS)
-size_t zmalloc_get_private_dirty(void) {
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
char line[1024];
- size_t pd = 0;
- FILE *fp = fopen("/proc/self/smaps","r");
+ size_t bytes = 0;
+ int flen = strlen(field);
+ FILE *fp;
+
+ if (pid == -1) {
+ fp = fopen("/proc/self/smaps","r");
+ } else {
+ char filename[128];
+ snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid);
+ fp = fopen(filename,"r");
+ }
if (!fp) return 0;
while(fgets(line,sizeof(line),fp) != NULL) {
- if (strncmp(line,"Private_Dirty:",14) == 0) {
+ if (strncmp(line,field,flen) == 0) {
char *p = strchr(line,'k');
if (p) {
*p = '\0';
- pd += strtol(line+14,NULL,10) * 1024;
+ bytes += strtol(line+flen,NULL,10) * 1024;
}
}
}
fclose(fp);
- return pd;
+ return bytes;
}
#else
-size_t zmalloc_get_private_dirty(void) {
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
+ ((void) field);
+ ((void) pid);
return 0;
}
#endif
+
+size_t zmalloc_get_private_dirty(long pid) {
+ return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
+}
+
+/* Returns the size of physical memory (RAM) in bytes.
+ * It looks ugly, but this is the cleanest way to achieve cross platform results.
+ * Cleaned up from:
+ *
+ * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
+ *
+ * Note that this function:
+ * 1) Was released under the following CC attribution license:
+ * http://creativecommons.org/licenses/by/3.0/deed.en_US.
+ * 2) Was originally implemented by David Robert Nadeau.
+ * 3) Was modified for Redis by Matt Stancliff.
+ * 4) This note exists in order to comply with the original license.
+ */
+size_t zmalloc_get_memory_size(void) {
+#if defined(__unix__) || defined(__unix) || defined(unix) || \
+ (defined(__APPLE__) && defined(__MACH__))
+#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_MEMSIZE)
+ mib[1] = HW_MEMSIZE; /* OSX. --------------------- */
+#elif defined(HW_PHYSMEM64)
+ mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */
+#endif
+ int64_t size = 0; /* 64-bit */
+ size_t len = sizeof(size);
+ if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+ /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
+ return (size_t)sysconf(_SC_PHYS_PAGES) * (size_t)sysconf(_SC_PAGESIZE);
+
+#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
+ /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
+ int mib[2];
+ mib[0] = CTL_HW;
+#if defined(HW_REALMEM)
+ mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
+#elif defined(HW_PYSMEM)
+ mib[1] = HW_PHYSMEM; /* Others. ------------------ */
+#endif
+ unsigned int size = 0; /* 32-bit */
+ size_t len = sizeof(size);
+ if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
+ return (size_t)size;
+ return 0L; /* Failed? */
+#else
+ return 0L; /* Unknown method to get the data. */
+#endif
+#else
+ return 0L; /* Unknown OS. */
+#endif
+}
+
+
diff --git a/src/zmalloc.h b/src/zmalloc.h
index 72a4f8138..64f2f36aa 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -65,19 +65,32 @@
#define ZMALLOC_LIB "libc"
#endif
+/* We can enable the Redis defrag capabilities only if we are using Jemalloc
+ * and the version used is our special version modified for Redis having
+ * the ability to return per-allocation fragmentation hints. */
+#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)
+#define HAVE_DEFRAG
+#endif
+
void *zmalloc(size_t size);
void *zcalloc(size_t size);
void *zrealloc(void *ptr, size_t size);
void zfree(void *ptr);
char *zstrdup(const char *s);
size_t zmalloc_used_memory(void);
-void zmalloc_enable_thread_safeness(void);
void zmalloc_set_oom_handler(void (*oom_handler)(size_t));
float zmalloc_get_fragmentation_ratio(size_t rss);
size_t zmalloc_get_rss(void);
-size_t zmalloc_get_private_dirty(void);
+size_t zmalloc_get_private_dirty(long pid);
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid);
+size_t zmalloc_get_memory_size(void);
void zlibc_free(void *ptr);
+#ifdef HAVE_DEFRAG
+void zfree_no_tcache(void *ptr);
+void *zmalloc_no_tcache(size_t size);
+#endif
+
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);
#endif
diff --git a/tests/assets/default.conf b/tests/assets/default.conf
index 81f8470bc..d7b8a75c6 100644
--- a/tests/assets/default.conf
+++ b/tests/assets/default.conf
@@ -1,5 +1,6 @@
# Redis configuration for testing.
+always-show-logo yes
notify-keyspace-events KEA
daemonize no
pidfile /var/run/redis.pid
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
index 55f979f2f..0647914dc 100644
--- a/tests/cluster/cluster.tcl
+++ b/tests/cluster/cluster.tcl
@@ -1,7 +1,7 @@
# Cluster-specific test functions.
#
# Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
-# This softare is released under the BSD License. See the COPYING file for
+# This software is released under the BSD License. See the COPYING file for
# more information.
# Returns a parsed CLUSTER NODES output as a list of dictionaries.
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index 7af442ecc..93603ddc9 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -1,5 +1,5 @@
# Cluster test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
-# This softare is released under the BSD License. See the COPYING file for
+# This software is released under the BSD License. See the COPYING file for
# more information.
cd tests/cluster
@@ -17,10 +17,12 @@ proc main {} {
}
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
puts $::errorInfo
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
diff --git a/tests/cluster/tests/03-failover-loop.tcl b/tests/cluster/tests/03-failover-loop.tcl
index 3a966732a..8e1bcd6fe 100644
--- a/tests/cluster/tests/03-failover-loop.tcl
+++ b/tests/cluster/tests/03-failover-loop.tcl
@@ -89,7 +89,7 @@ while {[incr iterations -1]} {
test "Restarting node #$tokill" {
restart_instance redis $tokill
}
-
+
test "Instance #$tokill is now a slave" {
wait_for_condition 1000 50 {
[RI $tokill role] eq {slave}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
index 376061db3..0ccbf717d 100644
--- a/tests/cluster/tests/04-resharding.tcl
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -13,6 +13,24 @@ test "Cluster is up" {
assert_cluster_state ok
}
+test "Enable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly yes
+ # We use "appendfsync no" because it's fast but also guarantees that
+ # write(2) is performed before replying to client.
+ R $id config set appendfsync no
+ }
+
+ foreach_redis_id id {
+ wait_for_condition 1000 500 {
+ [RI $id aof_rewrite_in_progress] == 0 &&
+ [RI $id aof_enabled] == 1
+ } else {
+ fail "Failed to enable AOF on instance #$id"
+ }
+ }
+}
+
# Return nno-zero if the specified PID is about a process still in execution,
# otherwise 0 is returned.
proc process_is_running {pid} {
@@ -41,6 +59,7 @@ array set content {}
set tribpid {}
test "Cluster consistency during live resharding" {
+ set ele 0
for {set j 0} {$j < $numops} {incr j} {
# Trigger the resharding once we execute half the ops.
if {$tribpid ne {} &&
@@ -53,19 +72,31 @@ test "Cluster consistency during live resharding" {
puts -nonewline "...Starting resharding..."
flush stdout
set target [dict get [get_myself [randomInt 5]] id]
- set tribpid [exec \
+ set tribpid [lindex [exec \
../../../src/redis-trib.rb reshard \
--from all \
--to $target \
--slots 100 \
--yes \
- 127.0.0.1:[get_instance_attrib redis 0 port] &]
+ 127.0.0.1:[get_instance_attrib redis 0 port] \
+ | [info nameofexecutable] \
+ ../tests/helpers/onlydots.tcl \
+ &] 0]
}
# Write random data to random list.
- set key "key:[randomInt $numkeys]"
- set ele [randomValue]
- $cluster rpush $key $ele
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
+ incr ele
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
lappend content($key) $ele
if {($j % 1000) == 0} {
@@ -85,6 +116,57 @@ test "Cluster consistency during live resharding" {
test "Verify $numkeys keys for consistency with logical content" {
# Check that the Redis Cluster content matches our logical content.
foreach {key value} [array get content] {
- assert {[$cluster lrange $key 0 -1] eq $value}
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Crash and restart all the instances" {
+ foreach_redis_id id {
+ kill_instance redis $id
+ restart_instance redis $id
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Verify $numkeys keys after the crash & restart" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Disable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly no
+ }
+}
+
+test "Verify slaves consistency" {
+ set verified_masters 0
+ foreach_redis_id id {
+ set role [R $id role]
+ lassign $role myrole myoffset slaves
+ if {$myrole eq {slave}} continue
+ set masterport [get_instance_attrib redis $id port]
+ set masterdigest [R $id debug digest]
+ foreach_redis_id sid {
+ set srole [R $sid role]
+ if {[lindex $srole 0] eq {master}} continue
+ if {[lindex $srole 2] != $masterport} continue
+ wait_for_condition 1000 500 {
+ [R $sid debug digest] eq $masterdigest
+ } else {
+ fail "Master and slave data digest are different"
+ }
+ incr verified_masters
+ }
}
+ assert {$verified_masters >= 5}
}
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
index 4167d64be..6efedce5d 100644
--- a/tests/cluster/tests/05-slave-selection.tcl
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -83,7 +83,7 @@ test "Cluster should eventually be up again" {
assert_cluster_state ok
}
-test "Node #10 should eventaully replicate node #5" {
+test "Node #10 should eventually replicate node #5" {
set port5 [get_instance_attrib redis 5 port]
wait_for_condition 1000 50 {
([lindex [R 10 role] 2] == $port5) &&
diff --git a/tests/cluster/tests/07-replica-migration.tcl b/tests/cluster/tests/07-replica-migration.tcl
index 2ec0742b5..68231cd28 100644
--- a/tests/cluster/tests/07-replica-migration.tcl
+++ b/tests/cluster/tests/07-replica-migration.tcl
@@ -45,3 +45,59 @@ foreach_redis_id id {
}
}
}
+
+# Now test the migration to a master which used to be a slave, after
+# a failover.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+ create_cluster 5 10
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Kill slave #7 of master #2. Only slave left is #12 now" {
+ kill_instance redis 7
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing master node #2, #12 should failover" {
+ kill_instance redis 2
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance 12 is now a master without slaves" {
+ assert {[RI 12 role] eq {master}}
+}
+
+# The remaining instance is now without slaves. Some other slave
+# should migrate to it.
+
+test "Master #12 should get at least one migrated replica" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 12 role] 2]] >= 1
+ } else {
+ fail "Master #12 has no replicas"
+ }
+}
diff --git a/tests/cluster/tests/08-update-msg.tcl b/tests/cluster/tests/08-update-msg.tcl
new file mode 100644
index 000000000..6f9661db0
--- /dev/null
+++ b/tests/cluster/tests/08-update-msg.tcl
@@ -0,0 +1,90 @@
+# Test UPDATE messages sent by other nodes when the currently authoritative
+# master is unavailable. The test is performed in the following steps:
+#
+# 1) Master goes down.
+# 2) Slave failover and becomes new master.
+# 3) New master is partitioned away.
+# 4) Old master returns.
+# 5) At this point we expect the old master to turn into a slave ASAP because
+# of the UPDATE messages it will receive from the other nodes when its
+# configuration will be found to be outdated.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+ kill_instance redis 0
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Killing the new master #5" {
+ kill_instance redis 5
+}
+
+test "Cluster should be down now" {
+ assert_cluster_state fail
+}
+
+test "Restarting the old master node" {
+ restart_instance redis 0
+}
+
+test "Instance #0 gets converted into a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave}
+ } else {
+ fail "Old master was not converted into slave"
+ }
+}
+
+test "Restarting the new master node" {
+ restart_instance redis 5
+}
+
+test "Cluster is up again" {
+ assert_cluster_state ok
+}
diff --git a/tests/cluster/tests/09-pubsub.tcl b/tests/cluster/tests/09-pubsub.tcl
new file mode 100644
index 000000000..e62b91c4b
--- /dev/null
+++ b/tests/cluster/tests/09-pubsub.tcl
@@ -0,0 +1,40 @@
+# Test PUBLISH propagation across the cluster.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+proc test_cluster_publish {instance instances} {
+ # Subscribe all the instances but the one we use to send.
+ for {set j 0} {$j < $instances} {incr j} {
+ if {$j != $instance} {
+ R $j deferred 1
+ R $j subscribe testchannel
+ R $j read; # Read the subscribe reply
+ }
+ }
+
+ set data [randomValue]
+ R $instance PUBLISH testchannel $data
+
+ # Read the message back from all the nodes.
+ for {set j 0} {$j < $instances} {incr j} {
+ if {$j != $instance} {
+ set msg [R $j read]
+ assert {$data eq [lindex $msg 2]}
+ R $j unsubscribe testchannel
+ R $j read; # Read the unsubscribe reply
+ R $j deferred 0
+ }
+ }
+}
+
+test "Test publishing to master" {
+ test_cluster_publish 0 10
+}
+
+test "Test publishing to slave" {
+ test_cluster_publish 5 10
+}
diff --git a/tests/cluster/tests/10-manual-failover.tcl b/tests/cluster/tests/10-manual-failover.tcl
new file mode 100644
index 000000000..5441b79f3
--- /dev/null
+++ b/tests/cluster/tests/10-manual-failover.tcl
@@ -0,0 +1,192 @@
+# Check the manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+set numkeys 50000
+set numops 10000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+
+test "Send CLUSTER FAILOVER to #5, during load" {
+ for {set j 0} {$j < $numops} {incr j} {
+ # Write random data to random list.
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
+ set ele [randomValue]
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
+ lappend content($key) $ele
+
+ if {($j % 1000) == 0} {
+ puts -nonewline W; flush stdout
+ }
+
+ if {$j == $numops/2} {R 5 cluster failover}
+ }
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ assert {[$cluster lrange $key 0 -1] eq $value}
+ }
+}
+
+test "Instance #0 gets converted into a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave}
+ } else {
+ fail "Old master was not converted into slave"
+ }
+}
+
+## Check that manual failover does not happen if we can't talk with the master.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover
+}
+
+test "Instance #5 is still a slave after some time (no failover)" {
+ after 5000
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
+
+## Check with "force" failover happens anyway.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover force
+}
+
+test "Instance #5 is a master after some time" {
+ wait_for_condition 1000 50 {
+ [RI 5 role] eq {master}
+ } else {
+ fail "Instance #5 is not a master after some time regardless of FORCE"
+ }
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
new file mode 100644
index 000000000..f567c6962
--- /dev/null
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -0,0 +1,59 @@
+# Manual takeover test
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Killing majority of master nodes" {
+ kill_instance redis 0
+ kill_instance redis 1
+ kill_instance redis 2
+}
+
+test "Cluster should eventually be down" {
+ assert_cluster_state fail
+}
+
+test "Use takeover to bring slaves back" {
+ R 5 cluster failover takeover
+ R 6 cluster failover takeover
+ R 7 cluster failover takeover
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 4
+}
+
+test "Instance #5, #6, #7 are now masters" {
+ assert {[RI 5 role] eq {master}}
+ assert {[RI 6 role] eq {master}}
+ assert {[RI 7 role] eq {master}}
+}
+
+test "Restarting the previously killed master nodes" {
+ restart_instance redis 0
+ restart_instance redis 1
+ restart_instance redis 2
+}
+
+test "Instance #0, #1, #2 gets converted into a slaves" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave} && [RI 1 role] eq {slave} && [RI 2 role] eq {slave}
+ } else {
+ fail "Old masters not converted into slaves"
+ }
+}
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
new file mode 100644
index 000000000..48ecd1d50
--- /dev/null
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -0,0 +1,64 @@
+# Replica migration test #2.
+#
+# Check that the status of master that can be targeted by replica migration
+# is acquired again, after getting slots again, in a cluster where the
+# other masters have slaves.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+ create_cluster 5 15
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+ foreach_redis_id id {
+ if {$id < 5} {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 0 role] 2]] >= 2
+ } else {
+ fail "Master #$id does not have 2 slaves as expected"
+ }
+ }
+ }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+ set output [exec \
+ ../../../src/redis-trib.rb rebalance \
+ --weight ${master0_id}=0 \
+ 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout]
+}
+
+test "Master #0 should lose its replicas" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 0 role] 2]] == 0
+ } else {
+ fail "Master #0 still has replicas"
+ }
+}
+
+test "Resharding back some slot to master #0" {
+ # Wait for the cluster config to propagate before attempting a
+ # new resharding.
+ after 10000
+ set output [exec \
+ ../../../src/redis-trib.rb rebalance \
+ --weight ${master0_id}=.01 \
+ --use-empty-masters \
+ 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout]
+}
+
+test "Master #0 should re-acquire one or more replicas" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 0 role] 2]] >= 1
+ } else {
+ fail "Master #0 has no has replicas"
+ }
+}
diff --git a/tests/cluster/tests/helpers/onlydots.tcl b/tests/cluster/tests/helpers/onlydots.tcl
new file mode 100644
index 000000000..4a6d1aee0
--- /dev/null
+++ b/tests/cluster/tests/helpers/onlydots.tcl
@@ -0,0 +1,16 @@
+# Read the standard input and only shows dots in the output, filtering out
+# all the other characters. Designed to avoid buffering so that when
+# we get the output of redis-trib and want to show just the dots, we'll see
+# the dots as soon as redis-trib will output them.
+
+fconfigure stdin -buffering none
+
+while 1 {
+ set c [read stdin 1]
+ if {$c eq {}} {
+ exit 0; # EOF
+ } elseif {$c eq {.}} {
+ puts -nonewline .
+ flush stdout
+ }
+}
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
index 65fc806e1..466ab8f25 100644
--- a/tests/cluster/tests/includes/init-tests.tcl
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -27,10 +27,17 @@ test "Cluster nodes are reachable" {
test "Cluster nodes hard reset" {
foreach_redis_id id {
+ if {$::valgrind} {
+ set node_timeout 10000
+ } else {
+ set node_timeout 3000
+ }
catch {R $id flushall} ; # May fail for readonly slaves.
+ R $id MULTI
R $id cluster reset hard
R $id cluster set-config-epoch [expr {$id+1}]
- R $id config set cluster-node-timeout 3000
+ R $id EXEC
+ R $id config set cluster-node-timeout $node_timeout
R $id config set cluster-slave-validity-factor 10
R $id config rewrite
}
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 84ebec1c2..2ba67ac19 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -4,7 +4,7 @@
# instances.
#
# Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
-# This softare is released under the BSD License. See the COPYING file for
+# This software is released under the BSD License. See the COPYING file for
# more information.
package require Tcl 8.5
@@ -16,8 +16,10 @@ source ../support/server.tcl
source ../support/test.tcl
set ::verbose 0
+set ::valgrind 0
set ::pause_on_error 0
set ::simulate_error 0
+set ::failed 0
set ::sentinel_instances {}
set ::redis_instances {}
set ::sentinel_base_port 20000
@@ -32,6 +34,25 @@ if {[catch {cd tmp}]} {
exit 1
}
+# Execute the specified instance of the server specified by 'type', using
+# the provided configuration file. Returns the PID of the process.
+proc exec_instance {type cfgfile} {
+ if {$type eq "redis"} {
+ set prgname redis-server
+ } elseif {$type eq "sentinel"} {
+ set prgname redis-sentinel
+ } else {
+ error "Unknown instance type."
+ }
+
+ if {$::valgrind} {
+ set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
+ } else {
+ set pid [exec ../../../src/${prgname} $cfgfile &]
+ }
+ return $pid
+}
+
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
for {set j 0} {$j < $count} {incr j} {
@@ -58,14 +79,7 @@ proc spawn_instance {type base_port count {conf {}}} {
close $cfg
# Finally exec it and remember the pid for later cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } elseif {$type eq "sentinel"} {
- set prgname redis-sentinel
- } else {
- error "Unknown instance type."
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
lappend ::pids $pid
# Check availability
@@ -85,8 +99,25 @@ proc spawn_instance {type base_port count {conf {}}} {
}
}
+proc log_crashes {} {
+ set start_pattern {*REDIS BUG REPORT START*}
+ set logs [glob */log.txt]
+ foreach log $logs {
+ set fd [open $log]
+ set found 0
+ while {[gets $fd line] >= 0} {
+ if {[string match $start_pattern $line]} {
+ puts "\n*** Crash report found in $log ***"
+ set found 1
+ }
+ if {$found} {puts $line}
+ }
+ }
+}
+
proc cleanup {} {
puts "Cleaning up..."
+ log_crashes
foreach pid $::pids {
catch {exec kill -9 $pid}
}
@@ -96,8 +127,10 @@ proc cleanup {} {
}
proc abort_sentinel_test msg {
+ incr ::failed
puts "WARNING: Aborting the test."
puts ">>>>>>>> $msg"
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
@@ -113,12 +146,15 @@ proc parse_options {} {
set ::pause_on_error 1
} elseif {$opt eq "--fail"} {
set ::simulate_error 1
+ } elseif {$opt eq {--valgrind}} {
+ set ::valgrind 1
} elseif {$opt eq "--help"} {
puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
puts "\nOptions:"
puts "--single <pattern> Only runs tests specified by pattern."
puts "--pause-on-error Pause for manual inspection on error."
puts "--fail Simulate a test failure."
+ puts "--valgrind Run with valgrind."
puts "--help Shows this help."
exit 0
} else {
@@ -215,6 +251,7 @@ proc test {descr code} {
flush stdout
if {[catch {set retval [uplevel 1 $code]} error]} {
+ incr ::failed
if {[string match "assertion:*" $error]} {
set msg [string range $error 10 end]
puts [colorstr red $msg]
@@ -230,6 +267,38 @@ proc test {descr code} {
}
}
+# Check memory leaks when running on OSX using the "leaks" utility.
+proc check_leaks instance_types {
+ if {[string match {*Darwin*} [exec uname -a]]} {
+ puts -nonewline "Testing for memory leaks..."; flush stdout
+ foreach type $instance_types {
+ foreach_instance_id [set ::${type}_instances] id {
+ if {[instance_is_killed $type $id]} continue
+ set pid [get_instance_attrib $type $id pid]
+ set output {0 leaks}
+ catch {exec leaks $pid} output
+ if {[string match {*process does not exist*} $output] ||
+ [string match {*cannot examine*} $output]} {
+ # In a few tests we kill the server process.
+ set output "0 leaks"
+ } else {
+ puts -nonewline "$type/$pid "
+ flush stdout
+ }
+ if {![string match {*0 leaks*} $output]} {
+ puts [colorstr red "=== MEMORY LEAK DETECTED ==="]
+ puts "Instance type $type, ID $id:"
+ puts $output
+ puts "==="
+ incr ::failed
+ }
+ }
+ }
+ puts ""
+ }
+}
+
+# Execute all the units inside the 'tests' directory.
proc run_tests {} {
set tests [lsort [glob ../tests/*]]
foreach test $tests {
@@ -239,6 +308,18 @@ proc run_tests {} {
if {[file isdirectory $test]} continue
puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
source $test
+ check_leaks {redis sentinel}
+ }
+}
+
+# Print a message and exists with 0 / 1 according to zero or more failures.
+proc end_tests {} {
+ if {$::failed == 0} {
+ puts "GOOD! No errors."
+ exit 0
+ } else {
+ puts "WARNING $::failed tests faield."
+ exit 1
}
}
@@ -360,15 +441,31 @@ proc get_instance_id_by_port {type port} {
# The instance can be restarted with restart-instance.
proc kill_instance {type id} {
set pid [get_instance_attrib $type $id pid]
+ set port [get_instance_attrib $type $id port]
+
if {$pid == -1} {
error "You tried to kill $type $id twice."
}
+
exec kill -9 $pid
set_instance_attrib $type $id pid -1
set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
# Remove the PID from the list of pids to kill at exit.
set ::pids [lsearch -all -inline -not -exact $::pids $pid]
+
+ # Wait for the port it was using to be available again, so that's not
+ # an issue to start a new server ASAP with the same port.
+ set retry 10
+ while {[incr retry -1]} {
+ set port_is_free [catch {set s [socket 127.0.01 $port]}]
+ if {$port_is_free} break
+ catch {close $s}
+ after 1000
+ }
+ if {$retry == 0} {
+ error "Port $port does not return available after killing instance."
+ }
}
# Return true of the instance of the specified type/id is killed.
@@ -385,12 +482,7 @@ proc restart_instance {type id} {
# Execute the instance with its old setup and append the new pid
# file for cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } else {
- set prgname redis-sentinel
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
set_instance_attrib $type $id pid $pid
lappend ::pids $pid
@@ -403,5 +495,17 @@ proc restart_instance {type id} {
set link [redis 127.0.0.1 $port]
$link reconnect 1
set_instance_attrib $type $id link $link
+
+ # Make sure the instance is not loading the dataset when this
+ # function returns.
+ while 1 {
+ catch {[$link ping]} retval
+ if {[string match {*LOADING*} $retval]} {
+ after 100
+ continue
+ } else {
+ break
+ }
+ }
}
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index 9a24a96bd..e397faeeb 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -23,6 +23,84 @@ proc start_server_aof {overrides code} {
}
tags {"aof"} {
+ ## Server can start when aof-load-truncated is set to yes and AOF
+ ## is truncated, with an incomplete MULTI block.
+ create_aof {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof [formatCommand multi]
+ append_to_aof [formatCommand set bar world]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Unfinished MULTI: Server should start if load-truncated is yes" {
+ assert_equal 1 [is_alive $srv]
+ }
+ }
+
+ ## Should also start with truncated AOF without incomplete MULTI block.
+ create_aof {
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [formatCommand incr foo]
+ append_to_aof [string range [formatCommand incr foo] 0 end-1]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Short read: Server should start if load-truncated is yes" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ set client [redis [dict get $srv host] [dict get $srv port]]
+
+ test "Truncated AOF loaded: we expect foo to be equal to 5" {
+ assert {[$client get foo] eq "5"}
+ }
+
+ test "Append a new command after loading an incomplete AOF" {
+ $client incr foo
+ }
+ }
+
+ # Now the AOF file is expected to be correct
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Short read + command: Server should start" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ set client [redis [dict get $srv host] [dict get $srv port]]
+
+ test "Truncated AOF loaded: we expect foo to be equal to 6 now" {
+ assert {[$client get foo] eq "6"}
+ }
+ }
+
+ ## Test that the server exits when the AOF contains a format error
+ create_aof {
+ append_to_aof [formatCommand set foo hello]
+ append_to_aof "!!!"
+ append_to_aof [formatCommand set foo hello]
+ }
+
+ start_server_aof [list dir $server_path aof-load-truncated yes] {
+ test "Bad format: Server should have logged an error" {
+ set pattern "*Bad file format reading the append only file*"
+ set retry 10
+ while {$retry} {
+ set result [exec tail -1 < [dict get $srv stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ error "assertion:expected error not found on config file"
+ }
+ }
+ }
+
## Test the server doesn't start when the AOF contains an unfinished MULTI
create_aof {
append_to_aof [formatCommand set foo hello]
@@ -30,12 +108,12 @@ tags {"aof"} {
append_to_aof [formatCommand set bar world]
}
- start_server_aof [list dir $server_path] {
+ start_server_aof [list dir $server_path aof-load-truncated no] {
test "Unfinished MULTI: Server should have logged an error" {
set pattern "*Unexpected end of file reading the append only file*"
set retry 10
while {$retry} {
- set result [exec tail -n1 < [dict get $srv stdout]]
+ set result [exec tail -1 < [dict get $srv stdout]]
if {[string match $pattern $result]} {
break
}
@@ -54,12 +132,12 @@ tags {"aof"} {
append_to_aof [string range [formatCommand set bar world] 0 end-1]
}
- start_server_aof [list dir $server_path] {
+ start_server_aof [list dir $server_path aof-load-truncated no] {
test "Short read: Server should have logged an error" {
- set pattern "*Bad file format reading the append only file*"
+ set pattern "*Unexpected end of file reading the append only file*"
set retry 10
while {$retry} {
- set result [exec tail -n1 < [dict get $srv stdout]]
+ set result [exec tail -1 < [dict get $srv stdout]]
if {[string match $pattern $result]} {
break
}
@@ -86,12 +164,12 @@ tags {"aof"} {
}
## Test that the server can be started using the truncated AOF
- start_server_aof [list dir $server_path] {
+ start_server_aof [list dir $server_path aof-load-truncated no] {
test "Fixed AOF: Server should have been started" {
assert_equal 1 [is_alive $srv]
}
- test "Fixed AOF: Keyspace should contain values that were parsable" {
+ test "Fixed AOF: Keyspace should contain values that were parseable" {
set client [redis [dict get $srv host] [dict get $srv port]]
wait_for_condition 50 100 {
[catch {$client ping} e] == 0
@@ -110,6 +188,30 @@ tags {"aof"} {
append_to_aof [formatCommand spop set]
}
+ start_server_aof [list dir $server_path aof-load-truncated no] {
+ test "AOF+SPOP: Server should have been started" {
+ assert_equal 1 [is_alive $srv]
+ }
+
+ test "AOF+SPOP: Set should have 1 member" {
+ set client [redis [dict get $srv host] [dict get $srv port]]
+ wait_for_condition 50 100 {
+ [catch {$client ping} e] == 0
+ } else {
+ fail "Loading DB is taking too much time."
+ }
+ assert_equal 1 [$client scard set]
+ }
+ }
+
+ ## Uses the alsoPropagate() API.
+ create_aof {
+ append_to_aof [formatCommand sadd set foo]
+ append_to_aof [formatCommand sadd set bar]
+ append_to_aof [formatCommand sadd set gah]
+ append_to_aof [formatCommand spop set 2]
+ }
+
start_server_aof [list dir $server_path] {
test "AOF+SPOP: Server should have been started" {
assert_equal 1 [is_alive $srv]
@@ -133,7 +235,7 @@ tags {"aof"} {
append_to_aof [formatCommand rpush list bar]
}
- start_server_aof [list dir $server_path] {
+ start_server_aof [list dir $server_path aof-load-truncated no] {
test "AOF+EXPIRE: Server should have been started" {
assert_equal 1 [is_alive $srv]
}
diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl
new file mode 100644
index 000000000..c1f4854d4
--- /dev/null
+++ b/tests/integration/logging.tcl
@@ -0,0 +1,24 @@
+set server_path [tmpdir server.log]
+set system_name [string tolower [exec uname -s]]
+
+if {$system_name eq {linux} || $system_name eq {darwin}} {
+ start_server [list overrides [list dir $server_path]] {
+ test "Server is able to generate a stack trace on selected systems" {
+ r config set watchdog-period 200
+ r debug sleep 1
+ set pattern "*debugCommand*"
+ set retry 10
+ while {$retry} {
+ set result [exec tail -100 < [srv 0 stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ error "assertion:expected stack trace not found into log file"
+ }
+ }
+ }
+}
diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl
new file mode 100644
index 000000000..ba610a3b8
--- /dev/null
+++ b/tests/integration/psync2-reg.tcl
@@ -0,0 +1,78 @@
+# Issue 3899 regression test.
+# We create a chain of three instances: master -> slave -> slave2
+# and continuously break the link while traffic is generated by
+# redis-benchmark. At the end we check that the data is the same
+# everywhere.
+
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0 ; # Do not exit at end of the test
+
+ set duration 20 ; # Total test seconds
+
+ for {set j 0} {$j < 3} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ # Setup the replication and backlog parameters
+ test "PSYNC2 #3899 regression: setup" {
+ $R(1) slaveof $R_host(0) $R_port(0)
+ $R(2) slaveof $R_host(0) $R_port(0)
+ $R(0) set foo bar
+ wait_for_condition 50 1000 {
+ [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1
+ } else {
+ fail "Slaves not replicating from master"
+ }
+ $R(0) config set repl-backlog-size 10mb
+ $R(1) config set repl-backlog-size 10mb
+ }
+
+ set cycle_start_time [clock milliseconds]
+ set bench_pid [exec src/redis-benchmark -p $R_port(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
+ while 1 {
+ set elapsed [expr {[clock milliseconds]-$cycle_start_time}]
+ if {$elapsed > $duration*1000} break
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill first slave" {
+ $R(1) client kill type master
+ }
+ }
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill chained slave" {
+ $R(2) client kill type master
+ }
+ }
+ after 100
+ }
+ exec kill -9 $bench_pid
+
+ if {$debug_msg} {
+ for {set j 0} {$j < 100} {incr j} {
+ if {
+ [$R(0) debug digest] == [$R(1) debug digest] &&
+ [$R(1) debug digest] == [$R(2) debug digest]
+ } break
+ puts [$R(0) debug digest]
+ puts [$R(1) debug digest]
+ puts [$R(2) debug digest]
+ after 1000
+ }
+ }
+
+ test "PSYNC2 #3899 regression: verify consistency" {
+ wait_for_condition 50 1000 {
+ ([$R(0) debug digest] eq [$R(1) debug digest]) &&
+ ([$R(1) debug digest] eq [$R(2) debug digest])
+ } else {
+ fail "The three instances have different data sets"
+ }
+ }
+}}}
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
new file mode 100644
index 000000000..d91969e3e
--- /dev/null
+++ b/tests/integration/psync2.tcl
@@ -0,0 +1,182 @@
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
+start_server {} {
+start_server {} {
+ set master_id 0 ; # Current master
+ set start_time [clock seconds] ; # Test start time
+ set counter_value 0 ; # Current value of the Redis counter "x"
+
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0; ; # Do not exit at end of the test
+
+ set duration 20 ; # Total test seconds
+
+ set genload 1 ; # Load master with writes at every cycle
+
+ set genload_time 5000 ; # Writes duration time in ms
+
+ set disconnect 1 ; # Break replication link between random
+ # master and slave instances while the
+ # master is loaded with writes.
+
+ set disconnect_period 1000 ; # Disconnect repl link every N ms.
+
+ for {set j 0} {$j < 5} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ set cycle 1
+ while {([clock seconds]-$start_time) < $duration} {
+ test "PSYNC2: --- CYCLE $cycle ---" {
+ incr cycle
+ }
+
+ # Create a random replication layout.
+ # Start with switching master (this simulates a failover).
+
+ # 1) Select the new master.
+ set master_id [randomInt 5]
+ set used [list $master_id]
+ test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
+ $R($master_id) slaveof no one
+ if {$counter_value == 0} {
+ $R($master_id) set x $counter_value
+ }
+ }
+
+ # 2) Attach all the slaves to a random instance
+ while {[llength $used] != 5} {
+ while 1 {
+ set slave_id [randomInt 5]
+ if {[lsearch -exact $used $slave_id] == -1} break
+ }
+ set rand [randomInt [llength $used]]
+ set mid [lindex $used $rand]
+ set master_host $R_host($mid)
+ set master_port $R_port($mid)
+
+ test "PSYNC2: Set #$slave_id to replicate from #$mid" {
+ $R($slave_id) slaveof $master_host $master_port
+ }
+ lappend used $slave_id
+ }
+
+ # 3) Increment the counter and wait for all the instances
+ # to converge.
+ test "PSYNC2: cluster is consistent after failover" {
+ $R($master_id) incr x; incr counter_value
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # 4) Generate load while breaking the connection of random
+ # slave-master pairs.
+ test "PSYNC2: generate load while killing replication links" {
+ set t [clock milliseconds]
+ set next_break [expr {$t+$disconnect_period}]
+ while {[clock milliseconds]-$t < $genload_time} {
+ if {$genload} {
+ $R($master_id) incr x; incr counter_value
+ }
+ if {[clock milliseconds] == $next_break} {
+ set next_break \
+ [expr {[clock milliseconds]+$disconnect_period}]
+ set slave_id [randomInt 5]
+ if {$disconnect} {
+ $R($slave_id) client kill type master
+ if {$debug_msg} {
+ puts "+++ Breaking link for slave #$slave_id"
+ }
+ }
+ }
+ }
+ }
+
+ # 5) Increment the counter and wait for all the instances
+ set x [$R($master_id) get x]
+ test "PSYNC2: cluster is consistent after load (x = $x)" {
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # Put down the old master so that it cannot generate more
+ # replication stream, this way in the next master switch, the time at
+ # which we move slaves away is not important, each will have full
+ # history (otherwise PINGs will make certain slaves have more history),
+ # and sometimes a full resync will be needed.
+ $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail.
+
+ if {$debug_msg} {
+ for {set j 0} {$j < 5} {incr j} {
+ puts "$j: sync_full: [status $R($j) sync_full]"
+ puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
+ puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
+ puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
+ puts "---"
+ }
+ }
+
+ test "PSYNC2: total sum of full synchronizations is exactly 4" {
+ set sum 0
+ for {set j 0} {$j < 5} {incr j} {
+ incr sum [status $R($j) sync_full]
+ }
+ assert {$sum == 4}
+ }
+ }
+
+ test "PSYNC2: Bring the master back again for next test" {
+ $R($master_id) slaveof no one
+ set master_host $R_host($master_id)
+ set master_port $R_port($master_id)
+ for {set j 0} {$j < 5} {incr j} {
+ if {$j == $master_id} continue
+ $R($j) slaveof $master_host $master_port
+ }
+
+ # Wait for slaves to sync
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4
+ } else {
+ fail "Slave not reconnecting"
+ }
+ }
+
+ test "PSYNC2: Partial resync after restart using RDB aux fields" {
+ # Pick a random slave
+ set slave_id [expr {($master_id+1)%5}]
+ set sync_count [status $R($master_id) sync_full]
+ catch {
+ $R($slave_id) config rewrite
+ $R($slave_id) debug restart
+ }
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4
+ } else {
+ fail "Slave not reconnecting"
+ }
+ set new_sync_count [status $R($master_id) sync_full]
+ assert {$sync_count == $new_sync_count}
+ }
+
+ if {$no_exit} {
+ while 1 { puts -nonewline .; flush stdout; after 1000}
+ }
+
+}}}}}
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index 71876a6ed..66aad4cc7 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -7,19 +7,19 @@ start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rd
test "RDB encoding loading test" {
r select 0
csvdump r
- } {"compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-"hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000",
-"hash_zipped","hash","a","1","b","2","c","3",
-"list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000",
-"list_zipped","list","1","2","3","a","b","c","100000","6000000000",
-"number","string","10"
-"set","set","1","100000","2","3","6000000000","a","b","c",
-"set_zipped_1","set","1","2","3","4",
-"set_zipped_2","set","100000","200000","300000","400000",
-"set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000",
-"string","string","Hello World"
-"zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000",
-"zset_zipped","zset","a","1","b","2","c","3",
+ } {"0","compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+"0","hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000",
+"0","hash_zipped","hash","a","1","b","2","c","3",
+"0","list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000",
+"0","list_zipped","list","1","2","3","a","b","c","100000","6000000000",
+"0","number","string","10"
+"0","set","set","1","100000","2","3","6000000000","a","b","c",
+"0","set_zipped_1","set","1","2","3","4",
+"0","set_zipped_2","set","100000","200000","300000","400000",
+"0","set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000",
+"0","string","string","Hello World"
+"0","zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000",
+"0","zset_zipped","zset","a","1","b","2","c","3",
}
}
@@ -66,7 +66,7 @@ if {!$isroot} {
test {Server should not start if RDB file can't be open} {
wait_for_condition 50 100 {
[string match {*Fatal error loading*} \
- [exec tail -n1 < [dict get $srv stdout]]]
+ [exec tail -1 < [dict get $srv stdout]]]
} else {
fail "Server started even if RDB was unreadable!"
}
@@ -89,8 +89,8 @@ close $fd
start_server_and_kill_it [list "dir" $server_path] {
test {Server should not start if RDB is corrupted} {
wait_for_condition 50 100 {
- [string match {*RDB checksum*} \
- [exec tail -n1 < [dict get $srv stdout]]]
+ [string match {*CRC error*} \
+ [exec tail -10 < [dict get $srv stdout]]]
} else {
fail "Server started even if RDB was corrupted!"
}
diff --git a/tests/integration/replication-3.tcl b/tests/integration/replication-3.tcl
index 0fcbad45b..50dcb9a9a 100644
--- a/tests/integration/replication-3.tcl
+++ b/tests/integration/replication-3.tcl
@@ -30,6 +30,18 @@ start_server {tags {"repl"}} {
}
assert_equal [r debug digest] [r -1 debug digest]
}
+
+ test {Slave is able to evict keys created in writable slaves} {
+ r -1 select 5
+ assert {[r -1 dbsize] == 0}
+ r -1 config set slave-read-only no
+ r -1 set key1 1 ex 5
+ r -1 set key2 2 ex 5
+ r -1 set key3 3 ex 5
+ assert {[r -1 dbsize] == 3}
+ after 6000
+ r -1 dbsize
+ } {0}
}
}
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index 6db9ffe2b..1c559b706 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -132,5 +132,24 @@ start_server {tags {"repl"}} {
}
assert {[$master dbsize] > 0}
}
+
+ test {Replication of SPOP command -- alsoPropagate() API} {
+ $master del myset
+ set size [expr 1+[randomInt 100]]
+ set content {}
+ for {set j 0} {$j < $size} {incr j} {
+ lappend content [randomValue]
+ }
+ $master sadd myset {*}$content
+
+ set count [randomInt 100]
+ set result [$master spop myset $count]
+
+ wait_for_condition 50 100 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "SPOP replication inconsistency"
+ }
+ }
}
}
diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl
index f131dafe3..2b9e13f50 100644
--- a/tests/integration/replication-psync.tcl
+++ b/tests/integration/replication-psync.tcl
@@ -13,7 +13,11 @@ proc stop_bg_complex_data {handle} {
#
# You can specifiy backlog size, ttl, delay before reconnection, test duration
# in seconds, and an additional condition to verify at the end.
-proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
+#
+# If reconnect is > 0, the test actually tries to break the connection and
+# reconnect with the master, otherwise just the initial synchronization is
+# checked for consistency.
+proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} {
start_server {tags {"repl"}} {
start_server {} {
@@ -24,6 +28,8 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
$master config set repl-backlog-size $backlog_size
$master config set repl-backlog-ttl $backlog_ttl
+ $master config set repl-diskless-sync $diskless
+ $master config set repl-diskless-sync-delay 1
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
@@ -41,29 +47,31 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
# Check that the background clients are actually writing.
test {Detect write load to master} {
- wait_for_condition 50 100 {
+ wait_for_condition 50 1000 {
[$master dbsize] > 100
} else {
fail "Can't detect write load from background clients."
}
}
- test "Test replication partial resync: $descr" {
+ test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" {
# Now while the clients are writing data, break the maste-slave
# link multiple times.
- for {set j 0} {$j < $duration*10} {incr j} {
- after 100
- # catch {puts "MASTER [$master dbsize] keys, SLAVE [$slave dbsize] keys"}
+ if ($reconnect) {
+ for {set j 0} {$j < $duration*10} {incr j} {
+ after 100
+ # catch {puts "MASTER [$master dbsize] keys, SLAVE [$slave dbsize] keys"}
- if {($j % 20) == 0} {
- catch {
- if {$delay} {
- $slave multi
- $slave client kill $master_host:$master_port
- $slave debug sleep $delay
- $slave exec
- } else {
- $slave client kill $master_host:$master_port
+ if {($j % 20) == 0} {
+ catch {
+ if {$delay} {
+ $slave multi
+ $slave client kill $master_host:$master_port
+ $slave debug sleep $delay
+ $slave exec
+ } else {
+ $slave client kill $master_host:$master_port
+ }
}
}
}
@@ -98,18 +106,23 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
}
}
-test_psync {ok psync} 6 1000000 3600 0 {
- assert {[s -1 sync_partial_ok] > 0}
-}
+foreach diskless {no yes} {
+ test_psync {no reconnection, just sync} 6 1000000 3600 0 {
+ } $diskless 0
-test_psync {no backlog} 6 100 3600 0.5 {
- assert {[s -1 sync_partial_err] > 0}
-}
+ test_psync {ok psync} 6 100000000 3600 0 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $diskless 1
-test_psync {ok after delay} 3 100000000 3600 3 {
- assert {[s -1 sync_partial_ok] > 0}
-}
+ test_psync {no backlog} 6 100 3600 0.5 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $diskless 1
+
+ test_psync {ok after delay} 3 100000000 3600 3 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $diskless 1
-test_psync {backlog expired} 3 100000000 1 3 {
- assert {[s -1 sync_partial_err] > 0}
+ test_psync {backlog expired} 3 100000000 1 3 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $diskless 1
}
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index ae1977dc2..e811cf0ee 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -1,10 +1,70 @@
+proc log_file_matches {log pattern} {
+ set fp [open $log r]
+ set content [read $fp]
+ close $fp
+ string match $pattern $content
+}
+
+start_server {tags {"repl"}} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set slave_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Configure the master in order to hang waiting for the BGSAVE
+ # operation, so that the slave remains in the handshake state.
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 1000
+
+ # Use a short replication timeout on the slave, so that if there
+ # are no bugs the timeout is triggered in a reasonable amount
+ # of time.
+ $slave config set repl-timeout 5
+
+ # Start the replication process...
+ $slave slaveof $master_host $master_port
+
+ test {Slave enters handshake} {
+ wait_for_condition 50 1000 {
+ [string match *handshake* [$slave role]]
+ } else {
+ fail "Slave does not enter handshake state"
+ }
+ }
+
+ # But make the master unable to send
+ # the periodic newlines to refresh the connection. The slave
+ # should detect the timeout.
+ $master debug sleep 10
+
+ test {Slave is able to detect timeout during handshake} {
+ wait_for_condition 50 1000 {
+ [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
+ } else {
+ fail "Slave is not able to detect timeout"
+ }
+ }
+ }
+}
+
start_server {tags {"repl"}} {
+ set A [srv 0 client]
+ set A_host [srv 0 host]
+ set A_port [srv 0 port]
start_server {} {
- test {First server should have role slave after SLAVEOF} {
- r -1 slaveof [srv 0 host] [srv 0 port]
+ set B [srv 0 client]
+ set B_host [srv 0 host]
+ set B_port [srv 0 port]
+
+ test {Set instance A as slave of B} {
+ $A slaveof $B_host $B_port
wait_for_condition 50 100 {
- [s -1 role] eq {slave} &&
- [string match {*master_link_status:up*} [r -1 info replication]]
+ [lindex [$A role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$A info replication]]
} else {
fail "Can't turn the instance into a slave"
}
@@ -15,9 +75,9 @@ start_server {tags {"repl"}} {
$rd brpoplpush a b 5
r lpush a foo
wait_for_condition 50 100 {
- [r debug digest] eq [r -1 debug digest]
+ [$A debug digest] eq [$B debug digest]
} else {
- fail "Master and slave have different digest: [r debug digest] VS [r -1 debug digest]"
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
}
}
@@ -28,24 +88,53 @@ start_server {tags {"repl"}} {
r lpush c 3
$rd brpoplpush c d 5
after 1000
- assert_equal [r debug digest] [r -1 debug digest]
+ assert_equal [$A debug digest] [$B debug digest]
+ }
+
+ test {BLPOP followed by role change, issue #2473} {
+ set rd [redis_deferring_client]
+ $rd blpop foo 0 ; # Block while B is a master
+
+ # Turn B into master of A
+ $A slaveof no one
+ $B slaveof $A_host $A_port
+ wait_for_condition 50 100 {
+ [lindex [$B role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$B info replication]]
+ } else {
+ fail "Can't turn the instance into a slave"
+ }
+
+ # Push elements into the "foo" list of the new slave.
+ # If the client is still attached to the instance, we'll get
+ # a desync between the two instances.
+ $A rpush foo a b c
+ after 100
+
+ wait_for_condition 50 100 {
+ [$A debug digest] eq [$B debug digest] &&
+ [$A lrange foo 0 -1] eq {a b c} &&
+ [$B lrange foo 0 -1] eq {a b c}
+ } else {
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
+ }
}
}
}
start_server {tags {"repl"}} {
r set mykey foo
-
+
start_server {} {
test {Second server should have role master at first} {
s role
} {master}
-
+
test {SLAVEOF should start with link status "down"} {
r slaveof [srv -1 host] [srv -1 port]
s master_link_status
} {down}
-
+
test {The role should immediately be changed to "slave"} {
s role
} {slave}
@@ -54,11 +143,11 @@ start_server {tags {"repl"}} {
test {Sync should have transferred keys from master} {
r get mykey
} {foo}
-
+
test {The link status should be up} {
s master_link_status
} {up}
-
+
test {SET on the master should immediately propagate} {
r -1 set mykey bar
@@ -94,79 +183,86 @@ start_server {tags {"repl"}} {
}
}
-start_server {tags {"repl"}} {
- set master [srv 0 client]
- set master_host [srv 0 host]
- set master_port [srv 0 port]
- set slaves {}
- set load_handle0 [start_write_load $master_host $master_port 3]
- set load_handle1 [start_write_load $master_host $master_port 5]
- set load_handle2 [start_write_load $master_host $master_port 20]
- set load_handle3 [start_write_load $master_host $master_port 8]
- set load_handle4 [start_write_load $master_host $master_port 4]
- start_server {} {
- lappend slaves [srv 0 client]
+foreach dl {no yes} {
+ start_server {tags {"repl"}} {
+ set master [srv 0 client]
+ $master config set repl-diskless-sync $dl
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set slaves {}
+ set load_handle0 [start_write_load $master_host $master_port 3]
+ set load_handle1 [start_write_load $master_host $master_port 5]
+ set load_handle2 [start_write_load $master_host $master_port 20]
+ set load_handle3 [start_write_load $master_host $master_port 8]
+ set load_handle4 [start_write_load $master_host $master_port 4]
start_server {} {
lappend slaves [srv 0 client]
start_server {} {
lappend slaves [srv 0 client]
- test "Connect multiple slaves at the same time (issue #141)" {
- # Send SALVEOF commands to slaves
- [lindex $slaves 0] slaveof $master_host $master_port
- [lindex $slaves 1] slaveof $master_host $master_port
- [lindex $slaves 2] slaveof $master_host $master_port
-
- # Wait for all the three slaves to reach the "online" state
- set retry 500
- while {$retry} {
- set info [r -3 info]
- if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} {
- break
+ start_server {} {
+ lappend slaves [srv 0 client]
+ test "Connect multiple slaves at the same time (issue #141), diskless=$dl" {
+ # Send SLAVEOF commands to slaves
+ [lindex $slaves 0] slaveof $master_host $master_port
+ [lindex $slaves 1] slaveof $master_host $master_port
+ [lindex $slaves 2] slaveof $master_host $master_port
+
+ # Wait for all the three slaves to reach the "online"
+ # state from the POV of the master.
+ set retry 500
+ while {$retry} {
+ set info [r -3 info]
+ if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} {
+ break
+ } else {
+ incr retry -1
+ after 100
+ }
+ }
+ if {$retry == 0} {
+ error "assertion:Slaves not correctly synchronized"
+ }
+
+ # Wait until slaves acknowledge they are online so
+ # we are sure that DBSIZE and DEBUG DIGEST will not
+ # fail because of timing issues.
+ wait_for_condition 500 100 {
+ [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
+ [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
+ [lindex [[lindex $slaves 2] role] 3] eq {connected}
} else {
- incr retry -1
- after 100
+ fail "Slaves still not connected after some time"
}
- }
- if {$retry == 0} {
- error "assertion:Slaves not correctly synchronized"
- }
- # Stop the write load
- stop_write_load $load_handle0
- stop_write_load $load_handle1
- stop_write_load $load_handle2
- stop_write_load $load_handle3
- stop_write_load $load_handle4
-
- # Wait that slaves exit the "loading" state
- wait_for_condition 500 100 {
- ![string match {*loading:1*} [[lindex $slaves 0] info]] &&
- ![string match {*loading:1*} [[lindex $slaves 1] info]] &&
- ![string match {*loading:1*} [[lindex $slaves 2] info]]
- } else {
- fail "Slaves still loading data after too much time"
- }
+ # Stop the write load
+ stop_write_load $load_handle0
+ stop_write_load $load_handle1
+ stop_write_load $load_handle2
+ stop_write_load $load_handle3
+ stop_write_load $load_handle4
- # Make sure that slaves and master have same number of keys
- wait_for_condition 500 100 {
- [$master dbsize] == [[lindex $slaves 0] dbsize] &&
- [$master dbsize] == [[lindex $slaves 1] dbsize] &&
- [$master dbsize] == [[lindex $slaves 2] dbsize]
- } else {
- fail "Different number of keys between masted and slave after too long time."
- }
+ # Make sure that slaves and master have same
+ # number of keys
+ wait_for_condition 500 100 {
+ [$master dbsize] == [[lindex $slaves 0] dbsize] &&
+ [$master dbsize] == [[lindex $slaves 1] dbsize] &&
+ [$master dbsize] == [[lindex $slaves 2] dbsize]
+ } else {
+ fail "Different number of keys between masted and slave after too long time."
+ }
- # Check digests
- set digest [$master debug digest]
- set digest0 [[lindex $slaves 0] debug digest]
- set digest1 [[lindex $slaves 1] debug digest]
- set digest2 [[lindex $slaves 2] debug digest]
- assert {$digest ne 0000000000000000000000000000000000000000}
- assert {$digest eq $digest0}
- assert {$digest eq $digest1}
- assert {$digest eq $digest2}
- }
- }
+ # Check digests
+ set digest [$master debug digest]
+ set digest0 [[lindex $slaves 0] debug digest]
+ set digest1 [[lindex $slaves 1] debug digest]
+ set digest2 [[lindex $slaves 2] debug digest]
+ assert {$digest ne 0000000000000000000000000000000000000000}
+ assert {$digest eq $digest0}
+ assert {$digest eq $digest1}
+ assert {$digest eq $digest2}
+ }
+ }
+ }
}
}
}
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
index 66198af94..9a2fcfb49 100644
--- a/tests/sentinel/run.tcl
+++ b/tests/sentinel/run.tcl
@@ -1,5 +1,5 @@
# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
-# This softare is released under the BSD License. See the COPYING file for
+# This software is released under the BSD License. See the COPYING file for
# more information.
cd tests/sentinel
@@ -13,6 +13,7 @@ proc main {} {
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl
index 1a60d814b..5214fdce1 100644
--- a/tests/sentinel/tests/05-manual.tcl
+++ b/tests/sentinel/tests/05-manual.tcl
@@ -6,7 +6,8 @@ test "Manual failover works" {
set old_port [RI $master_id tcp_port]
set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
assert {[lindex $addr 1] == $old_port}
- S 0 SENTINEL FAILOVER mymaster
+ catch {S 0 SENTINEL FAILOVER mymaster} reply
+ assert {$reply eq "OK"}
foreach_sentinel_id id {
wait_for_condition 1000 50 {
[lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
diff --git a/tests/sentinel/tests/06-ckquorum.tcl b/tests/sentinel/tests/06-ckquorum.tcl
new file mode 100644
index 000000000..31e5fa2f8
--- /dev/null
+++ b/tests/sentinel/tests/06-ckquorum.tcl
@@ -0,0 +1,34 @@
+# Test for the SENTINEL CKQUORUM command
+
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+test "CKQUORUM reports OK and the right amount of Sentinels" {
+ foreach_sentinel_id id {
+ assert_match "*OK $num_sentinels usable*" [S $id SENTINEL CKQUORUM mymaster]
+ }
+}
+
+test "CKQUORUM detects quorum cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum [expr {$num_sentinels+1}]
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+}
+
+test "CKQUORUM detects failover authorization cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum 1
+ kill_instance sentinel 1
+ kill_instance sentinel 2
+ kill_instance sentinel 3
+ after 5000
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+ restart_instance sentinel 1
+ restart_instance sentinel 2
+ restart_instance sentinel 3
+}
+
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
new file mode 100644
index 000000000..a60656e59
--- /dev/null
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -0,0 +1,68 @@
+# Test conditions where an instance is considered to be down
+
+source "../tests/includes/init-tests.tcl"
+
+proc ensure_master_up {} {
+ wait_for_condition 1000 50 {
+ [dict get [S 4 sentinel master mymaster] flags] eq "master"
+ } else {
+ fail "Master flags are not just 'master'"
+ }
+}
+
+proc ensure_master_down {} {
+ wait_for_condition 1000 50 {
+ [string match *down* \
+ [dict get [S 4 sentinel master mymaster] flags]]
+ } else {
+ fail "Master is not flagged SDOWN"
+ }
+}
+
+test "Crash the majority of Sentinels to prevent failovers for this unit" {
+ for {set id 0} {$id < $quorum} {incr id} {
+ kill_instance sentinel $id
+ }
+}
+
+test "SDOWN is triggered by non-responding but not crashed instance" {
+ lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+ ensure_master_up
+ exec ../../../src/redis-cli -h $host -p $port debug sleep 10 > /dev/null &
+ ensure_master_down
+ ensure_master_up
+}
+
+test "SDOWN is triggered by crashed instance" {
+ lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+ ensure_master_up
+ kill_instance redis 0
+ ensure_master_down
+ restart_instance redis 0
+ ensure_master_up
+}
+
+test "SDOWN is triggered by masters advertising as slaves" {
+ ensure_master_up
+ R 0 slaveof 127.0.0.1 34567
+ ensure_master_down
+ R 0 slaveof no one
+ ensure_master_up
+}
+
+test "SDOWN is triggered by misconfigured instance repling with errors" {
+ ensure_master_up
+ set orig_dir [lindex [R 0 config get dir] 1]
+ set orig_save [lindex [R 0 config get save] 1]
+ # Set dir to / and filename to "tmp" to make sure it will fail.
+ R 0 config set dir /
+ R 0 config set dbfilename tmp
+ R 0 config set save "1000000 1000000"
+ R 0 bgsave
+ ensure_master_down
+ R 0 config set save $orig_save
+ R 0 config set dir $orig_dir
+ R 0 config set dbfilename dump.rdb
+ R 0 bgsave
+ ensure_master_up
+}
diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index b007e3b05..1576053b4 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -58,7 +58,8 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
set idx 0; # Index of the node that will respond.
set errmsg {}
foreach start_node $::redis_cluster::startup_nodes($id) {
- lassign [split $start_node :] start_host start_port
+ set ip_port [lindex [split $start_node @] 0]
+ lassign [split $ip_port :] start_host start_port
if {[catch {
set r {}
set r [redis $start_host $start_port]
@@ -68,7 +69,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
if {$r ne {}} {catch {$r close}}
incr idx
if {[string length $errmsg] < 200} {
- append errmsg " $start_node: $e"
+ append errmsg " $ip_port: $e"
}
continue ; # Try next.
} else {
@@ -98,6 +99,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
set args [split $line " "]
lassign $args nodeid addr flags slaveof pingsent pongrecv configepoch linkstate
set slots [lrange $args 8 end]
+ set addr [lindex [split $addr @] 0]
if {$addr eq {:0}} {
set addr $start_host:$start_port
}
@@ -226,6 +228,8 @@ proc ::redis_cluster::get_keys_from_command {cmd argv} {
# Special handling for other commands
switch -exact $cmd {
mget {return $argv}
+ eval {return [lrange $argv 2 1+[lindex $argv 1]]}
+ evalsha {return [lrange $argv 2 1+[lindex $argv 1]]}
}
# All the remaining commands are not handled.
diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl
index ad9cbe8ab..cd8ae3a34 100644
--- a/tests/support/redis.tcl
+++ b/tests/support/redis.tcl
@@ -18,7 +18,7 @@
# $r ping [list handlePong]
# }
# }
-#
+#
# set r [redis]
# $r blocking 0
# $r get fo [list handlePong]
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 9f92ce31e..c36b30775 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -54,10 +54,15 @@ proc kill_server config {
# kill server and wait for the process to be totally exited
catch {exec kill $pid}
+ if {$::valgrind} {
+ set max_wait 60000
+ } else {
+ set max_wait 10000
+ }
while {[is_alive $config]} {
incr wait 10
- if {$wait >= 5000} {
+ if {$wait >= $max_wait} {
puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid}
} elseif {$wait % 1000 == 0} {
@@ -70,6 +75,9 @@ proc kill_server config {
if {$::valgrind} {
check_valgrind_errors [dict get $config stderr]
}
+
+ # Remove this pid from the set of active pids in the test server.
+ send_data_packet $::test_server_fd server-killed $pid
}
proc is_alive config {
@@ -178,10 +186,10 @@ proc start_server {options {code undefined}} {
dict set config $directive $arguments
}
}
-
+
# use a different directory every time a server is started
dict set config dir [tmpdir server]
-
+
# start every server on a different port
set ::port [find_available_port [expr {$::port+1}]]
dict set config port $::port
@@ -190,7 +198,7 @@ proc start_server {options {code undefined}} {
foreach {directive arguments} [concat $::global_overrides $overrides] {
dict set config $directive $arguments
}
-
+
# write new configuration to temporary file
set config_file [tmpfile redis.conf]
set fp [open $config_file w+]
@@ -204,11 +212,16 @@ proc start_server {options {code undefined}} {
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
if {$::valgrind} {
- exec valgrind --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &
+ set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
+ } elseif ($::stack_logging) {
+ set pid [exec /usr/bin/env MallocStackLogging=1 MallocLogFile=/tmp/malloc_log.txt src/redis-server $config_file > $stdout 2> $stderr &]
} else {
- exec src/redis-server $config_file > $stdout 2> $stderr &
+ set pid [exec src/redis-server $config_file > $stdout 2> $stderr &]
}
-
+
+ # Tell the test server about this new instance.
+ send_data_packet $::test_server_fd server-spawned $pid
+
# check that the server actually started
# ugly but tries to be as fast as possible...
if {$::valgrind} {set retrynum 1000} else {set retrynum 100}
@@ -233,10 +246,10 @@ proc start_server {options {code undefined}} {
start_server_error $config_file $err
return
}
-
- # find out the pid
- while {![info exists pid]} {
- regexp {PID:\s(\d+)} [exec cat $stdout] _ pid
+
+ # Wait for actual startup
+ while {![info exists _pid]} {
+ regexp {PID:\s(\d+)} [exec cat $stdout] _ _pid
after 100
}
@@ -265,7 +278,7 @@ proc start_server {options {code undefined}} {
while 1 {
# check that the server actually started and is ready for connections
- if {[exec grep "ready to accept" | wc -l < $stdout] > 0} {
+ if {[exec grep -i "Ready to accept" | wc -l < $stdout] > 0} {
break
}
after 10
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index bf2cb0e2f..d60eb3c47 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -19,9 +19,12 @@ proc assert_match {pattern value} {
}
}
-proc assert_equal {expected value} {
+proc assert_equal {expected value {detail ""}} {
if {$expected ne $value} {
- error "assertion:Expected '$value' to be equal to '$expected'"
+ if {$detail ne ""} {
+ set detail " (detail: $detail)"
+ }
+ error "assertion:Expected '$value' to be equal to '$expected'$detail"
}
}
@@ -29,18 +32,12 @@ proc assert_error {pattern code} {
if {[catch {uplevel 1 $code} error]} {
assert_match $pattern $error
} else {
- error "assertion:Expected an error but nothing was catched"
+ error "assertion:Expected an error but nothing was caught"
}
}
proc assert_encoding {enc key} {
- # Swapped out values don't have an encoding, so make sure that
- # the value is swapped in before checking the encoding.
set dbg [r debug object $key]
- while {[string match "* swapped at:*" $dbg]} {
- r debug swapin $key
- set dbg [r debug object $key]
- }
assert_match "* encoding:$enc *" $dbg
}
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 7774dd99a..64c36b326 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -262,46 +262,50 @@ proc formatCommand {args} {
proc csvdump r {
set o {}
- foreach k [lsort [{*}$r keys *]] {
- set type [{*}$r type $k]
- append o [csvstring $k] , [csvstring $type] ,
- switch $type {
- string {
- append o [csvstring [{*}$r get $k]] "\n"
- }
- list {
- foreach e [{*}$r lrange $k 0 -1] {
- append o [csvstring $e] ,
+ for {set db 0} {$db < 16} {incr db} {
+ {*}$r select $db
+ foreach k [lsort [{*}$r keys *]] {
+ set type [{*}$r type $k]
+ append o [csvstring $db] , [csvstring $k] , [csvstring $type] ,
+ switch $type {
+ string {
+ append o [csvstring [{*}$r get $k]] "\n"
}
- append o "\n"
- }
- set {
- foreach e [lsort [{*}$r smembers $k]] {
- append o [csvstring $e] ,
+ list {
+ foreach e [{*}$r lrange $k 0 -1] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- append o "\n"
- }
- zset {
- foreach e [{*}$r zrange $k 0 -1 withscores] {
- append o [csvstring $e] ,
+ set {
+ foreach e [lsort [{*}$r smembers $k]] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- append o "\n"
- }
- hash {
- set fields [{*}$r hgetall $k]
- set newfields {}
- foreach {k v} $fields {
- lappend newfields [list $k $v]
+ zset {
+ foreach e [{*}$r zrange $k 0 -1 withscores] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- set fields [lsort -index 0 $newfields]
- foreach kv $fields {
- append o [csvstring [lindex $kv 0]] ,
- append o [csvstring [lindex $kv 1]] ,
+ hash {
+ set fields [{*}$r hgetall $k]
+ set newfields {}
+ foreach {k v} $fields {
+ lappend newfields [list $k $v]
+ }
+ set fields [lsort -index 0 $newfields]
+ foreach kv $fields {
+ append o [csvstring [lindex $kv 0]] ,
+ append o [csvstring [lindex $kv 1]] ,
+ }
+ append o "\n"
}
- append o "\n"
}
}
}
+ {*}$r select 9
return $o
}
@@ -353,7 +357,7 @@ proc colorstr {color str} {
default {set colorcode {37}}
}
if {$colorcode ne {}} {
- return "\033\[$b;${colorcode};40m$str\033\[0m"
+ return "\033\[$b;${colorcode};49m$str\033\[0m"
}
} else {
return $str
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 78b979469..41c867803 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -1,5 +1,5 @@
# Redis test suite. Copyright (C) 2009 Salvatore Sanfilippo antirez@gmail.com
-# This softare is released under the BSD License. See the COPYING file for
+# This software is released under the BSD License. See the COPYING file for
# more information.
package require Tcl 8.5
@@ -16,8 +16,10 @@ set ::all_tests {
unit/dump
unit/auth
unit/protocol
- unit/basic
+ unit/keyspace
unit/scan
+ unit/type/string
+ unit/type/incr
unit/type/list
unit/type/list-2
unit/type/list-3
@@ -38,16 +40,24 @@ set ::all_tests {
integration/aof
integration/rdb
integration/convert-zipmap-hash-on-load
+ integration/logging
+ integration/psync2
+ integration/psync2-reg
unit/pubsub
unit/slowlog
unit/scripting
unit/maxmemory
unit/introspection
+ unit/introspection-2
unit/limits
unit/obuf-limits
unit/bitops
+ unit/bitfield
+ unit/geo
unit/memefficiency
unit/hyperloglog
+ unit/lazyfree
+ unit/wait
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
@@ -56,6 +66,7 @@ set ::host 127.0.0.1
set ::port 21111
set ::traceleaks 0
set ::valgrind 0
+set ::stack_logging 0
set ::verbose 0
set ::quiet 0
set ::denytags {}
@@ -65,6 +76,9 @@ set ::file ""; # If set, runs only the tests in this comma separated list
set ::curfile ""; # Hold the filename of the current suite
set ::accurate 0; # If true runs fuzz tests with more iterations
set ::force_failure 0
+set ::timeout 600; # 10 minutes without progresses will quit the test.
+set ::last_progress [clock seconds]
+set ::active_servers {} ; # Pids of active Redis instances.
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@@ -200,11 +214,19 @@ proc test_server_main {} {
vwait forever
}
-# This function gets called 10 times per second, for now does nothing but
-# may be used in the future in order to detect test clients taking too much
-# time to execute the task.
+# This function gets called 10 times per second.
proc test_server_cron {} {
- # Do some work here.
+ set elapsed [expr {[clock seconds]-$::last_progress}]
+
+ if {$elapsed > $::timeout} {
+ set err "\[[colorstr red TIMEOUT]\]: clients state report follows."
+ puts $err
+ show_clients_state
+ kill_clients
+ force_kill_all_servers
+ the_end
+ }
+
after 100 test_server_cron
}
@@ -230,6 +252,8 @@ proc read_from_test_client fd {
set bytes [gets $fd]
set payload [read $fd $bytes]
foreach {status data} $payload break
+ set ::last_progress [clock seconds]
+
if {$status eq {ready}} {
if {!$::quiet} {
puts "\[$status\]: $data"
@@ -256,12 +280,15 @@ proc read_from_test_client fd {
set ::active_clients_task($fd) "(ERR) $data"
} elseif {$status eq {exception}} {
puts "\[[colorstr red $status]\]: $data"
- foreach p $::clients_pids {
- catch {exec kill -9 $p}
- }
+ kill_clients
+ force_kill_all_servers
exit 1
} elseif {$status eq {testing}} {
set ::active_clients_task($fd) "(IN PROGRESS) $data"
+ } elseif {$status eq {server-spawned}} {
+ lappend ::active_servers $data
+ } elseif {$status eq {server-killed}} {
+ set ::active_servers [lsearch -all -inline -not -exact $::active_servers $data]
} else {
if {!$::quiet} {
puts "\[$status\]: $data"
@@ -269,6 +296,31 @@ proc read_from_test_client fd {
}
}
+proc show_clients_state {} {
+ # The following loop is only useful for debugging tests that may
+ # enter an infinite loop. Commented out normally.
+ foreach x $::active_clients {
+ if {[info exist ::active_clients_task($x)]} {
+ puts "$x => $::active_clients_task($x)"
+ } else {
+ puts "$x => ???"
+ }
+ }
+}
+
+proc kill_clients {} {
+ foreach p $::clients_pids {
+ catch {exec kill $p}
+ }
+}
+
+proc force_kill_all_servers {} {
+ foreach p $::active_servers {
+ puts "Killing still running Redis server $p"
+ catch {exec kill -9 $p}
+ }
+}
+
# A new client is idle. Remove it from the list of active clients and
# if there are still test units to run, launch them.
proc signal_idle_client fd {
@@ -276,17 +328,7 @@ proc signal_idle_client fd {
set ::active_clients \
[lsearch -all -inline -not -exact $::active_clients $fd]
- if 0 {
- # The following loop is only useful for debugging tests that may
- # enter an infinite loop. Commented out normally.
- foreach x $::active_clients {
- if {[info exist ::active_clients_task($x)]} {
- puts "$x => $::active_clients_task($x)"
- } else {
- puts "$x => ???"
- }
- }
- }
+ if 0 {show_clients_state}
# New unit to process?
if {$::next_test != [llength $::all_tests]} {
@@ -306,7 +348,7 @@ proc signal_idle_client fd {
}
}
-# The the_end funciton gets called when all the test units were already
+# The the_end function gets called when all the test units were already
# executed, so the test finished.
proc the_end {} {
# TODO: print the status, exit with the rigth exit code.
@@ -357,11 +399,13 @@ proc send_data_packet {fd status data} {
proc print_help_screen {} {
puts [join {
"--valgrind Run the test over valgrind."
+ "--stack-logging Enable OSX leaks/malloc stack logging."
"--accurate Run slow randomized tests for more iterations."
"--quiet Don't show individual tests."
"--single <unit> Just execute the specified unit (see next option)."
"--list-tests List all the available test units."
- "--clients <num> Number of test clients (16)."
+ "--clients <num> Number of test clients (default 16)."
+ "--timeout <sec> Test timeout in seconds (default 10 min)."
"--force-failure Force the execution of a test that always fails."
"--help Print this help screen."
} "\n"]
@@ -382,6 +426,10 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
incr j
} elseif {$opt eq {--valgrind}} {
set ::valgrind 1
+ } elseif {$opt eq {--stack-logging}} {
+ if {[string match {*Darwin*} [exec uname -a]]} {
+ set ::stack_logging 1
+ }
} elseif {$opt eq {--quiet}} {
set ::quiet 1
} elseif {$opt eq {--host}} {
@@ -410,6 +458,9 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--clients}} {
set ::numclients $arg
incr j
+ } elseif {$opt eq {--timeout}} {
+ set ::timeout $arg
+ incr j
} elseif {$opt eq {--help}} {
print_help_screen
exit 0
@@ -426,8 +477,11 @@ proc attach_to_replication_stream {} {
flush $s
# Get the count
- set count [gets $s]
- set prefix [string range $count 0 0]
+ while 1 {
+ set count [gets $s]
+ set prefix [string range $count 0 0]
+ if {$prefix ne {}} break; # Newlines are allowed as PINGs.
+ }
if {$prefix ne {$}} {
error "attach_to_replication_stream error. Received '$count' as count."
}
diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl
index a2d74168f..dff7588ff 100644
--- a/tests/unit/aofrw.tcl
+++ b/tests/unit/aofrw.tcl
@@ -4,60 +4,63 @@ start_server {tags {"aofrw"}} {
r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite.
waitForBgrewriteaof r
- test {AOF rewrite during write load} {
- # Start a write load for 10 seconds
- set master [srv 0 client]
- set master_host [srv 0 host]
- set master_port [srv 0 port]
- set load_handle0 [start_write_load $master_host $master_port 10]
- set load_handle1 [start_write_load $master_host $master_port 10]
- set load_handle2 [start_write_load $master_host $master_port 10]
- set load_handle3 [start_write_load $master_host $master_port 10]
- set load_handle4 [start_write_load $master_host $master_port 10]
-
- # Make sure the instance is really receiving data
- wait_for_condition 50 100 {
- [r dbsize] > 0
- } else {
- fail "No write load detected."
- }
+ foreach rdbpre {yes no} {
+ r config set aof-use-rdb-preamble $rdbpre
+ test "AOF rewrite during write load: RDB preamble=$rdbpre" {
+ # Start a write load for 10 seconds
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set load_handle0 [start_write_load $master_host $master_port 10]
+ set load_handle1 [start_write_load $master_host $master_port 10]
+ set load_handle2 [start_write_load $master_host $master_port 10]
+ set load_handle3 [start_write_load $master_host $master_port 10]
+ set load_handle4 [start_write_load $master_host $master_port 10]
+
+ # Make sure the instance is really receiving data
+ wait_for_condition 50 100 {
+ [r dbsize] > 0
+ } else {
+ fail "No write load detected."
+ }
- # After 3 seconds, start a rewrite, while the write load is still
- # active.
- after 3000
- r bgrewriteaof
- waitForBgrewriteaof r
+ # After 3 seconds, start a rewrite, while the write load is still
+ # active.
+ after 3000
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # Let it run a bit more so that we'll append some data to the new
+ # AOF.
+ after 1000
+
+ # Stop the processes generating the load if they are still active
+ stop_write_load $load_handle0
+ stop_write_load $load_handle1
+ stop_write_load $load_handle2
+ stop_write_load $load_handle3
+ stop_write_load $load_handle4
+
+ # Make sure that we remain the only connected client.
+ # This step is needed to make sure there are no pending writes
+ # that will be processed between the two "debug digest" calls.
+ wait_for_condition 50 100 {
+ [llength [split [string trim [r client list]] "\n"]] == 1
+ } else {
+ puts [r client list]
+ fail "Clients generating loads are not disconnecting"
+ }
- # Let it run a bit more so that we'll append some data to the new
- # AOF.
- after 1000
+ # Get the data set digest
+ set d1 [r debug digest]
- # Stop the processes generating the load if they are still active
- stop_write_load $load_handle0
- stop_write_load $load_handle1
- stop_write_load $load_handle2
- stop_write_load $load_handle3
- stop_write_load $load_handle4
+ # Load the AOF
+ r debug loadaof
+ set d2 [r debug digest]
- # Make sure that we remain the only connected client.
- # This step is needed to make sure there are no pending writes
- # that will be processed between the two "debug digest" calls.
- wait_for_condition 50 100 {
- [llength [split [string trim [r client list]] "\n"]] == 1
- } else {
- puts [r client list]
- fail "Clients generating loads are not disconnecting"
+ # Make sure they are the same
+ assert {$d1 eq $d2}
}
-
- # Get the data set digest
- set d1 [r debug digest]
-
- # Load the AOF
- r debug loadaof
- set d2 [r debug digest]
-
- # Make sure they are the same
- assert {$d1 eq $d2}
}
}
@@ -70,17 +73,17 @@ start_server {tags {"aofrw"}} {
r config set appendonly no
r exec
wait_for_condition 50 100 {
- [string match {*Killing*AOF*child*} [exec tail -n5 < [srv 0 stdout]]]
+ [string match {*Killing*AOF*child*} [exec tail -5 < [srv 0 stdout]]]
} else {
fail "Can't find 'Killing AOF child' into recent logs"
}
}
foreach d {string int} {
- foreach e {ziplist linkedlist} {
+ foreach e {quicklist} {
test "AOF rewrite of list with $e encoding, $d data" {
r flushall
- if {$e eq {ziplist}} {set len 10} else {set len 1000}
+ set len 1000
for {set j 0} {$j < $len} {incr j} {
if {$d eq {string}} {
set data [randstring 0 16 alpha]
diff --git a/tests/unit/auth.tcl b/tests/unit/auth.tcl
index 15753e9e7..633cda95c 100644
--- a/tests/unit/auth.tcl
+++ b/tests/unit/auth.tcl
@@ -10,7 +10,7 @@ start_server {tags {"auth"} overrides {requirepass foobar}} {
catch {r auth wrong!} err
set _ $err
} {ERR*invalid password}
-
+
test {Arbitrary command gives an error when AUTH is required} {
catch {r set foo bar} err
set _ $err
diff --git a/tests/unit/bitfield.tcl b/tests/unit/bitfield.tcl
new file mode 100644
index 000000000..d76452b1b
--- /dev/null
+++ b/tests/unit/bitfield.tcl
@@ -0,0 +1,201 @@
+start_server {tags {"bitops"}} {
+ test {BITFIELD signed SET and GET basics} {
+ r del bits
+ set results {}
+ lappend results [r bitfield bits set i8 0 -100]
+ lappend results [r bitfield bits set i8 0 101]
+ lappend results [r bitfield bits get i8 0]
+ set results
+ } {0 -100 101}
+
+ test {BITFIELD unsigned SET and GET basics} {
+ r del bits
+ set results {}
+ lappend results [r bitfield bits set u8 0 255]
+ lappend results [r bitfield bits set u8 0 100]
+ lappend results [r bitfield bits get u8 0]
+ set results
+ } {0 255 100}
+
+ test {BITFIELD #<idx> form} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 65
+ r bitfield bits set u8 #1 66
+ r bitfield bits set u8 #2 67
+ r get bits
+ } {ABC}
+
+ test {BITFIELD basic INCRBY form} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 10
+ lappend results [r bitfield bits incrby u8 #0 100]
+ lappend results [r bitfield bits incrby u8 #0 100]
+ set results
+ } {110 210}
+
+ test {BITFIELD chaining of multiple commands} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 10
+ lappend results [r bitfield bits incrby u8 #0 100 incrby u8 #0 100]
+ set results
+ } {{110 210}}
+
+ test {BITFIELD unsigned overflow wrap} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow wrap incrby u8 #0 257]
+ lappend results [r bitfield bits get u8 #0]
+ lappend results [r bitfield bits overflow wrap incrby u8 #0 255]
+ lappend results [r bitfield bits get u8 #0]
+ } {101 101 100 100}
+
+ test {BITFIELD unsigned overflow sat} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow sat incrby u8 #0 257]
+ lappend results [r bitfield bits get u8 #0]
+ lappend results [r bitfield bits overflow sat incrby u8 #0 -255]
+ lappend results [r bitfield bits get u8 #0]
+ } {255 255 0 0}
+
+ test {BITFIELD signed overflow wrap} {
+ r del bits
+ set results {}
+ r bitfield bits set i8 #0 100
+ lappend results [r bitfield bits overflow wrap incrby i8 #0 257]
+ lappend results [r bitfield bits get i8 #0]
+ lappend results [r bitfield bits overflow wrap incrby i8 #0 255]
+ lappend results [r bitfield bits get i8 #0]
+ } {101 101 100 100}
+
+ test {BITFIELD signed overflow sat} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow sat incrby i8 #0 257]
+ lappend results [r bitfield bits get i8 #0]
+ lappend results [r bitfield bits overflow sat incrby i8 #0 -255]
+ lappend results [r bitfield bits get i8 #0]
+ } {127 127 -128 -128}
+
+ test {BITFIELD overflow detection fuzzing} {
+ for {set j 0} {$j < 1000} {incr j} {
+ set bits [expr {[randomInt 64]+1}]
+ set sign [randomInt 2]
+ set range [expr {2**$bits}]
+ if {$bits == 64} {set sign 1} ; # u64 is not supported by BITFIELD.
+ if {$sign} {
+ set min [expr {-($range/2)}]
+ set type "i$bits"
+ } else {
+ set min 0
+ set type "u$bits"
+ }
+ set max [expr {$min+$range-1}]
+
+ # Compare Tcl vs Redis
+ set range2 [expr {$range*2}]
+ set value [expr {($min*2)+[randomInt $range2]}]
+ set increment [expr {($min*2)+[randomInt $range2]}]
+ if {$value > 9223372036854775807} {
+ set value 9223372036854775807
+ }
+ if {$value < -9223372036854775808} {
+ set value -9223372036854775808
+ }
+ if {$increment > 9223372036854775807} {
+ set increment 9223372036854775807
+ }
+ if {$increment < -9223372036854775808} {
+ set increment -9223372036854775808
+ }
+
+ set overflow 0
+ if {$value > $max || $value < $min} {set overflow 1}
+ if {($value + $increment) > $max} {set overflow 1}
+ if {($value + $increment) < $min} {set overflow 1}
+
+ r del bits
+ set res1 [r bitfield bits overflow fail set $type 0 $value]
+ set res2 [r bitfield bits overflow fail incrby $type 0 $increment]
+
+ if {$overflow && [lindex $res1 0] ne {} &&
+ [lindex $res2 0] ne {}} {
+ fail "OW not detected where needed: $type $value+$increment"
+ }
+ if {!$overflow && ([lindex $res1 0] eq {} ||
+ [lindex $res2 0] eq {})} {
+ fail "OW detected where NOT needed: $type $value+$increment"
+ }
+ }
+ }
+
+ test {BITFIELD overflow wrap fuzzing} {
+ for {set j 0} {$j < 1000} {incr j} {
+ set bits [expr {[randomInt 64]+1}]
+ set sign [randomInt 2]
+ set range [expr {2**$bits}]
+ if {$bits == 64} {set sign 1} ; # u64 is not supported by BITFIELD.
+ if {$sign} {
+ set min [expr {-($range/2)}]
+ set type "i$bits"
+ } else {
+ set min 0
+ set type "u$bits"
+ }
+ set max [expr {$min+$range-1}]
+
+ # Compare Tcl vs Redis
+ set range2 [expr {$range*2}]
+ set value [expr {($min*2)+[randomInt $range2]}]
+ set increment [expr {($min*2)+[randomInt $range2]}]
+ if {$value > 9223372036854775807} {
+ set value 9223372036854775807
+ }
+ if {$value < -9223372036854775808} {
+ set value -9223372036854775808
+ }
+ if {$increment > 9223372036854775807} {
+ set increment 9223372036854775807
+ }
+ if {$increment < -9223372036854775808} {
+ set increment -9223372036854775808
+ }
+
+ r del bits
+ r bitfield bits overflow wrap set $type 0 $value
+ r bitfield bits overflow wrap incrby $type 0 $increment
+ set res [lindex [r bitfield bits get $type 0] 0]
+
+ set expected 0
+ if {$sign} {incr expected [expr {$max+1}]}
+ incr expected $value
+ incr expected $increment
+ set expected [expr {$expected % $range}]
+ if {$sign} {incr expected $min}
+
+ if {$res != $expected} {
+ fail "WRAP error: $type $value+$increment = $res, should be $expected"
+ }
+ }
+ }
+
+ test {BITFIELD regression for #3221} {
+ r set bits 1
+ r bitfield bits get u1 0
+ } {0}
+
+ test {BITFIELD regression for #3564} {
+ for {set j 0} {$j < 10} {incr j} {
+ r del mystring
+ set res [r BITFIELD mystring SET i8 0 10 SET i8 64 10 INCRBY i8 10 99900]
+ assert {$res eq {0 0 60}}
+ }
+ r del mystring
+ }
+}
diff --git a/tests/unit/bitops.tcl b/tests/unit/bitops.tcl
index 896310980..926f38295 100644
--- a/tests/unit/bitops.tcl
+++ b/tests/unit/bitops.tcl
@@ -1,4 +1,4 @@
-# Compare Redis commadns against Tcl implementations of the same commands.
+# Compare Redis commands against Tcl implementations of the same commands.
proc count_bits s {
binary scan $s b* bits
string length [regsub -all {0} $bits {}]
@@ -43,6 +43,16 @@ start_server {tags {"bitops"}} {
r bitcount no-key
} 0
+ test {BITCOUNT returns 0 with out of range indexes} {
+ r set str "xxxx"
+ r bitcount str 4 10
+ } 0
+
+ test {BITCOUNT returns 0 with negative indexes where start > end} {
+ r set str "xxxx"
+ r bitcount str -6 -7
+ } 0
+
catch {unset num}
foreach vec [list "" "\xaa" "\x00\x00\xff" "foobar" "123"] {
incr num
@@ -88,7 +98,7 @@ start_server {tags {"bitops"}} {
} {ERR*syntax*}
test {BITCOUNT regression test for github issue #582} {
- r del str
+ r del foo
r setbit foo 0 1
if {[catch {r bitcount foo 0 4294967296} e]} {
assert_match {*ERR*out of range*} $e
@@ -125,7 +135,7 @@ start_server {tags {"bitops"}} {
test {BITOP where dest and target are the same key} {
r set s "\xaa\x00\xff\x55"
r bitop not s s
- r get s
+ r get s
} "\x55\xff\x00\xaa"
test {BITOP AND|OR|XOR don't change the string with single input key} {
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index d39204f9f..f5a29a096 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -157,7 +157,7 @@ start_server {tags {"dump"}} {
test {MIGRATE can correctly transfer large values} {
set first [srv 0 client]
r del key
- for {set j 0} {$j < 5000} {incr j} {
+ for {set j 0} {$j < 40000} {incr j} {
r rpush key 1 2 3 4 5 6 7 8 9 10
r rpush key "item 1" "item 2" "item 3" "item 4" "item 5" \
"item 6" "item 7" "item 8" "item 9" "item 10"
@@ -175,7 +175,7 @@ start_server {tags {"dump"}} {
assert {[$first exists key] == 0}
assert {[$second exists key] == 1}
assert {[$second ttl key] == -1}
- assert {[$second llen key] == 5000*20}
+ assert {[$second llen key] == 40000*20}
}
}
@@ -217,4 +217,95 @@ start_server {tags {"dump"}} {
assert_match {IOERR*} $e
}
}
+
+ test {MIGRATE can migrate multiple keys at once} {
+ set first [srv 0 client]
+ r set key1 "v1"
+ r set key2 "v2"
+ r set key3 "v3"
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ assert {[$first exists key1] == 1}
+ assert {[$second exists key1] == 0}
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys key1 key2 key3]
+ assert {$ret eq {OK}}
+ assert {[$first exists key1] == 0}
+ assert {[$first exists key2] == 0}
+ assert {[$first exists key3] == 0}
+ assert {[$second get key1] eq {v1}}
+ assert {[$second get key2] eq {v2}}
+ assert {[$second get key3] eq {v3}}
+ }
+ }
+
+ test {MIGRATE with multiple keys must have empty key arg} {
+ catch {r MIGRATE 127.0.0.1 6379 NotEmpty 9 5000 keys a b c} e
+ set e
+ } {*empty string*}
+
+ test {MIGRATE with mutliple keys migrate just existing ones} {
+ set first [srv 0 client]
+ r set key1 "v1"
+ r set key2 "v2"
+ r set key3 "v3"
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys nokey-1 nokey-2 nokey-2]
+ assert {$ret eq {NOKEY}}
+
+ assert {[$first exists key1] == 1}
+ assert {[$second exists key1] == 0}
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys nokey-1 key1 nokey-2 key2 nokey-3 key3]
+ assert {$ret eq {OK}}
+ assert {[$first exists key1] == 0}
+ assert {[$first exists key2] == 0}
+ assert {[$first exists key3] == 0}
+ assert {[$second get key1] eq {v1}}
+ assert {[$second get key2] eq {v2}}
+ assert {[$second get key3] eq {v3}}
+ }
+ }
+
+ test {MIGRATE with multiple keys: stress command rewriting} {
+ set first [srv 0 client]
+ r flushdb
+ r mset a 1 b 2 c 3 d 4 c 5 e 6 f 7 g 8 h 9 i 10 l 11 m 12 n 13 o 14 p 15 q 16
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys a b c d e f g h i l m n o p q]
+
+ assert {[$first dbsize] == 0}
+ assert {[$second dbsize] == 15}
+ }
+ }
+
+ test {MIGRATE with multiple keys: delete just ack keys} {
+ set first [srv 0 client]
+ r flushdb
+ r mset a 1 b 2 c 3 d 4 c 5 e 6 f 7 g 8 h 9 i 10 l 11 m 12 n 13 o 14 p 15 q 16
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ $second mset c _ d _; # Two busy keys and no REPLACE used
+
+ catch {r -1 migrate $second_host $second_port "" 9 5000 keys a b c d e f g h i l m n o p q} e
+
+ assert {[$first dbsize] == 2}
+ assert {[$second dbsize] == 15}
+ assert {[$first exists c] == 1}
+ assert {[$first exists d] == 1}
+ }
+ }
+
}
diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl
index ff3dacb33..eddc7c303 100644
--- a/tests/unit/expire.tcl
+++ b/tests/unit/expire.tcl
@@ -198,4 +198,25 @@ start_server {tags {"expire"}} {
r set foo b
lsort [r keys *]
} {a e foo s t}
+
+ test {EXPIRE with empty string as TTL should report an error} {
+ r set foo bar
+ catch {r expire foo ""} e
+ set e
+ } {*not an integer*}
+
+ test {SET - use EX/PX option, TTL should not be reseted after loadaof} {
+ r config set appendonly yes
+ r set foo bar EX 100
+ after 2000
+ r debug loadaof
+ set ttl [r ttl foo]
+ assert {$ttl <= 98 && $ttl > 90}
+
+ r set foo bar PX 100000
+ after 2000
+ r debug loadaof
+ set ttl [r ttl foo]
+ assert {$ttl <= 98 && $ttl > 90}
+ }
}
diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl
new file mode 100644
index 000000000..604697be4
--- /dev/null
+++ b/tests/unit/geo.tcl
@@ -0,0 +1,311 @@
+# Helper functions to simulate search-in-radius in the Tcl side in order to
+# verify the Redis implementation with a fuzzy test.
+proc geo_degrad deg {expr {$deg*atan(1)*8/360}}
+
+proc geo_distance {lon1d lat1d lon2d lat2d} {
+ set lon1r [geo_degrad $lon1d]
+ set lat1r [geo_degrad $lat1d]
+ set lon2r [geo_degrad $lon2d]
+ set lat2r [geo_degrad $lat2d]
+ set v [expr {sin(($lon2r - $lon1r) / 2)}]
+ set u [expr {sin(($lat2r - $lat1r) / 2)}]
+ expr {2.0 * 6372797.560856 * \
+ asin(sqrt($u * $u + cos($lat1r) * cos($lat2r) * $v * $v))}
+}
+
+proc geo_random_point {lonvar latvar} {
+ upvar 1 $lonvar lon
+ upvar 1 $latvar lat
+ # Note that the actual latitude limit should be -85 to +85, we restrict
+ # the test to -70 to +70 since in this range the algorithm is more precise
+ # while outside this range occasionally some element may be missing.
+ set lon [expr {-180 + rand()*360}]
+ set lat [expr {-70 + rand()*140}]
+}
+
+# Return elements non common to both the lists.
+# This code is from http://wiki.tcl.tk/15489
+proc compare_lists {List1 List2} {
+ set DiffList {}
+ foreach Item $List1 {
+ if {[lsearch -exact $List2 $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ foreach Item $List2 {
+ if {[lsearch -exact $List1 $Item] == -1} {
+ if {[lsearch -exact $DiffList $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ }
+ return $DiffList
+}
+
+# The following list represents sets of random seed, search position
+# and radius that caused bugs in the past. It is used by the randomized
+# test later as a starting point. When the regression vectors are scanned
+# the code reverts to using random data.
+#
+# The format is: seed km lon lat
+set regression_vectors {
+ {1482225976969 7083 81.634948934258375 30.561509253718668}
+ {1482340074151 5416 -70.863281847379767 -46.347003465679947}
+ {1499014685896 6064 -89.818768962202014 -40.463868561416803}
+ {1412 156 149.29737817929004 15.95807862745508}
+ {441574 143 59.235461856813856 66.269555127373678}
+ {160645 187 -101.88575239939883 49.061997951502917}
+ {750269 154 -90.187939661642517 66.615930412251487}
+ {342880 145 163.03472387745728 64.012747720821181}
+ {729955 143 137.86663517256579 63.986745399416776}
+ {939895 151 59.149620271823181 65.204186651485145}
+ {1412 156 149.29737817929004 15.95807862745508}
+ {564862 149 84.062063109158544 -65.685403922426232}
+}
+set rv_idx 0
+
+start_server {tags {"geo"}} {
+ test {GEOADD create} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {1}
+
+ test {GEOADD update} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {0}
+
+ test {GEOADD invalid coordinates} {
+ catch {
+ r geoadd nyc -73.9454966 40.747533 "lic market" \
+ foo bar "luck market"
+ } err
+ set err
+ } {*valid*}
+
+ test {GEOADD multi add} {
+ r geoadd nyc -73.9733487 40.7648057 "central park n/q/r" -73.9903085 40.7362513 "union square" -74.0131604 40.7126674 "wtc one" -73.7858139 40.6428986 "jfk" -73.9375699 40.7498929 "q4" -73.9564142 40.7480973 4545
+ } {6}
+
+ test {Check geoset values} {
+ r zrange nyc 0 -1 withscores
+ } {{wtc one} 1791873972053020 {union square} 1791875485187452 {central park n/q/r} 1791875761332224 4545 1791875796750882 {lic market} 1791875804419201 q4 1791875830079666 jfk 1791895905559723}
+
+ test {GEORADIUS simple (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km asc
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS withdist (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km withdist asc
+ } {{{central park n/q/r} 0.7750} {4545 2.3651} {{union square} 2.7697}}
+
+ test {GEORADIUS with COUNT} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 3
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS with COUNT but missing integer argument} {
+ catch {r georadius nyc -73.9798091 40.7598464 10 km COUNT} e
+ set e
+ } {ERR*syntax*}
+
+ test {GEORADIUS with COUNT DESC} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 2 DESC
+ } {{wtc one} q4}
+
+ test {GEORADIUS HUGE, issue #2767} {
+ r geoadd users -47.271613776683807 -54.534504198047678 user_000000
+ llength [r GEORADIUS users 0 0 50000 km WITHCOORD]
+ } {1}
+
+ test {GEORADIUSBYMEMBER simple (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km
+ } {{wtc one} {union square} {central park n/q/r} 4545 {lic market}}
+
+ test {GEORADIUSBYMEMBER withdist (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km withdist
+ } {{{wtc one} 0.0000} {{union square} 3.2544} {{central park n/q/r} 6.7000} {4545 6.1975} {{lic market} 6.8969}}
+
+ test {GEOHASH is able to return geohash strings} {
+ # Example from Wikipedia.
+ r del points
+ r geoadd points -5.6 42.6 test
+ lindex [r geohash points test] 0
+ } {ezs42e44yx0}
+
+ test {GEOPOS simple} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lassign [lindex [r geopos points a b] 0] x1 y1
+ lassign [lindex [r geopos points a b] 1] x2 y2
+ assert {abs($x1 - 10) < 0.001}
+ assert {abs($y1 - 20) < 0.001}
+ assert {abs($x2 - 30) < 0.001}
+ assert {abs($y2 - 40) < 0.001}
+ }
+
+ test {GEOPOS missing element} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lindex [r geopos points a x b] 1
+ } {}
+
+ test {GEODIST simple & unit} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Catania]
+ assert {$m > 166274 && $m < 166275}
+ set km [r geodist points Palermo Catania km]
+ assert {$km > 166.2 && $km < 166.3}
+ }
+
+ test {GEODIST missing elements} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Agrigento]
+ assert {$m eq {}}
+ set m [r geodist points Ragusa Agrigento]
+ assert {$m eq {}}
+ set m [r geodist empty_key Palermo Catania]
+ assert {$m eq {}}
+ }
+
+ test {GEORADIUS STORE option: syntax error} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ catch {r georadius points 13.361389 38.115556 50 km store} e
+ set e
+ } {*ERR*syntax*}
+
+ test {GEORANGE STORE option: incompatible options} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withdist} e
+ assert_match {*ERR*} $e
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withhash} e
+ assert_match {*ERR*} $e
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withcoords} e
+ assert_match {*ERR*} $e
+ }
+
+ test {GEORANGE STORE option: plain usage} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km store points2
+ assert_equal [r zrange points 0 -1] [r zrange points2 0 -1]
+ }
+
+ test {GEORANGE STOREDIST option: plain usage} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km storedist points2
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 1] < 1}
+ assert {[lindex $res 3] > 166}
+ assert {[lindex $res 3] < 167}
+ }
+
+ test {GEORANGE STOREDIST option: COUNT ASC and DESC} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km storedist points2 asc count 1
+ assert {[r zcard points2] == 1}
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 0] eq "Palermo"}
+
+ r georadius points 13.361389 38.115556 500 km storedist points2 desc count 1
+ assert {[r zcard points2] == 1}
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 0] eq "Catania"}
+ }
+
+ test {GEOADD + GEORANGE randomized test} {
+ set attempt 30
+ while {[incr attempt -1]} {
+ set rv [lindex $regression_vectors $rv_idx]
+ incr rv_idx
+
+ unset -nocomplain debuginfo
+ set srand_seed [clock milliseconds]
+ if {$rv ne {}} {set srand_seed [lindex $rv 0]}
+ lappend debuginfo "srand_seed is $srand_seed"
+ expr {srand($srand_seed)} ; # If you need a reproducible run
+ r del mypoints
+
+ if {[randomInt 10] == 0} {
+ # From time to time use very big radiuses
+ set radius_km [expr {[randomInt 50000]+10}]
+ } else {
+ # Normally use a few - ~200km radiuses to stress
+ # test the code the most in edge cases.
+ set radius_km [expr {[randomInt 200]+10}]
+ }
+ if {$rv ne {}} {set radius_km [lindex $rv 1]}
+ set radius_m [expr {$radius_km*1000}]
+ geo_random_point search_lon search_lat
+ if {$rv ne {}} {
+ set search_lon [lindex $rv 2]
+ set search_lat [lindex $rv 3]
+ }
+ lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km"
+ set tcl_result {}
+ set argv {}
+ for {set j 0} {$j < 20000} {incr j} {
+ geo_random_point lon lat
+ lappend argv $lon $lat "place:$j"
+ set distance [geo_distance $lon $lat $search_lon $search_lat]
+ if {$distance < $radius_m} {
+ lappend tcl_result "place:$j"
+ }
+ lappend debuginfo "place:$j $lon $lat [expr {$distance/1000}] km"
+ }
+ r geoadd mypoints {*}$argv
+ set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]]
+ set res2 [lsort $tcl_result]
+ set test_result OK
+
+ if {$res != $res2} {
+ set rounding_errors 0
+ set diff [compare_lists $res $res2]
+ foreach place $diff {
+ set mydist [geo_distance $lon $lat $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
+                # Make sure this is a real error and not a rounding issue.
+ if {[llength $diff] == $rounding_errors} {
+ set res $res2; # Error silenced
+ }
+ }
+
+ if {$res != $res2} {
+ set diff [compare_lists $res $res2]
+ puts "*** Possible problem in GEO radius query ***"
+ puts "Redis: $res"
+ puts "Tcl : $res2"
+ puts "Diff : $diff"
+ puts [join $debuginfo "\n"]
+ foreach place $diff {
+ if {[lsearch -exact $res2 $place] != -1} {
+ set where "(only in Tcl)"
+ } else {
+ set where "(only in Redis)"
+ }
+ lassign [lindex [r geopos mypoints $place] 0] lon lat
+ set mydist [geo_distance $lon $lat $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ puts "$place -> [r geopos mypoints $place] $mydist $where"
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
+ set test_result FAIL
+ }
+ unset -nocomplain debuginfo
+ if {$test_result ne {OK}} break
+ }
+ set test_result
+ } {OK}
+}
diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl
index 3f5142076..7d36b7a35 100644
--- a/tests/unit/hyperloglog.tcl
+++ b/tests/unit/hyperloglog.tcl
@@ -136,10 +136,9 @@ start_server {tags {"hll"}} {
r pfcount hll
} {5}
- test {PFCOUNT multiple-keys merge returns cardinality of union} {
+ test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1 hll2 hll3
for {set x 1} {$x < 10000} {incr x} {
- # Force dense representation of hll2
r pfadd hll1 "foo-$x"
r pfadd hll2 "bar-$x"
r pfadd hll3 "zap-$x"
@@ -151,6 +150,22 @@ start_server {tags {"hll"}} {
}
}
+ test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
+ r del hll1 hll2 hll3
+ set elements {}
+ for {set x 1} {$x < 10000} {incr x} {
+ for {set j 1} {$j <= 3} {incr j} {
+ set rint [randomInt 20000]
+ r pfadd hll$j $rint
+ lappend elements $rint
+ }
+ }
+ set realcard [llength [lsort -unique $elements]]
+ set card [r pfcount hll1 hll2 hll3]
+ set err [expr {abs($card-$realcard)}]
+ assert {$err < (double($card)/100)*5}
+ }
+
test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
r del hll
r pfadd hll 1 2 3
diff --git a/tests/unit/introspection-2.tcl b/tests/unit/introspection-2.tcl
new file mode 100644
index 000000000..350a8a016
--- /dev/null
+++ b/tests/unit/introspection-2.tcl
@@ -0,0 +1,23 @@
+start_server {tags {"introspection"}} {
+ test {TTL and TYPYE do not alter the last access time of a key} {
+ r set foo bar
+ after 3000
+ r ttl foo
+ r type foo
+ assert {[r object idletime foo] >= 2}
+ }
+
+ test {TOUCH alters the last access time of a key} {
+ r set foo bar
+ after 3000
+ r touch foo
+ assert {[r object idletime foo] < 2}
+ }
+
+ test {TOUCH returns the number of existing keys specified} {
+ r flushdb
+ r set key1 1
+ r set key2 2
+ r touch key0 key1 key2 key3
+ } 2
+}
diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl
index 54742bb02..f6477d9c5 100644
--- a/tests/unit/introspection.tcl
+++ b/tests/unit/introspection.tcl
@@ -6,16 +6,17 @@ start_server {tags {"introspection"}} {
test {MONITOR can log executed commands} {
set rd [redis_deferring_client]
$rd monitor
+ assert_match {*OK*} [$rd read]
r set foo bar
r get foo
- list [$rd read] [$rd read] [$rd read]
- } {*OK*"set" "foo"*"get" "foo"*}
+ list [$rd read] [$rd read]
+ } {*"set" "foo"*"get" "foo"*}
test {MONITOR can log commands issued by the scripting engine} {
set rd [redis_deferring_client]
$rd monitor
- r eval {redis.call('set',KEYS[1],ARGV[1])} 1 foo bar
$rd read ;# Discard the OK
+ r eval {redis.call('set',KEYS[1],ARGV[1])} 1 foo bar
assert_match {*eval*} [$rd read]
assert_match {*lua*"set"*"foo"*"bar"*} [$rd read]
}
@@ -27,7 +28,7 @@ start_server {tags {"introspection"}} {
test {CLIENT LIST shows empty fields for unassigned names} {
r client list
} {*name= *}
-
+
test {CLIENT SETNAME does not accept spaces} {
catch {r client setname "foo bar"} e
set e
diff --git a/tests/unit/keyspace.tcl b/tests/unit/keyspace.tcl
new file mode 100644
index 000000000..d4e7bf51c
--- /dev/null
+++ b/tests/unit/keyspace.tcl
@@ -0,0 +1,275 @@
+start_server {tags {"keyspace"}} {
+ test {DEL against a single item} {
+ r set x foo
+ assert {[r get x] eq "foo"}
+ r del x
+ r get x
+ } {}
+
+ test {Vararg DEL} {
+ r set foo1 a
+ r set foo2 b
+ r set foo3 c
+ list [r del foo1 foo2 foo3 foo4] [r mget foo1 foo2 foo3]
+ } {3 {{} {} {}}}
+
+ test {KEYS with pattern} {
+ foreach key {key_x key_y key_z foo_a foo_b foo_c} {
+ r set $key hello
+ }
+ lsort [r keys foo*]
+ } {foo_a foo_b foo_c}
+
+ test {KEYS to get all keys} {
+ lsort [r keys *]
+ } {foo_a foo_b foo_c key_x key_y key_z}
+
+ test {DBSIZE} {
+ r dbsize
+ } {6}
+
+ test {DEL all keys} {
+ foreach key [r keys *] {r del $key}
+ r dbsize
+ } {0}
+
+ test "DEL against expired key" {
+ r debug set-active-expire 0
+ r setex keyExpire 1 valExpire
+ after 1100
+ assert_equal 0 [r del keyExpire]
+ r debug set-active-expire 1
+ }
+
+ test {EXISTS} {
+ set res {}
+ r set newkey test
+ append res [r exists newkey]
+ r del newkey
+ append res [r exists newkey]
+ } {10}
+
+ test {Zero length value in key. SET/GET/EXISTS} {
+ r set emptykey {}
+ set res [r get emptykey]
+ append res [r exists emptykey]
+ r del emptykey
+ append res [r exists emptykey]
+ } {10}
+
+ test {Commands pipelining} {
+ set fd [r channel]
+ puts -nonewline $fd "SET k1 xyzk\r\nGET k1\r\nPING\r\n"
+ flush $fd
+ set res {}
+ append res [string match OK* [r read]]
+ append res [r read]
+ append res [string match PONG* [r read]]
+ format $res
+ } {1xyzk1}
+
+ test {Non existing command} {
+ catch {r foobaredcommand} err
+ string match ERR* $err
+ } {1}
+
+ test {RENAME basic usage} {
+ r set mykey hello
+ r rename mykey mykey1
+ r rename mykey1 mykey2
+ r get mykey2
+ } {hello}
+
+ test {RENAME source key should no longer exist} {
+ r exists mykey
+ } {0}
+
+ test {RENAME against already existing key} {
+ r set mykey a
+ r set mykey2 b
+ r rename mykey2 mykey
+ set res [r get mykey]
+ append res [r exists mykey2]
+ } {b0}
+
+ test {RENAMENX basic usage} {
+ r del mykey
+ r del mykey2
+ r set mykey foobar
+ r renamenx mykey mykey2
+ set res [r get mykey2]
+ append res [r exists mykey]
+ } {foobar0}
+
+ test {RENAMENX against already existing key} {
+ r set mykey foo
+ r set mykey2 bar
+ r renamenx mykey mykey2
+ } {0}
+
+ test {RENAMENX against already existing key (2)} {
+ set res [r get mykey]
+ append res [r get mykey2]
+ } {foobar}
+
+ test {RENAME against non existing source key} {
+ catch {r rename nokey foobar} err
+ format $err
+ } {ERR*}
+
+ test {RENAME where source and dest key are the same (existing)} {
+ r set mykey foo
+ r rename mykey mykey
+ } {OK}
+
+ test {RENAMENX where source and dest key are the same (existing)} {
+ r set mykey foo
+ r renamenx mykey mykey
+ } {0}
+
+ test {RENAME where source and dest key are the same (non existing)} {
+ r del mykey
+ catch {r rename mykey mykey} err
+ format $err
+ } {ERR*}
+
+ test {RENAME with volatile key, should move the TTL as well} {
+ r del mykey mykey2
+ r set mykey foo
+ r expire mykey 100
+ assert {[r ttl mykey] > 95 && [r ttl mykey] <= 100}
+ r rename mykey mykey2
+ assert {[r ttl mykey2] > 95 && [r ttl mykey2] <= 100}
+ }
+
+ test {RENAME with volatile key, should not inherit TTL of target key} {
+ r del mykey mykey2
+ r set mykey foo
+ r set mykey2 bar
+ r expire mykey2 100
+ assert {[r ttl mykey] == -1 && [r ttl mykey2] > 0}
+ r rename mykey mykey2
+ r ttl mykey2
+ } {-1}
+
+ test {DEL all keys again (DB 0)} {
+ foreach key [r keys *] {
+ r del $key
+ }
+ r dbsize
+ } {0}
+
+ test {DEL all keys again (DB 1)} {
+ r select 10
+ foreach key [r keys *] {
+ r del $key
+ }
+ set res [r dbsize]
+ r select 9
+ format $res
+ } {0}
+
+ test {MOVE basic usage} {
+ r set mykey foobar
+ r move mykey 10
+ set res {}
+ lappend res [r exists mykey]
+ lappend res [r dbsize]
+ r select 10
+ lappend res [r get mykey]
+ lappend res [r dbsize]
+ r select 9
+ format $res
+ } [list 0 0 foobar 1]
+
+ test {MOVE against key existing in the target DB} {
+ r set mykey hello
+ r move mykey 10
+ } {0}
+
+ test {MOVE against non-integer DB (#1428)} {
+ r set mykey hello
+ catch {r move mykey notanumber} e
+ set e
+ } {*ERR*index out of range}
+
+ test {MOVE can move key expire metadata as well} {
+ r select 10
+ r flushdb
+ r select 9
+ r set mykey foo ex 100
+ r move mykey 10
+ assert {[r ttl mykey] == -2}
+ r select 10
+ assert {[r ttl mykey] > 0 && [r ttl mykey] <= 100}
+ assert {[r get mykey] eq "foo"}
+ r select 9
+ }
+
+ test {MOVE does not create an expire if it does not exist} {
+ r select 10
+ r flushdb
+ r select 9
+ r set mykey foo
+ r move mykey 10
+ assert {[r ttl mykey] == -2}
+ r select 10
+ assert {[r ttl mykey] == -1}
+ assert {[r get mykey] eq "foo"}
+ r select 9
+ }
+
+ test {SET/GET keys in different DBs} {
+ r set a hello
+ r set b world
+ r select 10
+ r set a foo
+ r set b bared
+ r select 9
+ set res {}
+ lappend res [r get a]
+ lappend res [r get b]
+ r select 10
+ lappend res [r get a]
+ lappend res [r get b]
+ r select 9
+ format $res
+ } {hello world foo bared}
+
+ test {RANDOMKEY} {
+ r flushdb
+ r set foo x
+ r set bar y
+ set foo_seen 0
+ set bar_seen 0
+ for {set i 0} {$i < 100} {incr i} {
+ set rkey [r randomkey]
+ if {$rkey eq {foo}} {
+ set foo_seen 1
+ }
+ if {$rkey eq {bar}} {
+ set bar_seen 1
+ }
+ }
+ list $foo_seen $bar_seen
+ } {1 1}
+
+ test {RANDOMKEY against empty DB} {
+ r flushdb
+ r randomkey
+ } {}
+
+ test {RANDOMKEY regression 1} {
+ r flushdb
+ r set x 10
+ r del x
+ r randomkey
+ } {}
+
+ test {KEYS * two times with long key, Github issue #1208} {
+ r flushdb
+ r set dlskeriewrioeuwqoirueioqwrueoqwrueqw test
+ r keys *
+ r keys *
+ } {dlskeriewrioeuwqoirueioqwrueoqwrueqw}
+}
diff --git a/tests/unit/lazyfree.tcl b/tests/unit/lazyfree.tcl
new file mode 100644
index 000000000..4e994494b
--- /dev/null
+++ b/tests/unit/lazyfree.tcl
@@ -0,0 +1,39 @@
+start_server {tags {"lazyfree"}} {
+ test "UNLINK can reclaim memory in background" {
+ set orig_mem [s used_memory]
+ set args {}
+ for {set i 0} {$i < 100000} {incr i} {
+ lappend args $i
+ }
+ r sadd myset {*}$args
+ assert {[r scard myset] == 100000}
+ set peak_mem [s used_memory]
+ assert {[r unlink myset] == 1}
+ assert {$peak_mem > $orig_mem+1000000}
+ wait_for_condition 50 100 {
+ [s used_memory] < $peak_mem &&
+ [s used_memory] < $orig_mem*2
+ } else {
+ fail "Memory is not reclaimed by UNLINK"
+ }
+ }
+
+ test "FLUSHDB ASYNC can reclaim memory in background" {
+ set orig_mem [s used_memory]
+ set args {}
+ for {set i 0} {$i < 100000} {incr i} {
+ lappend args $i
+ }
+ r sadd myset {*}$args
+ assert {[r scard myset] == 100000}
+ set peak_mem [s used_memory]
+ r flushdb async
+ assert {$peak_mem > $orig_mem+1000000}
+ wait_for_condition 50 100 {
+ [s used_memory] < $peak_mem &&
+ [s used_memory] < $orig_mem*2
+ } else {
+ fail "Memory is not reclaimed by FLUSHDB ASYNC"
+ }
+ }
+}
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index 1431a2ac7..0c3f6b32c 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -24,11 +24,11 @@ start_server {tags {"maxmemory"}} {
}
foreach policy {
- allkeys-random allkeys-lru volatile-lru volatile-random volatile-ttl
+ allkeys-random allkeys-lru allkeys-lfu volatile-lru volatile-lfu volatile-random volatile-ttl
} {
test "maxmemory - is the memory limit honoured? (policy $policy)" {
# make sure to start with a blank instance
- r flushall
+ r flushall
# Get the current memory limit and calculate a new limit.
# We just add 100k to the current memory size so that it is
# fast for us to reach that limit.
@@ -60,7 +60,7 @@ start_server {tags {"maxmemory"}} {
} {
test "maxmemory - only allkeys-* should remove non-volatile keys ($policy)" {
# make sure to start with a blank instance
- r flushall
+ r flushall
# Get the current memory limit and calculate a new limit.
# We just add 100k to the current memory size so that it is
# fast for us to reach that limit.
@@ -98,11 +98,11 @@ start_server {tags {"maxmemory"}} {
}
foreach policy {
- volatile-lru volatile-random volatile-ttl
+ volatile-lru volatile-lfu volatile-random volatile-ttl
} {
test "maxmemory - policy $policy should only remove volatile keys." {
# make sure to start with a blank instance
- r flushall
+ r flushall
# Get the current memory limit and calculate a new limit.
# We just add 100k to the current memory size so that it is
# fast for us to reach that limit.
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 14e135ced..f452f0224 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -1,15 +1,20 @@
proc test_memory_efficiency {range} {
r flushall
+ set rd [redis_deferring_client]
set base_mem [s used_memory]
set written 0
for {set j 0} {$j < 10000} {incr j} {
set key key:$j
set val [string repeat A [expr {int(rand()*$range)}]]
- r set $key $val
+ $rd set $key $val
incr written [string length $key]
incr written [string length $val]
incr written 2 ;# A separator is the minimum to store key-value data.
}
+ for {set j 0} {$j < 10000} {incr j} {
+ $rd read ; # Discard replies
+ }
+
set current_mem [s used_memory]
set used [expr {$current_mem-$base_mem}]
set efficiency [expr {double($written)/$used}]
@@ -30,3 +35,51 @@ start_server {tags {"memefficiency"}} {
}
}
}
+
+if 0 {
+ start_server {tags {"defrag"}} {
+ if {[string match {*jemalloc*} [s mem_allocator]]} {
+ test "Active defrag" {
+ r config set activedefrag no
+ r config set active-defrag-threshold-lower 5
+ r config set active-defrag-ignore-bytes 2mb
+ r config set maxmemory 100mb
+ r config set maxmemory-policy allkeys-lru
+ r debug populate 700000 asdf 150
+ r debug populate 170000 asdf 300
+ set frag [s mem_fragmentation_ratio]
+ assert {$frag >= 1.7}
+ r config set activedefrag yes
+ after 1500 ;# active defrag tests the status once a second.
+ set hits [s active_defrag_hits]
+
+ # wait for the active defrag to stop working
+ set tries 0
+ while { True } {
+ incr tries
+ after 500
+ set prev_hits $hits
+ set hits [s active_defrag_hits]
+ if {$hits == $prev_hits} {
+ break
+ }
+ assert {$tries < 100}
+ }
+
+ # TODO: we need to expose more accurate fragmentation info
+ # i.e. the allocator used and active pages
+ # instead we currently look at RSS so we need to ask for purge
+ r memory purge
+
+            # Test that the fragmentation is lower and that the defragger
+ # stopped working
+ set frag [s mem_fragmentation_ratio]
+ assert {$frag < 1.55}
+ set misses [s active_defrag_misses]
+ after 500
+ set misses2 [s active_defrag_misses]
+ assert {$misses2 == $misses}
+ }
+ }
+ }
+}
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index a53f3f5c8..1d21b561a 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -52,7 +52,7 @@ start_server {tags {"other"}} {
test {SELECT an out of range DB} {
catch {r select 1000000} err
set _ $err
- } {*invalid*}
+ } {*index is out of range*}
tags {consistency} {
if {![catch {package require sha1}]} {
@@ -194,6 +194,7 @@ start_server {tags {"other"}} {
}
test {APPEND basics} {
+ r del foo
list [r append foo bar] [r get foo] \
[r append foo 100] [r get foo]
} {3 bar 6 bar100}
diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl
index 18033bdf2..9c7a43bf0 100644
--- a/tests/unit/pubsub.tcl
+++ b/tests/unit/pubsub.tcl
@@ -196,6 +196,10 @@ start_server {tags {"pubsub"}} {
$rd1 close
}
+ test "NUMSUB returns numbers, not strings (#1561)" {
+ r pubsub numsub abc def
+ } {abc 0 def 0}
+
test "Mix SUBSCRIBE and PSUBSCRIBE" {
set rd1 [redis_deferring_client]
assert_equal {1} [subscribe $rd1 {foo.bar}]
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 2b1033e39..1d84f128d 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -226,4 +226,14 @@ start_server {tags {"scan"}} {
set res [r zscan mykey 0 MATCH foo* COUNT 10000]
lsort -unique [lindex $res 1]
}
+
+ test "ZSCAN scores: regression test for issue #2175" {
+ r del mykey
+ for {set j 0} {$j < 500} {incr j} {
+ r zadd mykey 9.8813129168249309e-323 $j
+ }
+ set res [lindex [r zscan mykey 0] 1]
+ set first_score [lindex $res 1]
+ assert {$first_score != 0}
+ }
}
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 4190a0a49..be82e1559 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -62,18 +62,19 @@ start_server {tags {"scripting"}} {
} {NOSCRIPT*}
test {EVAL - Redis integer -> Lua type conversion} {
+ r set x 0
r eval {
- local foo = redis.pcall('incr','x')
+ local foo = redis.pcall('incr',KEYS[1])
return {type(foo),foo}
- } 0
+ } 1 x
} {number 1}
test {EVAL - Redis bulk -> Lua type conversion} {
r set mykey myval
r eval {
- local foo = redis.pcall('get','mykey')
+ local foo = redis.pcall('get',KEYS[1])
return {type(foo),foo}
- } 0
+ } 1 mykey
} {string myval}
test {EVAL - Redis multi bulk -> Lua type conversion} {
@@ -82,39 +83,39 @@ start_server {tags {"scripting"}} {
r rpush mylist b
r rpush mylist c
r eval {
- local foo = redis.pcall('lrange','mylist',0,-1)
+ local foo = redis.pcall('lrange',KEYS[1],0,-1)
return {type(foo),foo[1],foo[2],foo[3],# foo}
- } 0
+ } 1 mylist
} {table a b c 3}
test {EVAL - Redis status reply -> Lua type conversion} {
r eval {
- local foo = redis.pcall('set','mykey','myval')
+ local foo = redis.pcall('set',KEYS[1],'myval')
return {type(foo),foo['ok']}
- } 0
+ } 1 mykey
} {table OK}
test {EVAL - Redis error reply -> Lua type conversion} {
r set mykey myval
r eval {
- local foo = redis.pcall('incr','mykey')
+ local foo = redis.pcall('incr',KEYS[1])
return {type(foo),foo['err']}
- } 0
+ } 1 mykey
} {table {ERR value is not an integer or out of range}}
test {EVAL - Redis nil bulk reply -> Lua type conversion} {
r del mykey
r eval {
- local foo = redis.pcall('get','mykey')
+ local foo = redis.pcall('get',KEYS[1])
return {type(foo),foo == false}
- } 0
+ } 1 mykey
} {boolean 1}
test {EVAL - Is the Lua client using the currently selected DB?} {
r set mykey "this is DB 9"
r select 10
r set mykey "this is DB 10"
- r eval {return redis.pcall('get','mykey')} 0
+ r eval {return redis.pcall('get',KEYS[1])} 1 mykey
} {this is DB 10}
test {EVAL - SELECT inside Lua should not affect the caller} {
@@ -141,7 +142,7 @@ start_server {tags {"scripting"}} {
test {EVAL - Scripts can't run certain commands} {
set e {}
- catch {r eval {return redis.pcall('spop','x')} 0} e
+ catch {r eval {return redis.pcall('blpop','x',0)} 0} e
set e
} {*not allowed*}
@@ -184,6 +185,98 @@ start_server {tags {"scripting"}} {
set e
} {*against a key*}
+ test {EVAL - JSON numeric decoding} {
+ # We must return the table as a string because otherwise
+ # Redis converts floats to ints and we get 0 and 1023 instead
+ # of 0.0003 and 1023.2 as the parsed output.
+ r eval {return
+ table.concat(
+ cjson.decode(
+ "[0.0, -5e3, -1, 0.3e-3, 1023.2, 0e10]"), " ")
+ } 0
+ } {0 -5000 -1 0.0003 1023.2 0}
+
+ test {EVAL - JSON string decoding} {
+ r eval {local decoded = cjson.decode('{"keya": "a", "keyb": "b"}')
+ return {decoded.keya, decoded.keyb}
+ } 0
+ } {a b}
+
+ test {EVAL - cmsgpack can pack double?} {
+ r eval {local encoded = cmsgpack.pack(0.1)
+ local h = ""
+ for i = 1, #encoded do
+ h = h .. string.format("%02x",string.byte(encoded,i))
+ end
+ return h
+ } 0
+ } {cb3fb999999999999a}
+
+ test {EVAL - cmsgpack can pack negative int64?} {
+ r eval {local encoded = cmsgpack.pack(-1099511627776)
+ local h = ""
+ for i = 1, #encoded do
+ h = h .. string.format("%02x",string.byte(encoded,i))
+ end
+ return h
+ } 0
+ } {d3ffffff0000000000}
+
+ test {EVAL - cmsgpack can pack and unpack circular references?} {
+ r eval {local a = {x=nil,y=5}
+ local b = {x=a}
+ a['x'] = b
+ local encoded = cmsgpack.pack(a)
+ local h = ""
+ -- cmsgpack encodes to a depth of 16, but can't encode
+               -- references, so the encoded object has a deep copy recursive
+ -- depth of 16.
+ for i = 1, #encoded do
+ h = h .. string.format("%02x",string.byte(encoded,i))
+ end
+ -- when unpacked, re.x.x != re because the unpack creates
+ -- individual tables down to a depth of 16.
+ -- (that's why the encoded output is so large)
+ local re = cmsgpack.unpack(encoded)
+ assert(re)
+ assert(re.x)
+ assert(re.x.x.y == re.y)
+ assert(re.x.x.x.x.y == re.y)
+ assert(re.x.x.x.x.x.x.y == re.y)
+ assert(re.x.x.x.x.x.x.x.x.x.x.y == re.y)
+ -- maximum working depth:
+ assert(re.x.x.x.x.x.x.x.x.x.x.x.x.x.x.y == re.y)
+ -- now the last x would be b above and has no y
+ assert(re.x.x.x.x.x.x.x.x.x.x.x.x.x.x.x)
+ -- so, the final x.x is at the depth limit and was assigned nil
+ assert(re.x.x.x.x.x.x.x.x.x.x.x.x.x.x.x.x == nil)
+ return {h, re.x.x.x.x.x.x.x.x.y == re.y, re.y == 5}
+ } 0
+ } {82a17905a17881a17882a17905a17881a17882a17905a17881a17882a17905a17881a17882a17905a17881a17882a17905a17881a17882a17905a17881a17882a17905a17881a178c0 1 1}
+
+ test {EVAL - Numerical sanity check from bitop} {
+ r eval {assert(0x7fffffff == 2147483647, "broken hex literals");
+ assert(0xffffffff == -1 or 0xffffffff == 2^32-1,
+ "broken hex literals");
+ assert(tostring(-1) == "-1", "broken tostring()");
+ assert(tostring(0xffffffff) == "-1" or
+ tostring(0xffffffff) == "4294967295",
+ "broken tostring()")
+ } 0
+ } {}
+
+ test {EVAL - Verify minimal bitop functionality} {
+ r eval {assert(bit.tobit(1) == 1);
+ assert(bit.band(1) == 1);
+ assert(bit.bxor(1,2) == 3);
+ assert(bit.bor(1,2,4,8,16,32,64,128) == 255)
+ } 0
+ } {}
+
+ test {EVAL - Able to parse trailing comments} {
+ r eval {return 'hello' --trailing comment} 0
+ } {hello}
+
test {SCRIPTING FLUSH - is able to clear the scripts cache?} {
r set mykey myval
set v [r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey]
@@ -237,7 +330,7 @@ start_server {tags {"scripting"}} {
test {Globals protection reading an undeclared global variable} {
catch {r eval {return a} 0} e
set e
- } {*ERR*attempted to access unexisting global*}
+ } {*ERR*attempted to access * global*}
test {Globals protection setting an undeclared global*} {
catch {r eval {a=10} 0} e
@@ -325,7 +418,7 @@ start_server {tags {"scripting"}} {
r sadd myset a b c
r mset a 1 b 2 c 3 d 4
assert {[r spop myset] ne {}}
- assert {[r spop myset] ne {}}
+ assert {[r spop myset 1] ne {}}
assert {[r spop myset] ne {}}
assert {[r mget a b c d] eq {1 2 3 4}}
assert {[r spop myset] eq {}}
@@ -358,6 +451,32 @@ start_server {tags {"scripting"}} {
return redis.call("get", "key")
} 0
} {12039611435714932082}
+
+ test {Verify negative arg count is error instead of crash (issue #1842)} {
+ catch { r eval { return "hello" } -12 } e
+ set e
+ } {ERR Number of keys can't be negative}
+
+ test {Correct handling of reused argv (issue #1939)} {
+ r eval {
+ for i = 0, 10 do
+ redis.call('SET', 'a', '1')
+ redis.call('MGET', 'a', 'b', 'c')
+ redis.call('EXPIRE', 'a', 0)
+ redis.call('GET', 'a')
+ redis.call('MGET', 'a', 'b', 'c')
+ end
+ } 0
+ }
+
+ test {Functions in the Redis namespace are able to report errors} {
+ catch {
+ r eval {
+ redis.sha1hex()
+ } 0
+ } e
+ set e
+ } {*wrong number*}
}
# Start a new server since the last test in this stanza will kill the
@@ -407,20 +526,111 @@ start_server {tags {"scripting"}} {
}
}
-start_server {tags {"scripting repl"}} {
- start_server {} {
- test {Before the slave connects we issue two EVAL commands} {
- # One with an error, but still executing a command.
- # SHA is: 67164fc43fa971f76fd1aaeeaf60c1c178d25876
- catch {
- r eval {redis.call('incr',KEYS[1]); redis.call('nonexisting')} 1 x
+foreach cmdrepl {0 1} {
+ start_server {tags {"scripting repl"}} {
+ start_server {} {
+ if {$cmdrepl == 1} {
+ set rt "(commmands replication)"
+ } else {
+ set rt "(scripts replication)"
+ r debug lua-always-replicate-commands 1
+ }
+
+ test "Before the slave connects we issue two EVAL commands $rt" {
+ # One with an error, but still executing a command.
+ # SHA is: 67164fc43fa971f76fd1aaeeaf60c1c178d25876
+ catch {
+ r eval {redis.call('incr',KEYS[1]); redis.call('nonexisting')} 1 x
+ }
+ # One command is correct:
+ # SHA is: 6f5ade10a69975e903c6d07b10ea44c6382381a5
+ r eval {return redis.call('incr',KEYS[1])} 1 x
+ } {2}
+
+ test "Connect a slave to the master instance $rt" {
+ r -1 slaveof [srv 0 host] [srv 0 port]
+ wait_for_condition 50 100 {
+ [s -1 role] eq {slave} &&
+ [string match {*master_link_status:up*} [r -1 info replication]]
+ } else {
+ fail "Can't turn the instance into a slave"
+ }
+ }
+
+ test "Now use EVALSHA against the master, with both SHAs $rt" {
+ # The server should replicate successful and unsuccessful
+ # commands as EVAL instead of EVALSHA.
+ catch {
+ r evalsha 67164fc43fa971f76fd1aaeeaf60c1c178d25876 1 x
+ }
+ r evalsha 6f5ade10a69975e903c6d07b10ea44c6382381a5 1 x
+ } {4}
+
+ test "If EVALSHA was replicated as EVAL, 'x' should be '4' $rt" {
+ wait_for_condition 50 100 {
+ [r -1 get x] eq {4}
+ } else {
+ fail "Expected 4 in x, but value is '[r -1 get x]'"
+ }
}
- # One command is correct:
- # SHA is: 6f5ade10a69975e903c6d07b10ea44c6382381a5
- r eval {return redis.call('incr',KEYS[1])} 1 x
- } {2}
- test {Connect a slave to the main instance} {
+ test "Replication of script multiple pushes to list with BLPOP $rt" {
+ set rd [redis_deferring_client]
+ $rd brpop a 0
+ r eval {
+ redis.call("lpush",KEYS[1],"1");
+ redis.call("lpush",KEYS[1],"2");
+ } 1 a
+ set res [$rd read]
+ $rd close
+ wait_for_condition 50 100 {
+ [r -1 lrange a 0 -1] eq [r lrange a 0 -1]
+ } else {
+ fail "Expected list 'a' in slave and master to be the same, but they are respectively '[r -1 lrange a 0 -1]' and '[r lrange a 0 -1]'"
+ }
+ set res
+ } {a 1}
+
+ test "EVALSHA replication when first call is readonly $rt" {
+ r del x
+ r eval {if tonumber(ARGV[1]) > 0 then redis.call('incr', KEYS[1]) end} 1 x 0
+ r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 0
+ r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 1
+ wait_for_condition 50 100 {
+ [r -1 get x] eq {1}
+ } else {
+ fail "Expected 1 in x, but value is '[r -1 get x]'"
+ }
+ }
+
+ test "Lua scripts using SELECT are replicated correctly $rt" {
+ r eval {
+ redis.call("set","foo1","bar1")
+ redis.call("select","10")
+ redis.call("incr","x")
+ redis.call("select","11")
+ redis.call("incr","z")
+ } 0
+ r eval {
+ redis.call("set","foo1","bar1")
+ redis.call("select","10")
+ redis.call("incr","x")
+ redis.call("select","11")
+ redis.call("incr","z")
+ } 0
+ wait_for_condition 50 100 {
+ [r -1 debug digest] eq [r debug digest]
+ } else {
+ fail "Master-Slave desync after Lua script using SELECT."
+ }
+ }
+ }
+ }
+}
+
+start_server {tags {"scripting repl"}} {
+ start_server {overrides {appendonly yes}} {
+ test "Connect a slave to the master instance" {
r -1 slaveof [srv 0 host] [srv 0 port]
wait_for_condition 50 100 {
[s -1 role] eq {slave} &&
@@ -430,72 +640,96 @@ start_server {tags {"scripting repl"}} {
}
}
- test {Now use EVALSHA against the master, with both SHAs} {
- # The server should replicate successful and unsuccessful
- # commands as EVAL instead of EVALSHA.
+ test "Redis.replicate_commands() must be issued before any write" {
+ r eval {
+ redis.call('set','foo','bar');
+ return redis.replicate_commands();
+ } 0
+ } {}
+
+ test "Redis.replicate_commands() must be issued before any write (2)" {
+ r eval {
+ return redis.replicate_commands();
+ } 0
+ } {1}
+
+ test "Redis.set_repl() must be issued after replicate_commands()" {
catch {
- r evalsha 67164fc43fa971f76fd1aaeeaf60c1c178d25876 1 x
- }
- r evalsha 6f5ade10a69975e903c6d07b10ea44c6382381a5 1 x
- } {4}
+ r eval {
+ redis.set_repl(redis.REPL_ALL);
+ } 0
+ } e
+ set e
+ } {*only after turning on*}
- test {If EVALSHA was replicated as EVAL, 'x' should be '4'} {
- wait_for_condition 50 100 {
- [r -1 get x] eq {4}
- } else {
- fail "Expected 4 in x, but value is '[r -1 get x]'"
- }
- }
+ test "Redis.set_repl() don't accept invalid values" {
+ catch {
+ r eval {
+ redis.replicate_commands();
+ redis.set_repl(12345);
+ } 0
+ } e
+ set e
+ } {*Invalid*flags*}
- test {Replication of script multiple pushes to list with BLPOP} {
- set rd [redis_deferring_client]
- $rd brpop a 0
+ test "Test selective replication of certain Redis commands from Lua" {
+ r del a b c d
r eval {
- redis.call("lpush",KEYS[1],"1");
- redis.call("lpush",KEYS[1],"2");
- } 1 a
- set res [$rd read]
- $rd close
- wait_for_condition 50 100 {
- [r -1 lrange a 0 -1] eq [r lrange a 0 -1]
- } else {
- fail "Expected list 'a' in slave and master to be the same, but they are respectively '[r -1 lrange a 0 -1]' and '[r lrange a 0 -1]'"
- }
- set res
- } {a 1}
-
- test {EVALSHA replication when first call is readonly} {
- r del x
- r eval {if tonumber(ARGV[1]) > 0 then redis.call('incr', KEYS[1]) end} 1 x 0
- r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 0
- r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 1
+ redis.replicate_commands();
+ redis.call('set','a','1');
+ redis.set_repl(redis.REPL_NONE);
+ redis.call('set','b','2');
+ redis.set_repl(redis.REPL_AOF);
+ redis.call('set','c','3');
+ redis.set_repl(redis.REPL_ALL);
+ redis.call('set','d','4');
+ } 0
+
wait_for_condition 50 100 {
- [r -1 get x] eq {1}
+ [r -1 mget a b c d] eq {1 {} {} 4}
} else {
- fail "Expected 1 in x, but value is '[r -1 get x]'"
+ fail "Only a and c should be replicated to slave"
}
+
+ # Master should have everything right now
+ assert {[r mget a b c d] eq {1 2 3 4}}
+
+ # After an AOF reload only a, c and d should exist
+ r debug loadaof
+
+ assert {[r mget a b c d] eq {1 {} 3 4}}
}
- test {Lua scripts using SELECT are replicated correctly} {
- r eval {
- redis.call("set","foo1","bar1")
- redis.call("select","10")
- redis.call("incr","x")
- redis.call("select","11")
- redis.call("incr","z")
- } 0
+ test "PRNG is seeded randomly for command replication" {
+ set a [
+ r eval {
+ redis.replicate_commands();
+ return math.random()*100000;
+ } 0
+ ]
+ set b [
+ r eval {
+ redis.replicate_commands();
+ return math.random()*100000;
+ } 0
+ ]
+ assert {$a ne $b}
+ }
+
+ test "Using side effects is not a problem with command replication" {
r eval {
- redis.call("set","foo1","bar1")
- redis.call("select","10")
- redis.call("incr","x")
- redis.call("select","11")
- redis.call("incr","z")
+ redis.replicate_commands();
+ redis.call('set','time',redis.call('time')[1])
} 0
+
+ assert {[r get time] ne {}}
+
wait_for_condition 50 100 {
- [r -1 debug digest] eq [r debug digest]
+ [r get time] eq [r -1 get time]
} else {
- fail "Master-Slave desync after Lua script using SELECT."
+ fail "Time key does not match between master and slave"
}
}
}
}
+
diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl
index b25b91e2c..fce02498b 100644
--- a/tests/unit/slowlog.tcl
+++ b/tests/unit/slowlog.tcl
@@ -31,12 +31,14 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
} {0}
test {SLOWLOG - logged entry sanity check} {
+ r client setname foobar
r debug sleep 0.2
set e [lindex [r slowlog get] 0]
- assert_equal [llength $e] 4
+ assert_equal [llength $e] 6
assert_equal [lindex $e 0] 105
assert_equal [expr {[lindex $e 2] > 100000}] 1
assert_equal [lindex $e 3] {debug sleep 0.2}
+ assert_equal {foobar} [lindex $e 5]
}
test {SLOWLOG - commands with too many arguments are trimmed} {
@@ -67,4 +69,13 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
set e [lindex [r slowlog get] 0]
assert_equal [lindex $e 3] {debug sleep 0.2}
}
+
+ test {SLOWLOG - can clean older entires} {
+ r client setname lastentry_client
+ r config set slowlog-max-len 1
+ r debug sleep 0.2
+ assert {[llength [r slowlog get]] == 1}
+ set e [lindex [r slowlog get] 0]
+ assert_equal {lastentry_client} [lindex $e 5]
+ }
}
diff --git a/tests/unit/sort.tcl b/tests/unit/sort.tcl
index f48f88b5d..083c4540d 100644
--- a/tests/unit/sort.tcl
+++ b/tests/unit/sort.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"sort"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 32
+ "list-max-ziplist-size" 32
"set-max-intset-entries" 32
}
} {
@@ -36,9 +35,9 @@ start_server {
}
foreach {num cmd enc title} {
- 16 lpush ziplist "Ziplist"
- 1000 lpush linkedlist "Linked list"
- 10000 lpush linkedlist "Big Linked list"
+ 16 lpush quicklist "Old Ziplist"
+ 1000 lpush quicklist "Old Linked list"
+ 10000 lpush quicklist "Old Big Linked list"
16 sadd intset "Intset"
1000 sadd hashtable "Hash table"
10000 sadd hashtable "Big Hash table"
@@ -85,16 +84,24 @@ start_server {
r sort tosort BY weight_* store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
test "SORT BY hash field STORE" {
r sort tosort BY wobj_*->weight store sort-res
assert_equal $result [r lrange sort-res 0 -1]
assert_equal 16 [r llen sort-res]
- assert_encoding ziplist sort-res
+ assert_encoding quicklist sort-res
}
+ test "SORT extracts STORE correctly" {
+ r command getkeys sort abc store def
+ } {abc def}
+
+ test "SORT extracts multiple STORE correctly" {
+ r command getkeys sort abc store invalid store stillbad store def
+ } {abc def}
+
test "SORT DESC" {
assert_equal [lsort -decreasing -integer $result] [r sort tosort DESC]
}
@@ -179,7 +186,7 @@ start_server {
assert_equal [lsort -real $floats] [r sort mylist]
}
- test "SORT with STORE returns zero if result is empty (github isse 224)" {
+ test "SORT with STORE returns zero if result is empty (github issue 224)" {
r flushdb
r sort foo store bar
} {0}
@@ -238,6 +245,24 @@ start_server {
r sort mylist by num get x:*->
} {100}
+ test "SORT by nosort retains native order for lists" {
+ r del testa
+ r lpush testa 2 1 4 3 5
+ r sort testa by nosort
+ } {5 3 4 1 2}
+
+ test "SORT by nosort plus store retains native order for lists" {
+ r del testa
+ r lpush testa 2 1 4 3 5
+ r sort testa by nosort store testb
+ r lrange testb 0 -1
+ } {5 3 4 1 2}
+
+ test "SORT by nosort with limit returns based on original list order" {
+ r sort testa by nosort limit 0 3 store testb
+ r lrange testb 0 -1
+ } {5 3 4}
+
tags {"slow"} {
set num 100
set res [create_random_dataset $num lpush]
diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl
index fa52afd16..d2c679d32 100644
--- a/tests/unit/type/hash.tcl
+++ b/tests/unit/type/hash.tcl
@@ -2,8 +2,8 @@ start_server {tags {"hash"}} {
test {HSET/HLEN - Small hash creation} {
array set smallhash {}
for {set i 0} {$i < 8} {incr i} {
- set key [randstring 0 8 alpha]
- set val [randstring 0 8 alpha]
+ set key __avoid_collisions__[randstring 0 8 alpha]
+ set val __avoid_collisions__[randstring 0 8 alpha]
if {[info exists smallhash($key)]} {
incr i -1
continue
@@ -21,8 +21,8 @@ start_server {tags {"hash"}} {
test {HSET/HLEN - Big hash creation} {
array set bighash {}
for {set i 0} {$i < 1024} {incr i} {
- set key [randstring 0 8 alpha]
- set val [randstring 0 8 alpha]
+ set key __avoid_collisions__[randstring 0 8 alpha]
+ set val __avoid_collisions__[randstring 0 8 alpha]
if {[info exists bighash($key)]} {
incr i -1
continue
@@ -33,7 +33,7 @@ start_server {tags {"hash"}} {
list [r hlen bighash]
} {1024}
- test {Is the big hash encoded with a ziplist?} {
+ test {Is the big hash encoded with an hash table?} {
assert_encoding hashtable bighash
}
@@ -390,6 +390,54 @@ start_server {tags {"hash"}} {
lappend rv [string match "ERR*not*float*" $bigerr]
} {1 1}
+ test {HSTRLEN against the small hash} {
+ set err {}
+ foreach k [array names smallhash *] {
+ if {[string length $smallhash($k)] ne [r hstrlen smallhash $k]} {
+ set err "[string length $smallhash($k)] != [r hstrlen smallhash $k]"
+ break
+ }
+ }
+ set _ $err
+ } {}
+
+ test {HSTRLEN against the big hash} {
+ set err {}
+ foreach k [array names bighash *] {
+ if {[string length $bighash($k)] ne [r hstrlen bighash $k]} {
+ set err "[string length $bighash($k)] != [r hstrlen bighash $k]"
+ puts "HSTRLEN and logical length mismatch:"
+ puts "key: $k"
+ puts "Logical content: $bighash($k)"
+ puts "Server content: [r hget bighash $k]"
+ }
+ }
+ set _ $err
+ } {}
+
+ test {HSTRLEN against non existing field} {
+ set rv {}
+ lappend rv [r hstrlen smallhash __123123123__]
+ lappend rv [r hstrlen bighash __123123123__]
+ set _ $rv
+ } {0 0}
+
+ test {HSTRLEN corner cases} {
+ set vals {
+ -9223372036854775808 9223372036854775807 9223372036854775808
+ {} 0 -1 x
+ }
+ foreach v $vals {
+ r hmset smallhash field $v
+ r hmset bighash field $v
+ set len1 [string length $v]
+ set len2 [r hstrlen smallhash field]
+ set len3 [r hstrlen bighash field]
+ assert {$len1 == $len2}
+ assert {$len2 == $len3}
+ }
+ }
+
test {Hash ziplist regression test for large keys} {
r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk a
r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk b
@@ -467,4 +515,22 @@ start_server {tags {"hash"}} {
assert {[r object encoding myhash] eq {hashtable}}
}
}
+
+ # The following test can only be executed if we don't use Valgrind, and if
+ # we are using x86_64 architecture, because:
+ #
+ # 1) Valgrind has floating point limitations, no support for 80 bits math.
+ # 2) Other archs may have the same limits.
+ #
+ # 1.23 cannot be represented correctly with 64 bit doubles, so we skip
+ # the test, since we are only testing pretty printing here and is not
+ # a bug if the program outputs things like 1.299999...
+ if {!$::valgrind && [string match *x86_64* [exec uname -a]]} {
+ test {Test HINCRBYFLOAT for correct float representation (issue #2846)} {
+ r del myhash
+ assert {[r hincrbyfloat myhash float 1.23] eq {1.23}}
+ assert {[r hincrbyfloat myhash float 0.77] eq {2}}
+ assert {[r hincrbyfloat myhash float -0.1] eq {1.9}}
+ }
+ }
}
diff --git a/tests/unit/type/incr.tcl b/tests/unit/type/incr.tcl
new file mode 100644
index 000000000..2287aaae2
--- /dev/null
+++ b/tests/unit/type/incr.tcl
@@ -0,0 +1,147 @@
+start_server {tags {"incr"}} {
+ test {INCR against non existing key} {
+ set res {}
+ append res [r incr novar]
+ append res [r get novar]
+ } {11}
+
+ test {INCR against key created by incr itself} {
+ r incr novar
+ } {2}
+
+ test {INCR against key originally set with SET} {
+ r set novar 100
+ r incr novar
+ } {101}
+
+ test {INCR over 32bit value} {
+ r set novar 17179869184
+ r incr novar
+ } {17179869185}
+
+ test {INCRBY over 32bit value with over 32bit increment} {
+ r set novar 17179869184
+ r incrby novar 17179869184
+ } {34359738368}
+
+ test {INCR fails against key with spaces (left)} {
+ r set novar " 11"
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against key with spaces (right)} {
+ r set novar "11 "
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against key with spaces (both)} {
+ r set novar " 11 "
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against a key holding a list} {
+ r rpush mylist 1
+ catch {r incr mylist} err
+ r rpop mylist
+ format $err
+ } {WRONGTYPE*}
+
+ test {DECRBY over 32bit value with over 32bit increment, negative res} {
+ r set novar 17179869184
+ r decrby novar 17179869185
+ } {-1}
+
+ test {INCR uses shared objects in the 0-9999 range} {
+ r set foo -1
+ r incr foo
+ assert {[r object refcount foo] > 1}
+ r set foo 9998
+ r incr foo
+ assert {[r object refcount foo] > 1}
+ r incr foo
+ assert {[r object refcount foo] == 1}
+ }
+
+ test {INCR can modify objects in-place} {
+ r set foo 20000
+ r incr foo
+ assert {[r object refcount foo] == 1}
+ set old [lindex [split [r debug object foo]] 1]
+ r incr foo
+ set new [lindex [split [r debug object foo]] 1]
+ assert {[string range $old 0 2] eq "at:"}
+ assert {[string range $new 0 2] eq "at:"}
+ assert {$old eq $new}
+ }
+
+ test {INCRBYFLOAT against non existing key} {
+ r del novar
+ list [roundFloat [r incrbyfloat novar 1]] \
+ [roundFloat [r get novar]] \
+ [roundFloat [r incrbyfloat novar 0.25]] \
+ [roundFloat [r get novar]]
+ } {1 1 1.25 1.25}
+
+ test {INCRBYFLOAT against key originally set with SET} {
+ r set novar 1.5
+ roundFloat [r incrbyfloat novar 1.5]
+ } {3}
+
+ test {INCRBYFLOAT over 32bit value} {
+ r set novar 17179869184
+ r incrbyfloat novar 1.5
+ } {17179869185.5}
+
+ test {INCRBYFLOAT over 32bit value with over 32bit increment} {
+ r set novar 17179869184
+ r incrbyfloat novar 17179869184
+ } {34359738368}
+
+ test {INCRBYFLOAT fails against key with spaces (left)} {
+ set err {}
+ r set novar " 11"
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against key with spaces (right)} {
+ set err {}
+ r set novar "11 "
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against key with spaces (both)} {
+ set err {}
+ r set novar " 11 "
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against a key holding a list} {
+ r del mylist
+ set err {}
+ r rpush mylist 1
+ catch {r incrbyfloat mylist 1.0} err
+ r del mylist
+ format $err
+ } {WRONGTYPE*}
+
+ test {INCRBYFLOAT does not allow NaN or Infinity} {
+ r set foo 0
+ set err {}
+ catch {r incrbyfloat foo +inf} err
+ set err
+ # p.s. no way I can force NaN to test it from the API because
+ # there is no way to increment / decrement by infinity nor to
+ # perform divisions.
+ } {ERR*would produce*}
+
+ test {INCRBYFLOAT decrement} {
+ r set foo 1
+ roundFloat [r incrbyfloat foo -1.1]
+ } {-0.1}
+}
diff --git a/tests/unit/type/list-2.tcl b/tests/unit/type/list-2.tcl
index bf6a055eb..4c7d6d91c 100644
--- a/tests/unit/type/list-2.tcl
+++ b/tests/unit/type/list-2.tcl
@@ -1,8 +1,7 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 4
}
} {
source "tests/unit/type/list-common.tcl"
@@ -28,14 +27,18 @@ start_server {
for {set i 0} {$i < 1000} {incr i} {
set min [expr {int(rand()*$startlen)}]
set max [expr {$min+int(rand()*$startlen)}]
+ set before_len [llength $mylist]
+ set before_len_r [r llen mylist]
set mylist [lrange $mylist $min $max]
r ltrim mylist $min $max
- assert_equal $mylist [r lrange mylist 0 -1]
+ assert_equal $mylist [r lrange mylist 0 -1] "failed trim"
+ set starting [r llen mylist]
for {set j [r llen mylist]} {$j < $startlen} {incr j} {
set str [randomInt 9223372036854775807]
r rpush mylist $str
lappend mylist $str
+ assert_equal $mylist [r lrange mylist 0 -1] "failed append match"
}
}
}
diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl
index 94f9a0b79..b5bd48cb0 100644
--- a/tests/unit/type/list-3.tcl
+++ b/tests/unit/type/list-3.tcl
@@ -1,8 +1,7 @@
start_server {
tags {list ziplist}
overrides {
- "list-max-ziplist-value" 200000
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 16
}
} {
test {Explicit regression for a list bug} {
@@ -14,6 +13,50 @@ start_server {
assert_equal [r lindex l 1] [lindex $mylist 1]
}
+ test {Regression for quicklist #3343 bug} {
+ r del mylist
+ r lpush mylist 401
+ r lpush mylist 392
+ r rpush mylist [string repeat x 5105]"799"
+ r lset mylist -1 [string repeat x 1014]"702"
+ r lpop mylist
+ r lset mylist -1 [string repeat x 4149]"852"
+ r linsert mylist before 401 [string repeat x 9927]"12"
+ r lrange mylist 0 -1
+ r ping ; # It's enough if the server is still alive
+ } {PONG}
+
+ test {Stress tester for #3343-alike bugs} {
+ r del key
+ for {set j 0} {$j < 10000} {incr j} {
+ set op [randomInt 6]
+ set small_signed_count [expr 5-[randomInt 10]]
+ if {[randomInt 2] == 0} {
+ set ele [randomInt 1000]
+ } else {
+ set ele [string repeat x [randomInt 10000]][randomInt 1000]
+ }
+ switch $op {
+ 0 {r lpush key $ele}
+ 1 {r rpush key $ele}
+ 2 {r lpop key}
+ 3 {r rpop key}
+ 4 {
+ catch {r lset key $small_signed_count $ele}
+ }
+ 5 {
+ set otherele [randomInt 1000]
+ if {[randomInt 2] == 0} {
+ set where before
+ } else {
+ set where after
+ }
+ r linsert key $where $otherele $ele
+ }
+ }
+ }
+ }
+
tags {slow} {
test {ziplist implementation: value encoding and backlink} {
if {$::accurate} {set iterations 100} else {set iterations 10}
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index c8e26602b..1557082a2 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1,25 +1,24 @@
start_server {
tags {"list"}
overrides {
- "list-max-ziplist-value" 16
- "list-max-ziplist-entries" 256
+ "list-max-ziplist-size" 5
}
} {
source "tests/unit/type/list-common.tcl"
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - ziplist} {
# first lpush then rpush
- assert_equal 1 [r lpush myziplist1 a]
- assert_equal 2 [r rpush myziplist1 b]
- assert_equal 3 [r rpush myziplist1 c]
+ assert_equal 1 [r lpush myziplist1 aa]
+ assert_equal 2 [r rpush myziplist1 bb]
+ assert_equal 3 [r rpush myziplist1 cc]
assert_equal 3 [r llen myziplist1]
- assert_equal a [r lindex myziplist1 0]
- assert_equal b [r lindex myziplist1 1]
- assert_equal c [r lindex myziplist1 2]
+ assert_equal aa [r lindex myziplist1 0]
+ assert_equal bb [r lindex myziplist1 1]
+ assert_equal cc [r lindex myziplist1 2]
assert_equal {} [r lindex myziplist2 3]
- assert_equal c [r rpop myziplist1]
- assert_equal a [r lpop myziplist1]
- assert_encoding ziplist myziplist1
+ assert_equal cc [r rpop myziplist1]
+ assert_equal aa [r lpop myziplist1]
+ assert_encoding quicklist myziplist1
# first rpush then lpush
assert_equal 1 [r rpush myziplist2 a]
@@ -32,13 +31,13 @@ start_server {
assert_equal {} [r lindex myziplist2 3]
assert_equal a [r rpop myziplist2]
assert_equal c [r lpop myziplist2]
- assert_encoding ziplist myziplist2
+ assert_encoding quicklist myziplist2
}
test {LPUSH, RPUSH, LLENGTH, LINDEX, LPOP - regular list} {
# first lpush then rpush
assert_equal 1 [r lpush mylist1 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist1
+ assert_encoding quicklist mylist1
assert_equal 2 [r rpush mylist1 b]
assert_equal 3 [r rpush mylist1 c]
assert_equal 3 [r llen mylist1]
@@ -51,7 +50,7 @@ start_server {
# first rpush then lpush
assert_equal 1 [r rpush mylist2 $largevalue(linkedlist)]
- assert_encoding linkedlist mylist2
+ assert_encoding quicklist mylist2
assert_equal 2 [r lpush mylist2 b]
assert_equal 3 [r lpush mylist2 c]
assert_equal 3 [r llen mylist2]
@@ -74,34 +73,22 @@ start_server {
assert_equal {d c b a 0 1 2 3} [r lrange mylist 0 -1]
}
- test {DEL a list - ziplist} {
- assert_equal 1 [r del myziplist2]
- assert_equal 0 [r exists myziplist2]
- assert_equal 0 [r llen myziplist2]
- }
-
- test {DEL a list - regular list} {
+ test {DEL a list} {
assert_equal 1 [r del mylist2]
assert_equal 0 [r exists mylist2]
assert_equal 0 [r llen mylist2]
}
- proc create_ziplist {key entries} {
- r del $key
- foreach entry $entries { r rpush $key $entry }
- assert_encoding ziplist $key
- }
-
- proc create_linkedlist {key entries} {
+ proc create_list {key entries} {
r del $key
foreach entry $entries { r rpush $key $entry }
- assert_encoding linkedlist $key
+ assert_encoding quicklist $key
}
foreach {type large} [array get largevalue] {
test "BLPOP, BRPOP: single existing list - $type" {
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd blpop blist 1
assert_equal {blist a} [$rd read]
@@ -116,8 +103,8 @@ start_server {
test "BLPOP, BRPOP: multiple existing lists - $type" {
set rd [redis_deferring_client]
- create_$type blist1 "a $large c"
- create_$type blist2 "d $large f"
+ create_list blist1 "a $large c"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist1 a} [$rd read]
@@ -137,7 +124,7 @@ start_server {
test "BLPOP, BRPOP: second list has an entry - $type" {
set rd [redis_deferring_client]
r del blist1
- create_$type blist2 "d $large f"
+ create_list blist2 "d $large f"
$rd blpop blist1 blist2 1
assert_equal {blist2 d} [$rd read]
@@ -151,7 +138,7 @@ start_server {
r del target
set rd [redis_deferring_client]
- create_$type blist "a b $large c d"
+ create_list blist "a b $large c d"
$rd brpoplpush blist target 1
assert_equal d [$rd read]
@@ -517,28 +504,30 @@ start_server {
foreach {type large} [array get largevalue] {
test "LPUSHX, RPUSHX - $type" {
- create_$type xlist "$large c"
+ create_list xlist "$large c"
assert_equal 3 [r rpushx xlist d]
assert_equal 4 [r lpushx xlist a]
- assert_equal "a $large c d" [r lrange xlist 0 -1]
+ assert_equal 6 [r rpushx xlist 42 x]
+ assert_equal 9 [r lpushx xlist y3 y2 y1]
+ assert_equal "y1 y2 y3 a $large c d 42 x" [r lrange xlist 0 -1]
}
test "LINSERT - $type" {
- create_$type xlist "a $large c d"
- assert_equal 5 [r linsert xlist before c zz]
- assert_equal "a $large zz c d" [r lrange xlist 0 10]
- assert_equal 6 [r linsert xlist after c yy]
- assert_equal "a $large zz c yy d" [r lrange xlist 0 10]
- assert_equal 7 [r linsert xlist after d dd]
- assert_equal -1 [r linsert xlist after bad ddd]
- assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10]
- assert_equal 8 [r linsert xlist before a aa]
- assert_equal -1 [r linsert xlist before bad aaa]
- assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10]
+ create_list xlist "a $large c d"
+ assert_equal 5 [r linsert xlist before c zz] "before c"
+ assert_equal "a $large zz c d" [r lrange xlist 0 10] "lrangeA"
+ assert_equal 6 [r linsert xlist after c yy] "after c"
+ assert_equal "a $large zz c yy d" [r lrange xlist 0 10] "lrangeB"
+ assert_equal 7 [r linsert xlist after d dd] "after d"
+ assert_equal -1 [r linsert xlist after bad ddd] "after bad"
+ assert_equal "a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeC"
+ assert_equal 8 [r linsert xlist before a aa] "before a"
+ assert_equal -1 [r linsert xlist before bad aaa] "before bad"
+ assert_equal "aa a $large zz c yy d dd" [r lrange xlist 0 10] "lrangeD"
# check inserting integer encoded value
- assert_equal 9 [r linsert xlist before aa 42]
- assert_equal 42 [r lrange xlist 0 0]
+ assert_equal 9 [r linsert xlist before aa 42] "before aa"
+ assert_equal 42 [r lrange xlist 0 0] "lrangeE"
}
}
@@ -547,55 +536,7 @@ start_server {
set e
} {*ERR*syntax*error*}
- test {LPUSHX, RPUSHX convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is pushed
- create_ziplist xlist a
- assert_equal 2 [r rpushx xlist $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r lpushx xlist $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r rpushx xlist b]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r lpushx xlist b]
- assert_encoding linkedlist xlist
- }
-
- test {LINSERT convert from ziplist to list} {
- set large $largevalue(linkedlist)
-
- # convert when a large value is inserted
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist before a $large]
- assert_encoding linkedlist xlist
- create_ziplist xlist a
- assert_equal 2 [r linsert xlist after a $large]
- assert_encoding linkedlist xlist
-
- # convert when the length threshold is exceeded
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist before a a]
- assert_encoding linkedlist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal 257 [r linsert xlist after a a]
- assert_encoding linkedlist xlist
-
- # don't convert when the value could not be inserted
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist before foo a]
- assert_encoding ziplist xlist
- create_ziplist xlist [lrepeat 256 a]
- assert_equal -1 [r linsert xlist after foo a]
- assert_encoding ziplist xlist
- }
-
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
proc check_numbered_list_consistency {key} {
set len [r llen $key]
for {set i 0} {$i < $len} {incr i} {
@@ -664,16 +605,16 @@ start_server {
foreach {type large} [array get largevalue] {
test "RPOPLPUSH base case - $type" {
r del mylist1 mylist2
- create_$type mylist1 "a $large c d"
+ create_list mylist1 "a $large c d"
assert_equal d [r rpoplpush mylist1 mylist2]
assert_equal c [r rpoplpush mylist1 mylist2]
assert_equal "a $large" [r lrange mylist1 0 -1]
assert_equal "c d" [r lrange mylist2 0 -1]
- assert_encoding ziplist mylist2
+ assert_encoding quicklist mylist2
}
test "RPOPLPUSH with the same list as src and dst - $type" {
- create_$type mylist "a $large c"
+ create_list mylist "a $large c"
assert_equal "a $large c" [r lrange mylist 0 -1]
assert_equal c [r rpoplpush mylist mylist]
assert_equal "c a $large" [r lrange mylist 0 -1]
@@ -681,8 +622,8 @@ start_server {
foreach {othertype otherlarge} [array get largevalue] {
test "RPOPLPUSH with $type source and existing target $othertype" {
- create_$type srclist "a b c $large"
- create_$othertype dstlist "$otherlarge"
+ create_list srclist "a b c $large"
+ create_list dstlist "$otherlarge"
assert_equal $large [r rpoplpush srclist dstlist]
assert_equal c [r rpoplpush srclist dstlist]
assert_equal "a b" [r lrange srclist 0 -1]
@@ -691,7 +632,7 @@ start_server {
# When we rpoplpush'ed a large value, dstlist should be
# converted to the same encoding as srclist.
if {$type eq "linkedlist"} {
- assert_encoding linkedlist dstlist
+ assert_encoding quicklist dstlist
}
}
}
@@ -713,7 +654,7 @@ start_server {
}
test {RPOPLPUSH against non list dst key} {
- create_ziplist srclist {a b c d}
+ create_list srclist {a b c d}
r set dstlist x
assert_error WRONGTYPE* {r rpoplpush srclist dstlist}
assert_type string dstlist
@@ -727,7 +668,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "Basic LPOP/RPOP - $type" {
- create_$type mylist "$large 1 2"
+ create_list mylist "$large 1 2"
assert_equal $large [r lpop mylist]
assert_equal 2 [r rpop mylist]
assert_equal 1 [r lpop mylist]
@@ -745,7 +686,7 @@ start_server {
assert_error WRONGTYPE* {r rpop notalist}
}
- foreach {type num} {ziplist 250 linkedlist 500} {
+ foreach {type num} {quicklist 250 quicklist 500} {
test "Mass RPOP/LPOP - $type" {
r del mylist
set sum1 0
@@ -765,24 +706,24 @@ start_server {
foreach {type large} [array get largevalue] {
test "LRANGE basics - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {1 2 3 4 5 6 7 8} [r lrange mylist 1 -2]
assert_equal {7 8 9} [r lrange mylist -3 -1]
assert_equal {4} [r lrange mylist 4 4]
}
test "LRANGE inverted indexes - $type" {
- create_$type mylist "$large 1 2 3 4 5 6 7 8 9"
+ create_list mylist "$large 1 2 3 4 5 6 7 8 9"
assert_equal {} [r lrange mylist 6 2]
}
test "LRANGE out of range indexes including the full list - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal "$large 1 2 3" [r lrange mylist -1000 1000]
}
test "LRANGE out of range negative end index - $type" {
- create_$type mylist "$large 1 2 3"
+ create_list mylist "$large 1 2 3"
assert_equal $large [r lrange mylist 0 -4]
assert_equal {} [r lrange mylist 0 -5]
}
@@ -796,7 +737,7 @@ start_server {
proc trim_list {type min max} {
upvar 1 large large
r del mylist
- create_$type mylist "1 2 3 4 $large"
+ create_list mylist "1 2 3 4 $large"
r ltrim mylist $min $max
r lrange mylist 0 -1
}
@@ -825,7 +766,7 @@ start_server {
foreach {type large} [array get largevalue] {
test "LSET - $type" {
- create_$type mylist "99 98 $large 96 95"
+ create_list mylist "99 98 $large 96 95"
r lset mylist 1 foo
r lset mylist -1 bar
assert_equal "99 foo $large 96 bar" [r lrange mylist 0 -1]
@@ -847,7 +788,7 @@ start_server {
foreach {type e} [array get largevalue] {
test "LREM remove all the occurrences - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo"
assert_equal 2 [r lrem mylist 0 bar]
assert_equal "$e foo foobar foobared zap test foo" [r lrange mylist 0 -1]
}
@@ -863,7 +804,7 @@ start_server {
}
test "LREM starting from tail with negative count - $type" {
- create_$type mylist "$e foo bar foobar foobared zap bar test foo foo"
+ create_list mylist "$e foo bar foobar foobared zap bar test foo foo"
assert_equal 1 [r lrem mylist -1 bar]
assert_equal "$e foo bar foobar foobared zap test foo foo" [r lrange mylist 0 -1]
}
@@ -874,7 +815,7 @@ start_server {
}
test "LREM deleting objects that may be int encoded - $type" {
- create_$type myotherlist "$e 1 2 3"
+ create_list myotherlist "$e 1 2 3"
assert_equal 1 [r lrem myotherlist 1 2]
assert_equal 3 [r llen myotherlist]
}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 162de0af7..7b467f1c4 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -293,6 +293,13 @@ start_server {
assert_equal 0 [r scard myset]
}
+ test "SPOP with <count>=1 - $type" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [list [r spop myset 1] [r spop myset 1] [r spop myset 1]]]
+ assert_equal 0 [r scard myset]
+ }
+
test "SRANDMEMBER - $type" {
create_set myset $contents
unset -nocomplain myset
@@ -304,6 +311,68 @@ start_server {
}
}
+ foreach {type contents} {
+ hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+ intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+ } {
+ test "SPOP with <count>" {
+ create_set myset $contents
+ assert_encoding $type myset
+ assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
+ assert_equal 0 [r scard myset]
+ }
+ }
+
+ # As seen in intsetRandomMembers
+ test "SPOP using integers, testing Knuth's and Floyd's algorithm" {
+ create_set myset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ assert_encoding intset myset
+ assert_equal 20 [r scard myset]
+ r spop myset 1
+ assert_equal 19 [r scard myset]
+ r spop myset 2
+ assert_equal 17 [r scard myset]
+ r spop myset 3
+ assert_equal 14 [r scard myset]
+ r spop myset 10
+ assert_equal 4 [r scard myset]
+ r spop myset 10
+ assert_equal 0 [r scard myset]
+ r spop myset 1
+ assert_equal 0 [r scard myset]
+ } {}
+
+ test "SPOP using integers with Knuth's algorithm" {
+ r spop nonexisting_key 100
+ } {}
+
+ test "SPOP new implementation: code path #1" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 30]
+ assert {[lsort $content] eq [lsort $res]}
+ }
+
+ test "SPOP new implementation: code path #2" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 2]
+ assert {[llength $res] == 2}
+ assert {[r scard myset] == 18}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
+ test "SPOP new implementation: code path #3" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 18]
+ assert {[llength $res] == 18}
+ assert {[r scard myset] == 2}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
test "SRANDMEMBER with <count> against non existing key" {
r srandmember nonexisting_key 100
} {}
@@ -450,6 +519,7 @@ start_server {
test "SMOVE non existing key" {
setup_move
assert_equal 0 [r smove myset1 myset2 foo]
+ assert_equal 0 [r smove myset1 myset1 foo]
assert_equal {1 a b} [lsort [r smembers myset1]]
assert_equal {2 3 4} [lsort [r smembers myset2]]
}
diff --git a/tests/unit/basic.tcl b/tests/unit/type/string.tcl
index 6f7fe292c..7122fd987 100644
--- a/tests/unit/basic.tcl
+++ b/tests/unit/type/string.tcl
@@ -1,9 +1,4 @@
-start_server {tags {"basic"}} {
- test {DEL all keys to start with a clean DB} {
- foreach key [r keys *] {r del $key}
- r dbsize
- } {0}
-
+start_server {tags {"string"}} {
test {SET and GET an item} {
r set x foobar
r get x
@@ -14,38 +9,6 @@ start_server {tags {"basic"}} {
r get x
} {}
- test {DEL against a single item} {
- r del x
- r get x
- } {}
-
- test {Vararg DEL} {
- r set foo1 a
- r set foo2 b
- r set foo3 c
- list [r del foo1 foo2 foo3 foo4] [r mget foo1 foo2 foo3]
- } {3 {{} {} {}}}
-
- test {KEYS with pattern} {
- foreach key {key_x key_y key_z foo_a foo_b foo_c} {
- r set $key hello
- }
- lsort [r keys foo*]
- } {foo_a foo_b foo_c}
-
- test {KEYS to get all keys} {
- lsort [r keys *]
- } {foo_a foo_b foo_c key_x key_y key_z}
-
- test {DBSIZE} {
- r dbsize
- } {6}
-
- test {DEL all keys} {
- foreach key [r keys *] {r del $key}
- r dbsize
- } {0}
-
test {Very big payload in GET/SET} {
set buf [string repeat "abcd" 1000000]
r set foo $buf
@@ -75,6 +38,7 @@ start_server {tags {"basic"}} {
} {}
test {SET 10000 numeric keys and access all them in reverse order} {
+ r flushdb
set err {}
for {set x 0} {$x < 10000} {incr x} {
r set $x $x
@@ -83,141 +47,18 @@ start_server {tags {"basic"}} {
for {set x 9999} {$x >= 0} {incr x -1} {
set val [r get $x]
if {$val ne $x} {
- set err "Eleemnt at position $x is $val instead of $x"
+ set err "Element at position $x is $val instead of $x"
break
}
}
set _ $err
} {}
- test {DBSIZE should be 10101 now} {
+ test {DBSIZE should be 10000 now} {
r dbsize
- } {10101}
+ } {10000}
}
- test {INCR against non existing key} {
- set res {}
- append res [r incr novar]
- append res [r get novar]
- } {11}
-
- test {INCR against key created by incr itself} {
- r incr novar
- } {2}
-
- test {INCR against key originally set with SET} {
- r set novar 100
- r incr novar
- } {101}
-
- test {INCR over 32bit value} {
- r set novar 17179869184
- r incr novar
- } {17179869185}
-
- test {INCRBY over 32bit value with over 32bit increment} {
- r set novar 17179869184
- r incrby novar 17179869184
- } {34359738368}
-
- test {INCR fails against key with spaces (left)} {
- r set novar " 11"
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against key with spaces (right)} {
- r set novar "11 "
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against key with spaces (both)} {
- r set novar " 11 "
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against a key holding a list} {
- r rpush mylist 1
- catch {r incr mylist} err
- r rpop mylist
- format $err
- } {WRONGTYPE*}
-
- test {DECRBY over 32bit value with over 32bit increment, negative res} {
- r set novar 17179869184
- r decrby novar 17179869185
- } {-1}
-
- test {INCRBYFLOAT against non existing key} {
- r del novar
- list [roundFloat [r incrbyfloat novar 1]] \
- [roundFloat [r get novar]] \
- [roundFloat [r incrbyfloat novar 0.25]] \
- [roundFloat [r get novar]]
- } {1 1 1.25 1.25}
-
- test {INCRBYFLOAT against key originally set with SET} {
- r set novar 1.5
- roundFloat [r incrbyfloat novar 1.5]
- } {3}
-
- test {INCRBYFLOAT over 32bit value} {
- r set novar 17179869184
- r incrbyfloat novar 1.5
- } {17179869185.5}
-
- test {INCRBYFLOAT over 32bit value with over 32bit increment} {
- r set novar 17179869184
- r incrbyfloat novar 17179869184
- } {34359738368}
-
- test {INCRBYFLOAT fails against key with spaces (left)} {
- set err {}
- r set novar " 11"
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against key with spaces (right)} {
- set err {}
- r set novar "11 "
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against key with spaces (both)} {
- set err {}
- r set novar " 11 "
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against a key holding a list} {
- r del mylist
- set err {}
- r rpush mylist 1
- catch {r incrbyfloat mylist 1.0} err
- r del mylist
- format $err
- } {WRONGTYPE*}
-
- test {INCRBYFLOAT does not allow NaN or Infinity} {
- r set foo 0
- set err {}
- catch {r incrbyfloat foo +inf} err
- set err
- # p.s. no way I can force NaN to test it from the API because
- # there is no way to increment / decrement by infinity nor to
- # perform divisions.
- } {ERR*would produce*}
-
- test {INCRBYFLOAT decrement} {
- r set foo 1
- roundFloat [r incrbyfloat foo -1.1]
- } {-0.1}
-
test "SETNX target key missing" {
r del novar
assert_equal 1 [r setnx novar foobared]
@@ -261,166 +102,6 @@ start_server {tags {"basic"}} {
assert_equal 20 [r get x]
}
- test "DEL against expired key" {
- r debug set-active-expire 0
- r setex keyExpire 1 valExpire
- after 1100
- assert_equal 0 [r del keyExpire]
- r debug set-active-expire 1
- }
-
- test {EXISTS} {
- set res {}
- r set newkey test
- append res [r exists newkey]
- r del newkey
- append res [r exists newkey]
- } {10}
-
- test {Zero length value in key. SET/GET/EXISTS} {
- r set emptykey {}
- set res [r get emptykey]
- append res [r exists emptykey]
- r del emptykey
- append res [r exists emptykey]
- } {10}
-
- test {Commands pipelining} {
- set fd [r channel]
- puts -nonewline $fd "SET k1 xyzk\r\nGET k1\r\nPING\r\n"
- flush $fd
- set res {}
- append res [string match OK* [r read]]
- append res [r read]
- append res [string match PONG* [r read]]
- format $res
- } {1xyzk1}
-
- test {Non existing command} {
- catch {r foobaredcommand} err
- string match ERR* $err
- } {1}
-
- test {RENAME basic usage} {
- r set mykey hello
- r rename mykey mykey1
- r rename mykey1 mykey2
- r get mykey2
- } {hello}
-
- test {RENAME source key should no longer exist} {
- r exists mykey
- } {0}
-
- test {RENAME against already existing key} {
- r set mykey a
- r set mykey2 b
- r rename mykey2 mykey
- set res [r get mykey]
- append res [r exists mykey2]
- } {b0}
-
- test {RENAMENX basic usage} {
- r del mykey
- r del mykey2
- r set mykey foobar
- r renamenx mykey mykey2
- set res [r get mykey2]
- append res [r exists mykey]
- } {foobar0}
-
- test {RENAMENX against already existing key} {
- r set mykey foo
- r set mykey2 bar
- r renamenx mykey mykey2
- } {0}
-
- test {RENAMENX against already existing key (2)} {
- set res [r get mykey]
- append res [r get mykey2]
- } {foobar}
-
- test {RENAME against non existing source key} {
- catch {r rename nokey foobar} err
- format $err
- } {ERR*}
-
- test {RENAME where source and dest key is the same} {
- catch {r rename mykey mykey} err
- format $err
- } {ERR*}
-
- test {RENAME with volatile key, should move the TTL as well} {
- r del mykey mykey2
- r set mykey foo
- r expire mykey 100
- assert {[r ttl mykey] > 95 && [r ttl mykey] <= 100}
- r rename mykey mykey2
- assert {[r ttl mykey2] > 95 && [r ttl mykey2] <= 100}
- }
-
- test {RENAME with volatile key, should not inherit TTL of target key} {
- r del mykey mykey2
- r set mykey foo
- r set mykey2 bar
- r expire mykey2 100
- assert {[r ttl mykey] == -1 && [r ttl mykey2] > 0}
- r rename mykey mykey2
- r ttl mykey2
- } {-1}
-
- test {DEL all keys again (DB 0)} {
- foreach key [r keys *] {
- r del $key
- }
- r dbsize
- } {0}
-
- test {DEL all keys again (DB 1)} {
- r select 10
- foreach key [r keys *] {
- r del $key
- }
- set res [r dbsize]
- r select 9
- format $res
- } {0}
-
- test {MOVE basic usage} {
- r set mykey foobar
- r move mykey 10
- set res {}
- lappend res [r exists mykey]
- lappend res [r dbsize]
- r select 10
- lappend res [r get mykey]
- lappend res [r dbsize]
- r select 9
- format $res
- } [list 0 0 foobar 1]
-
- test {MOVE against key existing in the target DB} {
- r set mykey hello
- r move mykey 10
- } {0}
-
- test {SET/GET keys in different DBs} {
- r set a hello
- r set b world
- r select 10
- r set a foo
- r set b bared
- r select 9
- set res {}
- lappend res [r get a]
- lappend res [r get b]
- r select 10
- lappend res [r get a]
- lappend res [r get b]
- r select 9
- format $res
- } {hello world foo bared}
-
test {MGET} {
r flushdb
r set foo BAR
@@ -438,37 +119,8 @@ start_server {tags {"basic"}} {
r mget foo baazz bar myset
} {BAR {} FOO {}}
- test {RANDOMKEY} {
- r flushdb
- r set foo x
- r set bar y
- set foo_seen 0
- set bar_seen 0
- for {set i 0} {$i < 100} {incr i} {
- set rkey [r randomkey]
- if {$rkey eq {foo}} {
- set foo_seen 1
- }
- if {$rkey eq {bar}} {
- set bar_seen 1
- }
- }
- list $foo_seen $bar_seen
- } {1 1}
-
- test {RANDOMKEY against empty DB} {
- r flushdb
- r randomkey
- } {}
-
- test {RANDOMKEY regression 1} {
- r flushdb
- r set x 10
- r del x
- r randomkey
- } {}
-
test {GETSET (set new value)} {
+ r del foo
list [r getset foo xyz] [r get foo]
} {{} xyz}
@@ -476,7 +128,7 @@ start_server {tags {"basic"}} {
r set foo bar
list [r getset foo xyz] [r get foo]
} {bar xyz}
-
+
test {MSET base case} {
r mset x 10 y "foo bar" z "x x x x x x x\n\n\r\n"
r mget x y z
@@ -763,10 +415,8 @@ start_server {tags {"basic"}} {
assert {$ttl <= 10 && $ttl > 5}
}
- test {KEYS * two times with long key, Github issue #1208} {
- r flushdb
- r set dlskeriewrioeuwqoirueioqwrueoqwrueqw test
- r keys *
- r keys *
- } {dlskeriewrioeuwqoirueioqwrueoqwrueqw}
+ test {GETRANGE with huge ranges, Github issue #1844} {
+ r set foo bar
+ r getrange foo 0 4294967297
+ } {bar}
}
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index 238eebb9d..82f76befe 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -43,6 +43,84 @@ start_server {tags {"zset"}} {
assert_error "*not*float*" {r zadd myzset nan abc}
}
+ test "ZADD with options syntax error with incomplete pair" {
+ r del ztmp
+ catch {r zadd ztmp xx 10 x 20} err
+ set err
+ } {ERR*}
+
+ test "ZADD XX option without key - $encoding" {
+ r del ztmp
+ assert {[r zadd ztmp xx 10 x] == 0}
+ assert {[r type ztmp] eq {none}}
+ }
+
+ test "ZADD XX existing key - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 x
+ assert {[r zadd ztmp xx 20 y] == 0}
+ assert {[r zcard ztmp] == 1}
+ }
+
+ test "ZADD XX returns the number of elements actually added" {
+ r del ztmp
+ r zadd ztmp 10 x
+ set retval [r zadd ztmp 10 x 20 y 30 z]
+ assert {$retval == 2}
+ }
+
+ test "ZADD XX updates existing elements score" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp xx 5 foo 11 x 21 y 40 zap
+ assert {[r zcard ztmp] == 3}
+ assert {[r zscore ztmp x] == 11}
+ assert {[r zscore ztmp y] == 21}
+ }
+
+ test "ZADD XX and NX are not compatible" {
+ r del ztmp
+ catch {r zadd ztmp xx nx 10 x} err
+ set err
+ } {ERR*}
+
+ test "ZADD NX with non exisitng key" {
+ r del ztmp
+ r zadd ztmp nx 10 x 20 y 30 z
+ assert {[r zcard ztmp] == 3}
+ }
+
+ test "ZADD NX only add new elements without updating old ones" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp nx 11 x 21 y 100 a 200 b] == 2}
+ assert {[r zscore ztmp x] == 10}
+ assert {[r zscore ztmp y] == 20}
+ assert {[r zscore ztmp a] == 100}
+ assert {[r zscore ztmp b] == 200}
+ }
+
+ test "ZADD INCR works like ZINCRBY" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp INCR 15 x
+ assert {[r zscore ztmp x] == 25}
+ }
+
+ test "ZADD INCR works with a single score-elemenet pair" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ catch {r zadd ztmp INCR 15 x 10 y} err
+ set err
+ } {ERR*}
+
+ test "ZADD CH option changes return value to all changed elements" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp 11 x 21 y 30 z] == 0}
+ assert {[r zadd ztmp ch 12 x 22 y 30 z] == 2}
+ }
+
test "ZINCRBY calls leading to NaN result in error" {
r zincrby myzset +inf abc
assert_error "*NaN*" {r zincrby myzset -inf abc}
@@ -77,6 +155,8 @@ start_server {tags {"zset"}} {
}
test "ZCARD basics - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 a 20 b 30 c
assert_equal 3 [r zcard ztmp]
assert_equal 0 [r zcard zdoesntexist]
}
@@ -210,6 +290,12 @@ start_server {tags {"zset"}} {
assert_equal 6 [r zscore zset bar]
}
+ test "ZINCRBY return value" {
+ r del ztmp
+ set retval [r zincrby ztmp 1.0 x]
+ assert {$retval == 1.0}
+ }
+
proc create_default_zset {} {
create_zset zset {-inf a 1 b 2 c 3 d 4 e 5 f +inf g}
}
diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl
new file mode 100644
index 000000000..e2f5d2942
--- /dev/null
+++ b/tests/unit/wait.tcl
@@ -0,0 +1,42 @@
+start_server {tags {"wait"}} {
+start_server {} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+
+ test {Setup slave} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {WAIT should acknowledge 1 additional copy of the data} {
+ $master set foo 0
+ $master incr foo
+ $master incr foo
+ $master incr foo
+ assert {[$master wait 1 5000] == 1}
+ assert {[$slave get foo] == 3}
+ }
+
+ test {WAIT should not acknowledge 2 additional copies of the data} {
+ $master incr foo
+ assert {[$master wait 2 1000] <= 1}
+ }
+
+ test {WAIT should not acknowledge 1 additional copy if slave is blocked} {
+ exec src/redis-cli -h $slave_host -p $slave_port debug sleep 5 > /dev/null 2> /dev/null &
+ after 1000 ;# Give redis-cli the time to execute the command.
+ $master set foo 0
+ $master incr foo
+ $master incr foo
+ $master incr foo
+ assert {[$master wait 1 3000] == 0}
+ }
+}}
diff --git a/utils/cluster_fail_time.tcl b/utils/cluster_fail_time.tcl
new file mode 100644
index 000000000..87399495f
--- /dev/null
+++ b/utils/cluster_fail_time.tcl
@@ -0,0 +1,50 @@
+# This simple script is used in order to estimate the average PFAIL->FAIL
+# state switch after a failure.
+
+set ::sleep_time 10 ; # How much to sleep to trigger PFAIL.
+set ::fail_port 30016 ; # Node to put in sleep.
+set ::other_port 30001 ; # Node to use to monitor the flag switch.
+
+proc avg vector {
+ set sum 0.0
+ foreach x $vector {
+ set sum [expr {$sum+$x}]
+ }
+ expr {$sum/[llength $vector]}
+}
+
+set samples {}
+while 1 {
+ exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null &
+
+ # Wait for fail? to appear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {[string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL?"
+ set start [clock milliseconds]
+
+ # Wait for fail? to disappear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {![string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL"
+ set now [clock milliseconds]
+ set elapsed [expr {$now-$start}]
+ puts $elapsed
+ lappend samples $elapsed
+
+ puts "AVG([llength $samples]): [avg $samples]"
+
+ # Wait for the instance to be available again.
+ exec redis-cli -p $::fail_port ping
+
+ # Wait for the fail flag to be cleared.
+ after 2000
+}
diff --git a/utils/corrupt_rdb.c b/utils/corrupt_rdb.c
new file mode 100644
index 000000000..7ba9caeee
--- /dev/null
+++ b/utils/corrupt_rdb.c
@@ -0,0 +1,44 @@
+/* Trivia program to corrupt an RDB file in order to check the RDB check
+ * program behavior and effectiveness.
+ *
+ * Copyright (C) 2016 Salvatore Sanfilippo.
+ * This software is released in the 3-clause BSD license. */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+
+int main(int argc, char **argv) {
+ struct stat stat;
+ int fd, cycles;
+
+ if (argc != 3) {
+ fprintf(stderr,"Usage: <filename> <cycles>\n");
+ exit(1);
+ }
+
+ srand(time(NULL));
+ cycles = atoi(argv[2]);
+ fd = open("dump.rdb",O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+ fstat(fd,&stat);
+
+ while(cycles--) {
+ unsigned char buf[32];
+ unsigned long offset = rand()%stat.st_size;
+ int writelen = 1+rand()%31;
+ int j;
+
+ for (j = 0; j < writelen; j++) buf[j] = (char)rand();
+ lseek(fd,offset,SEEK_SET);
+ printf("Writing %d bytes at offset %lu\n", writelen, offset);
+ write(fd,buf,writelen);
+ }
+ return 0;
+}
diff --git a/utils/create-cluster/.gitignore b/utils/create-cluster/.gitignore
new file mode 100644
index 000000000..2988ee919
--- /dev/null
+++ b/utils/create-cluster/.gitignore
@@ -0,0 +1,5 @@
+config.sh
+*.rdb
+*.aof
+*.conf
+*.log
diff --git a/utils/create-cluster/README b/utils/create-cluster/README
new file mode 100644
index 000000000..f2a89839b
--- /dev/null
+++ b/utils/create-cluster/README
@@ -0,0 +1,27 @@
+Create-cluster is a small script used to easily start a big number of Redis
+instances configured to run in cluster mode. Its main goal is to allow manual
+testing in a condition which is not easy to replicate with the Redis cluster
+unit tests, for example when a lot of instances are needed in order to trigger
+a given bug.
+
+The tool can also be used just to easily create a number of instances in a
+Redis Cluster in order to experiment a bit with the system.
+
+USAGE
+---
+
+To create a cluster, follow these steps:
+
+1. Edit create-cluster and change the start / end port, depending on the
+number of instances you want to create.
+2. Use "./create-cluster start" in order to run the instances.
+3. Use "./create-cluster create" in order to execute redis-trib create, so that
+an actual Redis cluster will be created.
+4. Now you are ready to play with the cluster. AOF files and logs for each instance are created in the current directory.
+
+In order to stop a cluster:
+
+1. Use "./create-cluster stop" to stop all the instances. After you stopped the instances you can use "./create-cluster start" to restart them if you change your mind.
+2. Use "./create-cluster clean" to remove all the AOF / log files to restart with a clean environment.
+
+Use the command "./create-cluster help" to get the full list of features.
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
new file mode 100755
index 000000000..d821683f6
--- /dev/null
+++ b/utils/create-cluster/create-cluster
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Settings
+PORT=30000
+TIMEOUT=2000
+NODES=6
+REPLICAS=1
+
+# You may want to put the above config parameters into config.sh in order to
+# override the defaults without modifying this script.
+
+if [ -a config.sh ]
+then
+ source "config.sh"
+fi
+
+# Computed vars
+ENDPORT=$((PORT+NODES))
+
+if [ "$1" == "start" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Starting $PORT"
+ ../../src/redis-server --port $PORT --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes
+ done
+ exit 0
+fi
+
+if [ "$1" == "create" ]
+then
+ HOSTS=""
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ HOSTS="$HOSTS 127.0.0.1:$PORT"
+ done
+ ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS
+ exit 0
+fi
+
+if [ "$1" == "stop" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Stopping $PORT"
+ ../../src/redis-cli -p $PORT shutdown nosave
+ done
+ exit 0
+fi
+
+if [ "$1" == "watch" ]
+then
+ PORT=$((PORT+1))
+ while [ 1 ]; do
+ clear
+ date
+ ../../src/redis-cli -p $PORT cluster nodes | head -30
+ sleep 1
+ done
+ exit 0
+fi
+
+if [ "$1" == "tail" ]
+then
+ INSTANCE=$2
+ PORT=$((PORT+INSTANCE))
+ tail -f ${PORT}.log
+ exit 0
+fi
+
+if [ "$1" == "call" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ ../../src/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9
+ done
+ exit 0
+fi
+
+if [ "$1" == "clean" ]
+then
+ rm -rf *.log
+ rm -rf appendonly*.aof
+ rm -rf dump*.rdb
+ rm -rf nodes*.conf
+ exit 0
+fi
+
+if [ "$1" == "clean-logs" ]
+then
+ rm -rf *.log
+ exit 0
+fi
+
+echo "Usage: $0 [start|create|stop|watch|tail|clean]"
+echo "start -- Launch Redis Cluster instances."
+echo "create -- Create a cluster using redis-trib create."
+echo "stop -- Stop Redis Cluster instances."
+echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node."
+echo "tail <id> -- Run tail -f of instance at base port + ID."
+echo "clean -- Remove all instances data, logs, configs."
+echo "clean-logs -- Remove just instances logs."
diff --git a/utils/generate-command-help.rb b/utils/generate-command-help.rb
index 47fbc645c..f3dfb31b3 100755
--- a/utils/generate-command-help.rb
+++ b/utils/generate-command-help.rb
@@ -12,7 +12,9 @@ GROUPS = [
"connection",
"server",
"scripting",
- "hyperloglog"
+ "hyperloglog",
+ "cluster",
+ "geo"
].freeze
GROUPS_BY_NAME = Hash[*
@@ -50,7 +52,7 @@ def commands
require "json"
require "uri"
- url = URI.parse "https://raw.github.com/antirez/redis-doc/master/commands.json"
+ url = URI.parse "https://raw.githubusercontent.com/antirez/redis-doc/master/commands.json"
client = Net::HTTP.new url.host, url.port
client.use_ssl = true
response = client.get url.path
diff --git a/utils/graphs/commits-over-time/README.md b/utils/graphs/commits-over-time/README.md
new file mode 100644
index 000000000..b28019ecc
--- /dev/null
+++ b/utils/graphs/commits-over-time/README.md
@@ -0,0 +1,16 @@
+This Tcl script is what I used in order to generate the graph you
+can find at http://antirez.com/news/98. It's really quick & dirty, more
+a throw-away program than anything else, but probably could be reused or
+modified in the future in order to visualize other similar data or an
+updated version of the same data.
+
+The usage is trivial:
+
+ ./genhtml.tcl > output.html
+
+The generated HTML is quite broken but good enough to grab a screenshot
+from the browser. Feel free to improve it if you got time / interest.
+
+Note that the code filtering the tags, and the hardcoded branch name, does
+not make the script, as it is, able to analyze a different repository.
+However the changes needed are trivial.
diff --git a/utils/graphs/commits-over-time/genhtml.tcl b/utils/graphs/commits-over-time/genhtml.tcl
new file mode 100755
index 000000000..c4b4e0989
--- /dev/null
+++ b/utils/graphs/commits-over-time/genhtml.tcl
@@ -0,0 +1,96 @@
+#!/usr/bin/env tclsh
+
+# Load commits history as "sha1 unixtime".
+set commits [exec git log unstable {--pretty="%H %at"}]
+set raw_tags [exec git tag]
+
+# Load all the tags that are about stable releases.
+foreach tag $raw_tags {
+ if {[string match v*-stable $tag]} {
+ set tag [string range $tag 1 end-7]
+ puts $tag
+ }
+ if {[regexp {^[0-9]+.[0-9]+.[0-9]+$} $tag]} {
+ lappend tags $tag
+ }
+}
+
+# For each tag, create a list of "name unixtime"
+foreach tag $tags {
+ set taginfo [exec git log $tag -n 1 "--pretty=\"$tag %at\""]
+ set taginfo [string trim $taginfo {"}]
+ lappend labels $taginfo
+}
+
+# For each commit, check the amount of code changed and create an array
+# mapping the commit to the number of lines affected.
+foreach c $commits {
+ set stat [exec git show --oneline --numstat [lindex $c 0]]
+ set linenum 0
+ set affected 0
+ foreach line [split $stat "\n"] {
+ incr linenum
+ if {$linenum == 1 || [string match *deps/* $line]} continue
+ if {[catch {llength $line} numfields]} continue
+ if {$numfields == 0} continue
+ catch {
+ incr affected [lindex $line 0]
+ incr affected [lindex $line 1]
+ }
+ }
+ set commit_to_affected([lindex $c 0]) $affected
+}
+
+set base_time [lindex [lindex $commits end] 1]
+puts [clock format $base_time]
+
+# Generate a graph made of HTML DIVs.
+puts {<html>
+<style>
+.box {
+ position:absolute;
+ width:10px;
+ height:5px;
+ border:1px black solid;
+ background-color:#44aa33;
+ opacity: 0.04;
+}
+.label {
+ position:absolute;
+ background-color:#dddddd;
+ font-family:helvetica;
+ font-size:12px;
+ padding:2px;
+ color:#666;
+ border:1px #aaa solid;
+ border-radius: 5px;
+}
+#outer {
+ position:relative;
+ width:1500;
+ height:500;
+ border:1px #aaa solid;
+}
+</style>
+<div id="outer">
+}
+foreach c $commits {
+ set sha [lindex $c 0]
+ set t [expr {([lindex $c 1]-$base_time)/(3600*24*2)}]
+ set affected [expr $commit_to_affected($sha)]
+ set left $t
+ set height [expr {log($affected)*20}]
+ puts "<div class=\"box\" style=\"left:$left; bottom:0; height:$height\"></div>"
+}
+
+set bottom -30
+foreach l $labels {
+ set name [lindex $l 0]
+ set t [expr {([lindex $l 1]-$base_time)/(3600*24*2)}]
+ set left $t
+ if {$left < 0} continue
+ incr bottom -20
+ if {$bottom == -210} {set bottom -30}
+ puts "<div class=\"label\" style=\"left:$left; bottom:$bottom\">$name</div>"
+}
+puts {</div></html>}
diff --git a/utils/hashtable/README b/utils/hashtable/README
new file mode 100644
index 000000000..e2862f012
--- /dev/null
+++ b/utils/hashtable/README
@@ -0,0 +1,13 @@
+Hash table implementation related utilities.
+
+rehashing.c
+---
+
+Visually show buckets in the two hash tables between rehashings. Also stress
+test getRandomKeys() implementation, that may actually disappear from
+Redis soon; however, some of the visualization code is reusable in future
+bug investigations.
+
+Compile with:
+
+ cc -I ../../src/ rehashing.c ../../src/zmalloc.c ../../src/dict.c -o rehashing_test
diff --git a/utils/hashtable/rehashing.c b/utils/hashtable/rehashing.c
new file mode 100644
index 000000000..b57a9043a
--- /dev/null
+++ b/utils/hashtable/rehashing.c
@@ -0,0 +1,142 @@
+#include "redis.h"
+#include "dict.h"
+
+void _redisAssert(char *x, char *y, int l) {
+ printf("ASSERT: %s %s %d\n",x,y,l);
+ exit(1);
+}
+
+unsigned int dictKeyHash(const void *keyp) {
+ unsigned long key = (unsigned long)keyp;
+ key = dictGenHashFunction(&key,sizeof(key));
+ key += ~(key << 15);
+ key ^= (key >> 10);
+ key += (key << 3);
+ key ^= (key >> 6);
+ key += ~(key << 11);
+ key ^= (key >> 16);
+ return key;
+}
+
+int dictKeyCompare(void *privdata, const void *key1, const void *key2) {
+ unsigned long k1 = (unsigned long)key1;
+ unsigned long k2 = (unsigned long)key2;
+ return k1 == k2;
+}
+
+dictType dictTypeTest = {
+ dictKeyHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+void showBuckets(dictht ht) {
+ if (ht.table == NULL) {
+ printf("NULL\n");
+ } else {
+ int j;
+ for (j = 0; j < ht.size; j++) {
+ printf("%c", ht.table[j] ? '1' : '0');
+ }
+ printf("\n");
+ }
+}
+
+void show(dict *d) {
+ int j;
+ if (d->rehashidx != -1) {
+ printf("rhidx: ");
+ for (j = 0; j < d->rehashidx; j++)
+ printf(".");
+ printf("|\n");
+ }
+ printf("ht[0]: ");
+ showBuckets(d->ht[0]);
+ printf("ht[1]: ");
+ showBuckets(d->ht[1]);
+ printf("\n");
+}
+
+int sortPointers(const void *a, const void *b) {
+ unsigned long la, lb;
+
+ la = (long) (*((dictEntry**)a));
+ lb = (long) (*((dictEntry**)b));
+ return la-lb;
+}
+
+void stressGetKeys(dict *d, int times, int *perfect_run, int *approx_run) {
+ int j;
+
+ dictEntry **des = zmalloc(sizeof(dictEntry*)*dictSize(d));
+ for (j = 0; j < times; j++) {
+ int requested = rand() % (dictSize(d)+1);
+ int returned = dictGetSomeKeys(d, des, requested);
+ int dup = 0;
+
+ qsort(des,returned,sizeof(dictEntry*),sortPointers);
+ if (returned > 1) {
+ int i;
+ for (i = 0; i < returned-1; i++) {
+ if (des[i] == des[i+1]) dup++;
+ }
+ }
+
+ if (requested == returned && dup == 0) {
+ (*perfect_run)++;
+ } else {
+ (*approx_run)++;
+ printf("Requested, returned, duplicated: %d %d %d\n",
+ requested, returned, dup);
+ }
+ }
+ zfree(des);
+}
+
+#define MAX1 120
+#define MAX2 1000
+int main(void) {
+ dict *d = dictCreate(&dictTypeTest,NULL);
+ unsigned long i;
+ srand(time(NULL));
+
+ for (i = 0; i < MAX1; i++) {
+ dictAdd(d,(void*)i,NULL);
+ show(d);
+ }
+ printf("Size: %d\n", (int)dictSize(d));
+
+ for (i = 0; i < MAX1; i++) {
+ dictDelete(d,(void*)i);
+ dictResize(d);
+ show(d);
+ }
+ dictRelease(d);
+
+ d = dictCreate(&dictTypeTest,NULL);
+
+ printf("Stress testing dictGetSomeKeys\n");
+ int perfect_run = 0, approx_run = 0;
+
+ for (i = 0; i < MAX2; i++) {
+ dictAdd(d,(void*)i,NULL);
+ stressGetKeys(d,100,&perfect_run,&approx_run);
+ }
+
+ for (i = 0; i < MAX2; i++) {
+ dictDelete(d,(void*)i);
+ dictResize(d);
+ stressGetKeys(d,100,&perfect_run,&approx_run);
+ }
+
+ printf("dictGetSomeKey, %d perfect runs, %d approximated runs\n",
+ perfect_run, approx_run);
+
+ dictRelease(d);
+
+ printf("TEST PASSED!\n");
+ return 0;
+}
diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb
index 745baddcf..6c7596d17 100644
--- a/utils/hyperloglog/hll-gnuplot-graph.rb
+++ b/utils/hyperloglog/hll-gnuplot-graph.rb
@@ -30,7 +30,7 @@ def run_experiment(r,seed,max,step)
elements << ele
i += 1
}
- r.pfadd('hll',*elements)
+ r.pfadd('hll',elements)
approx = r.pfcount('hll')
err = approx-i
rel_err = 100.to_f*err/i
diff --git a/utils/install_server.sh b/utils/install_server.sh
index 3a5fc3147..7eb341417 100755
--- a/utils/install_server.sh
+++ b/utils/install_server.sh
@@ -25,9 +25,25 @@
#
################################################################################
#
-# Interactive service installer for redis server
-# this generates a redis config file and an /etc/init.d script, and installs them
-# this scripts should be run as root
+# Service installer for redis server, runs interactively by default.
+#
+# To run this script non-interactively (for automation/provisioning purposes),
+# feed the variables into the script. You will be prompted for any missing values!
+# Tip: Environment variables also support command substitution (see REDIS_EXECUTABLE)
+#
+# Example:
+#
+# sudo REDIS_PORT=1234 \
+# REDIS_CONFIG_FILE=/etc/redis/1234.conf \
+# REDIS_LOG_FILE=/var/log/redis_1234.log \
+# REDIS_DATA_DIR=/var/lib/redis/1234 \
+# REDIS_EXECUTABLE=`command -v redis-server` ./utils/install_server.sh
+#
+# This generates a redis config file and an /etc/init.d script, and installs them.
+#
+# /!\ This script should be run as root
+#
+################################################################################
die () {
echo "ERROR: $1. Aborting!"
@@ -42,6 +58,7 @@ SCRIPTPATH=$(dirname $SCRIPT)
#Initial defaults
_REDIS_PORT=6379
+_MANUAL_EXECUTION=false
echo "Welcome to the redis service installer"
echo "This script will help you easily set up a running redis server"
@@ -53,47 +70,61 @@ if [ "$(id -u)" -ne 0 ] ; then
exit 1
fi
-#Read the redis port
-read -p "Please select the redis port for this instance: [$_REDIS_PORT] " REDIS_PORT
if ! echo $REDIS_PORT | egrep -q '^[0-9]+$' ; then
- echo "Selecting default: $_REDIS_PORT"
- REDIS_PORT=$_REDIS_PORT
+ _MANUAL_EXECUTION=true
+ #Read the redis port
+ read -p "Please select the redis port for this instance: [$_REDIS_PORT] " REDIS_PORT
+ if ! echo $REDIS_PORT | egrep -q '^[0-9]+$' ; then
+ echo "Selecting default: $_REDIS_PORT"
+ REDIS_PORT=$_REDIS_PORT
+ fi
fi
-#read the redis config file
-_REDIS_CONFIG_FILE="/etc/redis/$REDIS_PORT.conf"
-read -p "Please select the redis config file name [$_REDIS_CONFIG_FILE] " REDIS_CONFIG_FILE
if [ -z "$REDIS_CONFIG_FILE" ] ; then
- REDIS_CONFIG_FILE=$_REDIS_CONFIG_FILE
- echo "Selected default - $REDIS_CONFIG_FILE"
+ _MANUAL_EXECUTION=true
+ #read the redis config file
+ _REDIS_CONFIG_FILE="/etc/redis/$REDIS_PORT.conf"
+ read -p "Please select the redis config file name [$_REDIS_CONFIG_FILE] " REDIS_CONFIG_FILE
+ if [ -z "$REDIS_CONFIG_FILE" ] ; then
+ REDIS_CONFIG_FILE=$_REDIS_CONFIG_FILE
+ echo "Selected default - $REDIS_CONFIG_FILE"
+ fi
fi
-#read the redis log file path
-_REDIS_LOG_FILE="/var/log/redis_$REDIS_PORT.log"
-read -p "Please select the redis log file name [$_REDIS_LOG_FILE] " REDIS_LOG_FILE
if [ -z "$REDIS_LOG_FILE" ] ; then
- REDIS_LOG_FILE=$_REDIS_LOG_FILE
- echo "Selected default - $REDIS_LOG_FILE"
+ _MANUAL_EXECUTION=true
+ #read the redis log file path
+ _REDIS_LOG_FILE="/var/log/redis_$REDIS_PORT.log"
+ read -p "Please select the redis log file name [$_REDIS_LOG_FILE] " REDIS_LOG_FILE
+ if [ -z "$REDIS_LOG_FILE" ] ; then
+ REDIS_LOG_FILE=$_REDIS_LOG_FILE
+ echo "Selected default - $REDIS_LOG_FILE"
+ fi
fi
-
-#get the redis data directory
-_REDIS_DATA_DIR="/var/lib/redis/$REDIS_PORT"
-read -p "Please select the data directory for this instance [$_REDIS_DATA_DIR] " REDIS_DATA_DIR
if [ -z "$REDIS_DATA_DIR" ] ; then
- REDIS_DATA_DIR=$_REDIS_DATA_DIR
- echo "Selected default - $REDIS_DATA_DIR"
+ _MANUAL_EXECUTION=true
+ #get the redis data directory
+ _REDIS_DATA_DIR="/var/lib/redis/$REDIS_PORT"
+ read -p "Please select the data directory for this instance [$_REDIS_DATA_DIR] " REDIS_DATA_DIR
+ if [ -z "$REDIS_DATA_DIR" ] ; then
+ REDIS_DATA_DIR=$_REDIS_DATA_DIR
+ echo "Selected default - $REDIS_DATA_DIR"
+ fi
fi
-#get the redis executable path
-_REDIS_EXECUTABLE=`command -v redis-server`
-read -p "Please select the redis executable path [$_REDIS_EXECUTABLE] " REDIS_EXECUTABLE
if [ ! -x "$REDIS_EXECUTABLE" ] ; then
- REDIS_EXECUTABLE=$_REDIS_EXECUTABLE
-
+ _MANUAL_EXECUTION=true
+ #get the redis executable path
+ _REDIS_EXECUTABLE=`command -v redis-server`
+ read -p "Please select the redis executable path [$_REDIS_EXECUTABLE] " REDIS_EXECUTABLE
if [ ! -x "$REDIS_EXECUTABLE" ] ; then
- echo "Mmmmm... it seems like you don't have a redis executable. Did you run make install yet?"
- exit 1
+ REDIS_EXECUTABLE=$_REDIS_EXECUTABLE
+
+ if [ ! -x "$REDIS_EXECUTABLE" ] ; then
+ echo "Mmmmm... it seems like you don't have a redis executable. Did you run make install yet?"
+ exit 1
+ fi
fi
fi
@@ -112,7 +143,9 @@ echo "Data dir : $REDIS_DATA_DIR"
echo "Executable : $REDIS_EXECUTABLE"
echo "Cli Executable : $CLI_EXEC"
-read -p "Is this ok? Then press ENTER to go on or Ctrl-C to abort." _UNUSED_
+if $_MANUAL_EXECUTION == true ; then
+ read -p "Is this ok? Then press ENTER to go on or Ctrl-C to abort." _UNUSED_
+fi
mkdir -p `dirname "$REDIS_CONFIG_FILE"` || die "Could not create redis config directory"
mkdir -p `dirname "$REDIS_LOG_FILE"` || die "Could not create redis log dir"
@@ -152,7 +185,7 @@ rm -f $TMP_FILE
#we hard code the configs here to avoid issues with templates containing env vars
#kinda lame but works!
REDIS_INIT_HEADER=\
-"#/bin/sh\n
+"#!/bin/sh\n
#Configurations injected by install_server below....\n\n
EXEC=$REDIS_EXECUTABLE\n
CLIEXEC=$CLI_EXEC\n
@@ -193,7 +226,7 @@ fi
# warning if init info is not available.
cat > ${TMP_FILE} <<EOT
-#/bin/sh
+#!/bin/sh
#Configurations injected by install_server below....
EXEC=$REDIS_EXECUTABLE
diff --git a/utils/lru/README b/utils/lru/README
index 288189e3e..f043b2979 100644
--- a/utils/lru/README
+++ b/utils/lru/README
@@ -3,11 +3,17 @@ Redis approximated LRU algorithm against the theoretical output of true
LRU algorithm.
In order to use the program you need to recompile Redis setting the define
-REDIS_LRU_CLOCK_RESOLUTION to 1, by editing redis.h.
+REDIS_LRU_CLOCK_RESOLUTION to 1, by editing the file server.h.
This allows to execute the program in a fast way since the 1 ms resolution
is enough for all the objects to have a different enough time stamp during
the test.
The program is executed like this:
- ruby test-lru.rb > /tmp/lru.html
+ ruby test-lru.rb /tmp/lru.html
+
+You can optionally specify how many times to run the test by adding an
+additional argument; the program will then output the average of the
+different runs. For instance, to run the test 10 times use:
+
+ ruby test-lru.rb /tmp/lru.html 10
diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c
new file mode 100644
index 000000000..6aa5911ac
--- /dev/null
+++ b/utils/lru/lfu-simulation.c
@@ -0,0 +1,158 @@
+#include <stdio.h>
+#include <time.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+int decr_every = 1;
+int keyspace_size = 1000000;
+time_t switch_after = 30; /* Switch access pattern after N seconds. */
+
+struct entry {
+ /* Field that the LFU Redis implementation will have (we have
+ * 24 bits of total space in the object->lru field). */
+ uint8_t counter; /* Logarithmic counter. */
+ uint16_t decrtime; /* (Reduced precision) time of last decrement. */
+
+ /* Fields only useful for visualization. */
+ uint64_t hits; /* Number of real accesses. */
+ time_t ctime; /* Key creation time. */
+};
+
+#define to_16bit_minutes(x) ((x/60) & 65535)
+#define COUNTER_INIT_VAL 5
+
+/* Compute the difference in minutes between two 16-bit minute times
+ * obtained with to_16bit_minutes(). Since they can wrap around, if
+ * we detect an overflow we account for it as if the counter wrapped
+ * a single time. */
+uint16_t minutes_diff(uint16_t now, uint16_t prev) {
+ if (now >= prev) return now-prev;
+ return 65535-prev+now;
+}
+
+/* Increment a counter logarithmically: the greater its value, the
+ * less likely it is that the counter is actually incremented.
+ * The counter value saturates at the maximum of 255. */
+uint8_t log_incr(uint8_t counter) {
+ if (counter == 255) return counter;
+ double r = (double)rand()/RAND_MAX;
+ double baseval = counter-COUNTER_INIT_VAL;
+ if (baseval < 0) baseval = 0;
+ double limit = 1.0/(baseval*10+1);
+ if (r < limit) counter++;
+ return counter;
+}
+
+/* Simulate an access to an entry. */
+void access_entry(struct entry *e) {
+ e->counter = log_incr(e->counter);
+ e->hits++;
+}
+
+/* Return the entry LFU value and as a side effect decrement the
+ * entry value if the decrement time was reached. */
+uint8_t scan_entry(struct entry *e) {
+ if (minutes_diff(to_16bit_minutes(time(NULL)),e->decrtime)
+ >= decr_every)
+ {
+ if (e->counter) {
+ if (e->counter > COUNTER_INIT_VAL*2) {
+ e->counter /= 2;
+ } else {
+ e->counter--;
+ }
+ }
+ e->decrtime = to_16bit_minutes(time(NULL));
+ }
+ return e->counter;
+}
+
+/* Print the entry info. */
+void show_entry(long pos, struct entry *e) {
+ char *tag = "normal ";
+
+ if (pos >= 10 && pos <= 14) tag = "new no access";
+ if (pos >= 15 && pos <= 19) tag = "new accessed ";
+ if (pos >= keyspace_size -5) tag= "old no access";
+
+ printf("%ld] <%s> frequency:%d decrtime:%d [%lu hits | age:%ld sec]\n",
+ pos, tag, e->counter, e->decrtime, (unsigned long)e->hits,
+ time(NULL) - e->ctime);
+}
+
+int main(void) {
+ time_t start = time(NULL);
+ time_t new_entry_time = start;
+ time_t display_time = start;
+ struct entry *entries = malloc(sizeof(*entries)*keyspace_size);
+ long j;
+
+ /* Initialize. */
+ for (j = 0; j < keyspace_size; j++) {
+ entries[j].counter = COUNTER_INIT_VAL;
+ entries[j].decrtime = to_16bit_minutes(start);
+ entries[j].hits = 0;
+ entries[j].ctime = time(NULL);
+ }
+
+ while(1) {
+ time_t now = time(NULL);
+ long idx;
+
+ /* Scan N random entries (simulates the eviction under maxmemory). */
+ for (j = 0; j < 3; j++) {
+ scan_entry(entries+(rand()%keyspace_size));
+ }
+
+ /* Access a random entry: use a power-law access pattern up to
+ * 'switch_after' seconds. Then revert to flat access pattern. */
+ if (now-start < switch_after) {
+ /* Power law. */
+ idx = 1;
+ while((rand() % 21) != 0 && idx < keyspace_size) idx *= 2;
+ if (idx > keyspace_size) idx = keyspace_size;
+ idx = rand() % idx;
+ } else {
+ /* Flat. */
+ idx = rand() % keyspace_size;
+ }
+
+ /* Never access entries between position 10 and 14, so that
+ * we simulate what happens to new entries that are never
+ * accessed VS new entries which are accessed in positions
+ * 15-19.
+ *
+ * Also never access last 5 entry, so that we have keys which
+ * are never recreated (old), and never accessed. */
+ if ((idx < 10 || idx > 14) && (idx < keyspace_size-5))
+ access_entry(entries+idx);
+
+ /* Simulate the addition of new entries at positions between
+ * 10 and 19, a random one every 10 seconds. */
+ if (new_entry_time <= now) {
+ idx = 10+(rand()%10);
+ entries[idx].counter = COUNTER_INIT_VAL;
+ entries[idx].decrtime = to_16bit_minutes(time(NULL));
+ entries[idx].hits = 0;
+ entries[idx].ctime = time(NULL);
+ new_entry_time = now+10;
+ }
+
+ /* Show the first 20 entries and the last 20 entries. */
+ if (display_time != now) {
+ printf("=============================\n");
+ printf("Current minutes time: %d\n", (int)to_16bit_minutes(now));
+ printf("Access method: %s\n",
+ (now-start < switch_after) ? "power-law" : "flat");
+
+ for (j = 0; j < 20; j++)
+ show_entry(j,entries+j);
+
+ for (j = keyspace_size-20; j < keyspace_size; j++)
+ show_entry(j,entries+j);
+ display_time = now;
+ }
+ }
+ return 0;
+}
+
diff --git a/utils/lru/test-lru.rb b/utils/lru/test-lru.rb
index d4b0f88cf..d511e206f 100644
--- a/utils/lru/test-lru.rb
+++ b/utils/lru/test-lru.rb
@@ -1,112 +1,223 @@
require 'rubygems'
require 'redis'
-r = Redis.new
-r.config("SET","maxmemory","2000000")
-r.config("SET","maxmemory-policy","allkeys-lru")
-r.config("SET","maxmemory-samples",5)
-r.config("RESETSTAT")
-r.flushall
-
-puts <<EOF
-<html>
-<body>
-<style>
-.box {
- width:5px;
- height:5px;
- float:left;
- margin: 1px;
-}
-
-.old {
- border: 1px black solid;
-}
-
-.new {
- border: 1px green solid;
-}
-
-.ex {
- background-color: #666;
-}
-</style>
-<pre>
+$runs = []; # Remember the error rate of each run for average purposes.
+$o = {}; # Options set parsing arguments
+
+def testit(filename)
+ r = Redis.new
+ r.config("SET","maxmemory","2000000")
+ if $o[:ttl]
+ r.config("SET","maxmemory-policy","volatile-ttl")
+ else
+ r.config("SET","maxmemory-policy","allkeys-lru")
+ end
+ r.config("SET","maxmemory-samples",5)
+ r.config("RESETSTAT")
+ r.flushall
+
+ html = ""
+ html << <<EOF
+ <html>
+ <body>
+ <style>
+ .box {
+ width:5px;
+ height:5px;
+ float:left;
+ margin: 1px;
+ }
+
+ .old {
+ border: 1px black solid;
+ }
+
+ .new {
+ border: 1px green solid;
+ }
+
+ .otherdb {
+ border: 1px red solid;
+ }
+
+ .ex {
+ background-color: #666;
+ }
+ </style>
+ <pre>
EOF
-# Fill
-oldsize = r.dbsize
-id = 0
-while true
- id += 1
- r.set(id,"foo")
- newsize = r.dbsize
- break if newsize == oldsize
- oldsize = newsize
-end
+ # Fill the DB up to the first eviction.
+ oldsize = r.dbsize
+ id = 0
+ while true
+ id += 1
+ begin
+ r.set(id,"foo")
+ rescue
+ break
+ end
+ newsize = r.dbsize
+ break if newsize == oldsize # A key was evicted? Stop.
+ oldsize = newsize
+ end
-inserted = r.dbsize
-first_set_max_id = id
-puts "#{r.dbsize} keys inserted"
+ inserted = r.dbsize
+ first_set_max_id = id
+ html << "#{r.dbsize} keys inserted.\n"
-# Access keys sequencially
+ # Access keys sequentially, so that in theory the first part will be expired
+ # and the latter part will not, according to perfect LRU.
-puts "Access keys sequencially"
-(1..first_set_max_id).each{|id|
- r.get(id)
-# sleep 0.001
-}
+ if $o[:ttl]
+ STDERR.puts "Set increasing expire value"
+ (1..first_set_max_id).each{|id|
+ r.expire(id,1000+id)
+ STDERR.print(".") if (id % 150) == 0
+ }
+ else
+ STDERR.puts "Access keys sequentially"
+ (1..first_set_max_id).each{|id|
+ r.get(id)
+ sleep 0.001
+ STDERR.print(".") if (id % 150) == 0
+ }
+ end
+ STDERR.puts
+
+ # Insert more 50% keys. We expect that the new keys will rarely be expired
+ # since their last access time is recent compared to the others.
+ #
+ # Note that we insert the first 100 keys of the new set into DB1 instead
+ # of DB0, so that we can try how cross-DB eviction works.
+ half = inserted/2
+ html << "Insert enough keys to evict half the keys we inserted.\n"
+ add = 0
+
+ otherdb_start_idx = id+1
+ otherdb_end_idx = id+100
+ while true
+ add += 1
+ id += 1
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ r.set(id,"foo")
+ r.select(0)
+ else
+ r.set(id,"foo")
+ end
+ break if r.info['evicted_keys'].to_i >= half
+ end
+
+ html << "#{add} additional keys added.\n"
+ html << "#{r.dbsize} keys in DB.\n"
+
+ # Check if evicted keys respect LRU
+ # We consider errors from 1 to N progressively more serious as they violate
+ # more the access pattern.
+
+ errors = 0
+ e = 1
+ error_per_key = 100000.0/first_set_max_id
+ half_set_size = first_set_max_id/2
+ maxerr = 0
+ (1..(first_set_max_id/2)).each{|id|
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ exists = r.exists(id)
+ r.select(0)
+ else
+ exists = r.exists(id)
+ end
+ if id < first_set_max_id/2
+ thiserr = error_per_key * ((half_set_size-id).to_f/half_set_size)
+ maxerr += thiserr
+ errors += thiserr if exists
+ elsif id >= first_set_max_id/2
+ thiserr = error_per_key * ((id-half_set_size).to_f/half_set_size)
+ maxerr += thiserr
+ errors += thiserr if !exists
+ end
+ }
+ errors = errors*100/maxerr
+
+ STDERR.puts "Test finished with #{errors}% error! Generating HTML on stdout."
+
+ html << "#{errors}% error!\n"
+ html << "</pre>"
+ $runs << errors
+
+ # Generate the graphical representation
+ (1..id).each{|id|
+ # Mark first set and added items in a different way.
+ c = "box"
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ c << " otherdb"
+ elsif id <= first_set_max_id
+ c << " old"
+ else
+ c << " new"
+ end
+
+ # Add class if exists
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ exists = r.exists(id)
+ r.select(0)
+ else
+ exists = r.exists(id)
+ end
+
+ c << " ex" if exists
+ html << "<div title=\"#{id}\" class=\"#{c}\"></div>"
+ }
+
+ # Close HTML page
+
+ html << <<EOF
+ </body>
+ </html>
+EOF
-# Insert more 50% keys. We expect that the new keys
-half = inserted/2
-puts "Insert enough keys to evict half the keys we inserted"
-add = 0
-while true
- add += 1
- id += 1
- r.set(id,"foo")
- break if r.info['evicted_keys'].to_i >= half
+ f = File.open(filename,"w")
+ f.write(html)
+ f.close
end
-puts "#{add} additional keys added."
-puts "#{r.dbsize} keys in DB"
-
-# Check if evicted keys respect LRU
-# We consider errors from 1 to N progressively more serious as they violate
-# more the access pattern.
-
-errors = 0
-e = 1
-edecr = 1.0/(first_set_max_id/2)
-(1..(first_set_max_id/2)).each{|id|
- e -= edecr if e > 0
- e = 0 if e < 0
- if r.exists(id)
- errors += e
- end
-}
+def print_avg
+ avg = ($runs.reduce {|a,b| a+b}) / $runs.length
+ puts "#{$runs.length} runs, AVG is #{avg}"
+end
-puts "#{errors} errors!"
-puts "</pre>"
+if ARGV.length < 1
+ STDERR.puts "Usage: ruby test-lru.rb <html-output-filename> [--runs <count>] [--ttl]"
+ STDERR.puts "Options:"
+ STDERR.puts " --runs <count> Execute the test <count> times."
+ STDERR.puts " --ttl Set keys with increasing TTL values"
+ STDERR.puts " (starting from 1000 seconds) in order to"
+ STDERR.puts " test the volatile-lru policy."
+ exit 1
+end
-# Generate the graphical representation
-(1..id).each{|id|
- # Mark first set and added items in a different way.
- c = "box"
- if id <= first_set_max_id
- c << " old"
+filename = ARGV[0]
+$o[:numruns] = 1
+
+# Options parsing
+i = 1
+while i < ARGV.length
+ if ARGV[i] == '--runs'
+ $o[:numruns] = ARGV[i+1].to_i
+ i+= 1
+ elsif ARGV[i] == '--ttl'
+ $o[:ttl] = true
else
- c << " new"
+ STDERR.puts "Unknown option #{ARGV[i]}"
+ exit 1
end
+ i+= 1
+end
- # Add class if exists
- c << " ex" if r.exists(id)
- puts "<div class=\"#{c}\"></div>"
+$o[:numruns].times {
+ testit(filename)
+ print_avg if $o[:numruns] != 1
}
-
-# Close HTML page
-
-puts <<EOF
-</body>
-</html>
-EOF
diff --git a/utils/redis_init_script.tpl b/utils/redis_init_script.tpl
index d65086312..2e5b61301 100755
--- a/utils/redis_init_script.tpl
+++ b/utils/redis_init_script.tpl
@@ -26,11 +26,12 @@ case "$1" in
fi
;;
status)
- if [ ! -f $PIDFILE ]
+ PID=$(cat $PIDFILE)
+ if [ ! -x /proc/${PID} ]
then
echo 'Redis is not running'
else
- echo "Redis is running ($(<$PIDFILE))"
+ echo "Redis is running ($PID)"
fi
;;
restart)
diff --git a/utils/mkrelease.sh b/utils/releasetools/01_create_tarball.sh
index 2316fff96..54bca8c04 100755
--- a/utils/mkrelease.sh
+++ b/utils/releasetools/01_create_tarball.sh
@@ -8,6 +8,7 @@ fi
TAG=$1
TARNAME="redis-${TAG}.tar"
echo "Generating /tmp/${TARNAME}"
+cd ~/hack/redis
git archive $TAG --prefix redis-${TAG}/ > /tmp/$TARNAME || exit 1
echo "Gizipping the archive"
rm -f /tmp/$TARNAME.gz
diff --git a/utils/releasetools/02_upload_tarball.sh b/utils/releasetools/02_upload_tarball.sh
new file mode 100755
index 000000000..ed7065388
--- /dev/null
+++ b/utils/releasetools/02_upload_tarball.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+echo "Uploading..."
+scp /tmp/redis-${1}.tar.gz antirez@antirez.com:/var/virtual/download.redis.io/httpdocs/releases/
+echo "Updating web site... (press any key if it is a stable release, or Ctrl+C)"
+read x
+ssh antirez@antirez.com "cd /var/virtual/download.redis.io/httpdocs; ./update.sh ${1}"
diff --git a/utils/releasetools/03_test_release.sh b/utils/releasetools/03_test_release.sh
new file mode 100755
index 000000000..3dfdcd6a3
--- /dev/null
+++ b/utils/releasetools/03_test_release.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+if [ $# != "1" ]
+then
+ echo "Usage: ${0} <git-ref>"
+ exit 1
+fi
+
+TAG=$1
+TARNAME="redis-${TAG}.tar.gz"
+DOWNLOADURL="http://download.redis.io/releases/${TARNAME}"
+
+ssh antirez@metal "export TERM=xterm;
+ cd /tmp;
+ rm -rf test_release_tmp_dir;
+ cd test_release_tmp_dir;
+ rm -f $TARNAME;
+ rm -rf redis-${TAG};
+ wget $DOWNLOADURL;
+ tar xvzf $TARNAME;
+ cd redis-${TAG};
+ make;
+ ./runtest;
+ ./runtest-sentinel;
+ if [ -x runtest-cluster ]; then
+ ./runtest-cluster;
+ fi"
diff --git a/utils/releasetools/04_release_hash.sh b/utils/releasetools/04_release_hash.sh
new file mode 100755
index 000000000..9d5c6ad4b
--- /dev/null
+++ b/utils/releasetools/04_release_hash.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+SHA=$(curl -s http://download.redis.io/releases/redis-${1}.tar.gz | shasum -a 256 | cut -f 1 -d' ')
+ENTRY="hash redis-${1}.tar.gz sha256 $SHA http://download.redis.io/releases/redis-${1}.tar.gz"
+echo $ENTRY >> ~/hack/redis-hashes/README
+vi ~/hack/redis-hashes/README
+echo "Press any key to commit, Ctrl-C to abort)."
+read yes
+(cd ~/hack/redis-hashes; git commit -a -m "${1} hash."; git push)
diff --git a/utils/releasetools/changelog.tcl b/utils/releasetools/changelog.tcl
new file mode 100755
index 000000000..9b3a2cddc
--- /dev/null
+++ b/utils/releasetools/changelog.tcl
@@ -0,0 +1,30 @@
+#!/usr/bin/env tclsh
+
+if {[llength $::argv] != 2} {
+ puts "Usage: $::argv0 <branch> <version>"
+ exit 1
+}
+
+set branch [lindex $::argv 0]
+set ver [lindex $::argv 1]
+
+set template {
+================================================================================
+Redis %ver% Released %date%
+================================================================================
+
+Upgrade urgency <URGENCY>: <DESCRIPTION>
+}
+
+set template [string trim $template]
+append template "\n\n"
+set date [clock format [clock seconds]]
+set template [string map [list %ver% $ver %date% $date] $template]
+
+append template [exec git log $branch~100..$branch "--format=format:%an in commit %h:%n %s" --shortstat]
+
+#Older, more verbose version.
+#
+#append template [exec git log $branch~30..$branch "--format=format:+-------------------------------------------------------------------------------%n| %s%n| By %an, %ai%n+--------------------------------------------------------------------------------%nhttps://github.com/antirez/redis/commit/%H%n%n%b" --stat]
+
+puts $template
diff --git a/utils/whatisdoing.sh b/utils/whatisdoing.sh
index 8f441cfc0..e4059caed 100755
--- a/utils/whatisdoing.sh
+++ b/utils/whatisdoing.sh
@@ -1,9 +1,15 @@
# This script is from http://poormansprofiler.org/
+#
+# NOTE: Instead of using this script, you should use the Redis
+# Software Watchdog, which provides a similar functionality but in
+# a more reliable / easy to use way.
+#
+# Check http://redis.io/topics/latency for more information.
#!/bin/bash
nsamples=1
sleeptime=0
-pid=$(pidof redis-server)
+pid=$(ps auxww | grep '[r]edis-server' | awk '{print $2}')
for x in $(seq 1 $nsamples)
do