summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2010-06-22 00:07:48 +0200
committerantirez <antirez@gmail.com>2010-07-01 14:38:51 +0200
commite2641e09cc0daf44f63f654230f72d22acf3a9af (patch)
treef0443876d28414f7c80787593e5f35a9f9c87747
parentc2ff0e90b8ce84d7b966622ffe0178303bb0a625 (diff)
downloadredis-e2641e09cc0daf44f63f654230f72d22acf3a9af.tar.gz
redis.c split into many different C files.
networking related stuff moved into networking.c moved more code more work on layout of source code SDS instantaneuos memory saving. By Pieter and Salvatore at VMware ;) cleanly compiling again after the first split, now splitting it in more C files moving more things around... work in progress split replication code splitting more Sets split Hash split replication split even more splitting more splitting minor change
-rw-r--r--redis.c11621
-rw-r--r--redis.h75
-rw-r--r--src/Makefile (renamed from Makefile)9
-rw-r--r--src/adlist.c (renamed from adlist.c)0
-rw-r--r--src/adlist.h (renamed from adlist.h)0
-rw-r--r--src/ae.c (renamed from ae.c)0
-rw-r--r--src/ae.h (renamed from ae.h)0
-rw-r--r--src/ae_epoll.c (renamed from ae_epoll.c)0
-rw-r--r--src/ae_kqueue.c (renamed from ae_kqueue.c)0
-rw-r--r--src/ae_select.c (renamed from ae_select.c)0
-rw-r--r--src/anet.c (renamed from anet.c)0
-rw-r--r--src/anet.h (renamed from anet.h)0
-rw-r--r--src/aof.c694
-rw-r--r--src/config.c438
-rw-r--r--src/config.h (renamed from config.h)0
-rw-r--r--src/db.c508
-rw-r--r--src/debug.c309
-rw-r--r--src/dict.c (renamed from dict.c)0
-rw-r--r--src/dict.h (renamed from dict.h)0
-rw-r--r--src/fmacros.h (renamed from fmacros.h)0
-rw-r--r--src/linenoise.c (renamed from linenoise.c)0
-rw-r--r--src/linenoise.h (renamed from linenoise.h)0
-rw-r--r--src/lzf.h (renamed from lzf.h)0
-rw-r--r--src/lzfP.h (renamed from lzfP.h)0
-rw-r--r--src/lzf_c.c (renamed from lzf_c.c)0
-rw-r--r--src/lzf_d.c (renamed from lzf_d.c)0
-rwxr-xr-xsrc/mkreleasehdr.sh (renamed from mkreleasehdr.sh)0
-rw-r--r--src/multi.c266
-rw-r--r--src/networking.c589
-rw-r--r--src/object.c405
-rw-r--r--src/pqsort.c (renamed from pqsort.c)0
-rw-r--r--src/pqsort.h (renamed from pqsort.h)0
-rw-r--r--src/pubsub.c259
-rw-r--r--src/rdb.c886
-rw-r--r--src/redis-benchmark.c (renamed from redis-benchmark.c)0
-rw-r--r--src/redis-check-aof.c (renamed from redis-check-aof.c)0
-rw-r--r--src/redis-check-dump.c (renamed from redis-check-dump.c)0
-rw-r--r--src/redis-cli.c (renamed from redis-cli.c)0
-rw-r--r--src/redis.c1516
-rw-r--r--src/redis.h885
-rw-r--r--src/release.c (renamed from release.c)0
-rw-r--r--src/replication.c475
-rw-r--r--src/sds.c (renamed from sds.c)27
-rw-r--r--src/sds.h (renamed from sds.h)7
-rw-r--r--src/sha1.c (renamed from sha1.c)0
-rw-r--r--src/sha1.h (renamed from sha1.h)0
-rw-r--r--src/solarisfixes.h (renamed from solarisfixes.h)0
-rw-r--r--src/sort.c383
-rw-r--r--src/t_hash.c397
-rw-r--r--src/t_list.c829
-rw-r--r--src/t_set.c349
-rw-r--r--src/t_string.c251
-rw-r--r--src/t_zset.c985
-rw-r--r--src/util.c223
-rw-r--r--src/version.h1
-rw-r--r--src/vm.c1126
-rw-r--r--src/ziplist.c (renamed from ziplist.c)0
-rw-r--r--src/ziplist.h (renamed from ziplist.h)0
-rw-r--r--src/zipmap.c (renamed from zipmap.c)0
-rw-r--r--src/zipmap.h (renamed from zipmap.h)0
-rw-r--r--src/zmalloc.c (renamed from zmalloc.c)0
-rw-r--r--src/zmalloc.h (renamed from zmalloc.h)0
-rw-r--r--staticsymbols.h374
-rw-r--r--tests/integration/aof.tcl4
-rw-r--r--tests/support/server.tcl4
65 files changed, 11811 insertions, 12084 deletions
diff --git a/redis.c b/redis.c
deleted file mode 100644
index 1e1cd7816..000000000
--- a/redis.c
+++ /dev/null
@@ -1,11621 +0,0 @@
-/*
- * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define REDIS_VERSION "2.1.1"
-
-#include "fmacros.h"
-#include "config.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-#include <signal.h>
-
-#ifdef HAVE_BACKTRACE
-#include <execinfo.h>
-#include <ucontext.h>
-#endif /* HAVE_BACKTRACE */
-
-#include <sys/wait.h>
-#include <errno.h>
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <inttypes.h>
-#include <arpa/inet.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <sys/uio.h>
-#include <limits.h>
-#include <float.h>
-#include <math.h>
-#include <pthread.h>
-
-#if defined(__sun)
-#include "solarisfixes.h"
-#endif
-
-#include "redis.h"
-#include "ae.h" /* Event driven programming library */
-#include "sds.h" /* Dynamic safe strings */
-#include "anet.h" /* Networking the easy way */
-#include "dict.h" /* Hash tables */
-#include "adlist.h" /* Linked lists */
-#include "zmalloc.h" /* total memory usage aware version of malloc/free */
-#include "lzf.h" /* LZF compression library */
-#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
-#include "zipmap.h" /* Compact dictionary-alike data structure */
-#include "ziplist.h" /* Compact list data structure */
-#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
-
-/* Error codes */
-#define REDIS_OK 0
-#define REDIS_ERR -1
-
-/* Static server configuration */
-#define REDIS_SERVERPORT 6379 /* TCP port */
-#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
-#define REDIS_IOBUF_LEN 1024
-#define REDIS_LOADBUF_LEN 1024
-#define REDIS_STATIC_ARGS 8
-#define REDIS_DEFAULT_DBNUM 16
-#define REDIS_CONFIGLINE_MAX 1024
-#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
-#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
-#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
-#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
-#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
-
-/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
-#define REDIS_WRITEV_THRESHOLD 3
-/* Max number of iovecs used for each writev call */
-#define REDIS_WRITEV_IOVEC_COUNT 256
-
-/* Hash table parameters */
-#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
-
-/* Command flags */
-#define REDIS_CMD_BULK 1 /* Bulk write command */
-#define REDIS_CMD_INLINE 2 /* Inline command */
-/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with
- this flags will return an error when the 'maxmemory' option is set in the
- config file and the server is using more than maxmemory bytes of memory.
- In short this commands are denied on low memory conditions. */
-#define REDIS_CMD_DENYOOM 4
-#define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
-
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-#define REDIS_VMPOINTER 8
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
-#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
-#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
-#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
-
-static char* strencoding[] = {
- "raw", "int", "hashtable", "zipmap", "linkedlist", "ziplist"
-};
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lenghts up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
-/* Virtual memory object->where field. */
-#define REDIS_VM_MEMORY 0 /* The object is on memory */
-#define REDIS_VM_SWAPPED 1 /* The object is on disk */
-#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
-#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
-
-/* Virtual memory static configuration stuff.
- * Check vmFindContiguousPages() to know more about this magic numbers. */
-#define REDIS_VM_MAX_NEAR_PAGES 65536
-#define REDIS_VM_MAX_RANDOM_JUMP 4096
-#define REDIS_VM_MAX_THREADS 32
-#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
-/* The following is the *percentage* of completed I/O jobs to process when the
- * handelr is called. While Virtual Memory I/O operations are performed by
- * threads, this operations must be processed by the main thread when completed
- * in order to take effect. */
-#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
-
-/* Client flags */
-#define REDIS_SLAVE 1 /* This client is a slave server */
-#define REDIS_MASTER 2 /* This client is a master server */
-#define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
-#define REDIS_MULTI 8 /* This client is in a MULTI context */
-#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
-#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
-#define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
-
-/* Slave replication state - slave side */
-#define REDIS_REPL_NONE 0 /* No active replication */
-#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_CONNECTED 2 /* Connected to master */
-
-/* Slave replication state - from the point of view of master
- * Note that in SEND_BULK and ONLINE state the slave receives new updates
- * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
- * to start the next background saving in order to send updates to it. */
-#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
-#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
-#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
-#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
-
-/* List related stuff */
-#define REDIS_HEAD 0
-#define REDIS_TAIL 1
-
-/* Sort operations */
-#define REDIS_SORT_GET 0
-#define REDIS_SORT_ASC 1
-#define REDIS_SORT_DESC 2
-#define REDIS_SORTKEY_MAX 1024
-
-/* Log levels */
-#define REDIS_DEBUG 0
-#define REDIS_VERBOSE 1
-#define REDIS_NOTICE 2
-#define REDIS_WARNING 3
-
-/* Anti-warning macro... */
-#define REDIS_NOTUSED(V) ((void) V)
-
-#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
-#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
-
-/* Append only defines */
-#define APPENDFSYNC_NO 0
-#define APPENDFSYNC_ALWAYS 1
-#define APPENDFSYNC_EVERYSEC 2
-
-/* Zip structure related defaults */
-#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
-#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
-#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
-#define REDIS_LIST_MAX_ZIPLIST_VALUE 32
-
-/* We can print the stacktrace, so our assert is defined this way: */
-#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
-#define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
-static void _redisAssert(char *estr, char *file, int line);
-static void _redisPanic(char *msg, char *file, int line);
-
-/*================================= Data types ============================== */
-
-/* A redis object, that is a type able to hold a string / list / set */
-
-/* The actual Redis Object */
-typedef struct redisObject {
- unsigned type:4;
- unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
- unsigned encoding:4;
- unsigned lru:22; /* lru time (relative to server.lruclock) */
- int refcount;
- void *ptr;
- /* VM fields are only allocated if VM is active, otherwise the
- * object allocation function will just allocate
- * sizeof(redisObjct) minus sizeof(redisObjectVM), so using
- * Redis without VM active will not have any overhead. */
-} robj;
-
-/* The VM pointer structure - identifies an object in the swap file.
- *
- * This object is stored in place of the value
- * object in the main key->value hash table representing a database.
- * Note that the first fields (type, storage) are the same as the redisObject
- * structure so that vmPointer strucuters can be accessed even when casted
- * as redisObject structures.
- *
- * This is useful as we don't know if a value object is or not on disk, but we
- * are always able to read obj->storage to check this. For vmPointer
- * structures "type" is set to REDIS_VMPOINTER (even if without this field
- * is still possible to check the kind of object from the value of 'storage').*/
-typedef struct vmPointer {
- unsigned type:4;
- unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
- unsigned notused:26;
- unsigned int vtype; /* type of the object stored in the swap file */
- off_t page; /* the page at witch the object is stored on disk */
- off_t usedpages; /* number of pages used on disk */
-} vmpointer;
-
-/* Macro used to initalize a Redis object allocated on the stack.
- * Note that this macro is taken near the structure definition to make sure
- * we'll update it when the structure is changed, to avoid bugs like
- * bug #85 introduced exactly in this way. */
-#define initStaticStringObject(_var,_ptr) do { \
- _var.refcount = 1; \
- _var.type = REDIS_STRING; \
- _var.encoding = REDIS_ENCODING_RAW; \
- _var.ptr = _ptr; \
- _var.storage = REDIS_VM_MEMORY; \
-} while(0);
-
-typedef struct redisDb {
- dict *dict; /* The keyspace for this DB */
- dict *expires; /* Timeout of keys with a timeout set */
- dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
- dict *io_keys; /* Keys with clients waiting for VM I/O */
- dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
- int id;
-} redisDb;
-
-/* Client MULTI/EXEC state */
-typedef struct multiCmd {
- robj **argv;
- int argc;
- struct redisCommand *cmd;
-} multiCmd;
-
-typedef struct multiState {
- multiCmd *commands; /* Array of MULTI commands */
- int count; /* Total number of MULTI commands */
-} multiState;
-
-/* With multiplexing we need to take per-clinet state.
- * Clients are taken in a liked list. */
-typedef struct redisClient {
- int fd;
- redisDb *db;
- int dictid;
- sds querybuf;
- robj **argv, **mbargv;
- int argc, mbargc;
- int bulklen; /* bulk read len. -1 if not in bulk read mode */
- int multibulk; /* multi bulk command format active */
- list *reply;
- int sentlen;
- time_t lastinteraction; /* time of the last interaction, used for timeout */
- int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
- int slaveseldb; /* slave selected db, if this client is a slave */
- int authenticated; /* when requirepass is non-NULL */
- int replstate; /* replication state if this is a slave */
- int repldbfd; /* replication DB file descriptor */
- long repldboff; /* replication DB file offset */
- off_t repldbsize; /* replication DB file size */
- multiState mstate; /* MULTI/EXEC state */
- robj **blocking_keys; /* The key we are waiting to terminate a blocking
- * operation such as BLPOP. Otherwise NULL. */
- int blocking_keys_num; /* Number of blocking keys */
- time_t blockingto; /* Blocking operation timeout. If UNIX current time
- * is >= blockingto then the operation timed out. */
- list *io_keys; /* Keys this client is waiting to be loaded from the
- * swap file in order to continue. */
- list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
- dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
- list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
-} redisClient;
-
-struct saveparam {
- time_t seconds;
- int changes;
-};
-
-/* Global server state structure */
-struct redisServer {
- int port;
- int fd;
- redisDb *db;
- long long dirty; /* changes to DB from the last save */
- list *clients;
- list *slaves, *monitors;
- char neterr[ANET_ERR_LEN];
- aeEventLoop *el;
- int cronloops; /* number of times the cron function run */
- list *objfreelist; /* A list of freed objects to avoid malloc() */
- time_t lastsave; /* Unix time of last save succeeede */
- /* Fields used only for stats */
- time_t stat_starttime; /* server start time */
- long long stat_numcommands; /* number of processed commands */
- long long stat_numconnections; /* number of connections received */
- long long stat_expiredkeys; /* number of expired keys */
- /* Configuration */
- int verbosity;
- int glueoutputbuf;
- int maxidletime;
- int dbnum;
- int daemonize;
- int appendonly;
- int appendfsync;
- int no_appendfsync_on_rewrite;
- int shutdown_asap;
- time_t lastfsync;
- int appendfd;
- int appendseldb;
- char *pidfile;
- pid_t bgsavechildpid;
- pid_t bgrewritechildpid;
- sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */
- sds aofbuf; /* AOF buffer, written before entering the event loop */
- struct saveparam *saveparams;
- int saveparamslen;
- char *logfile;
- char *bindaddr;
- char *dbfilename;
- char *appendfilename;
- char *requirepass;
- int rdbcompression;
- int activerehashing;
- /* Replication related */
- int isslave;
- char *masterauth;
- char *masterhost;
- int masterport;
- redisClient *master; /* client that is master for this slave */
- int replstate;
- unsigned int maxclients;
- unsigned long long maxmemory;
- unsigned int blpop_blocked_clients;
- unsigned int vm_blocked_clients;
- /* Sort parameters - qsort_r() is only available under BSD so we
- * have to take this state global, in order to pass it to sortCompare() */
- int sort_desc;
- int sort_alpha;
- int sort_bypattern;
- /* Virtual memory configuration */
- int vm_enabled;
- char *vm_swap_file;
- off_t vm_page_size;
- off_t vm_pages;
- unsigned long long vm_max_memory;
- /* Zip structure config */
- size_t hash_max_zipmap_entries;
- size_t hash_max_zipmap_value;
- size_t list_max_ziplist_entries;
- size_t list_max_ziplist_value;
- /* Virtual memory state */
- FILE *vm_fp;
- int vm_fd;
- off_t vm_next_page; /* Next probably empty page */
- off_t vm_near_pages; /* Number of pages allocated sequentially */
- unsigned char *vm_bitmap; /* Bitmap of free/used pages */
- time_t unixtime; /* Unix time sampled every second. */
- /* Virtual memory I/O threads stuff */
- /* An I/O thread process an element taken from the io_jobs queue and
- * put the result of the operation in the io_done list. While the
- * job is being processed, it's put on io_processing queue. */
- list *io_newjobs; /* List of VM I/O jobs yet to be processed */
- list *io_processing; /* List of VM I/O jobs being processed */
- list *io_processed; /* List of VM I/O jobs already processed */
- list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
- pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
- pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
- pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
- pthread_attr_t io_threads_attr; /* attributes for threads creation */
- int io_active_threads; /* Number of running I/O threads */
- int vm_max_threads; /* Max number of I/O threads running at the same time */
- /* Our main thread is blocked on the event loop, locking for sockets ready
- * to be read or written, so when a threaded I/O operation is ready to be
- * processed by the main thread, the I/O thread will use a unix pipe to
- * awake the main thread. The followings are the two pipe FDs. */
- int io_ready_pipe_read;
- int io_ready_pipe_write;
- /* Virtual memory stats */
- unsigned long long vm_stats_used_pages;
- unsigned long long vm_stats_swapped_objects;
- unsigned long long vm_stats_swapouts;
- unsigned long long vm_stats_swapins;
- /* Pubsub */
- dict *pubsub_channels; /* Map channels to list of subscribed clients */
- list *pubsub_patterns; /* A list of pubsub_patterns */
- /* Misc */
- FILE *devnull;
- unsigned lruclock:22; /* clock incrementing every minute, for LRU */
- unsigned lruclock_padding:10;
-};
-
-typedef struct pubsubPattern {
- redisClient *client;
- robj *pattern;
-} pubsubPattern;
-
-typedef void redisCommandProc(redisClient *c);
-typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
-struct redisCommand {
- char *name;
- redisCommandProc *proc;
- int arity;
- int flags;
- /* Use a function to determine which keys need to be loaded
- * in the background prior to executing this command. Takes precedence
- * over vm_firstkey and others, ignored when NULL */
- redisVmPreloadProc *vm_preload_proc;
- /* What keys should be loaded in background when calling this command? */
- int vm_firstkey; /* The first argument that's a key (0 = no keys) */
- int vm_lastkey; /* THe last argument that's a key */
- int vm_keystep; /* The step between first and last key */
-};
-
-struct redisFunctionSym {
- char *name;
- unsigned long pointer;
-};
-
-typedef struct _redisSortObject {
- robj *obj;
- union {
- double score;
- robj *cmpobj;
- } u;
-} redisSortObject;
-
-typedef struct _redisSortOperation {
- int type;
- robj *pattern;
-} redisSortOperation;
-
-/* ZSETs use a specialized version of Skiplists */
-
-typedef struct zskiplistNode {
- struct zskiplistNode **forward;
- struct zskiplistNode *backward;
- unsigned int *span;
- double score;
- robj *obj;
-} zskiplistNode;
-
-typedef struct zskiplist {
- struct zskiplistNode *header, *tail;
- unsigned long length;
- int level;
-} zskiplist;
-
-typedef struct zset {
- dict *dict;
- zskiplist *zsl;
-} zset;
-
-/* Our shared "common" objects */
-
-#define REDIS_SHARED_INTEGERS 10000
-struct sharedObjectsStruct {
- robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
- *colon, *nullbulk, *nullmultibulk, *queued,
- *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *plus,
- *select0, *select1, *select2, *select3, *select4,
- *select5, *select6, *select7, *select8, *select9,
- *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
- *mbulk4, *psubscribebulk, *punsubscribebulk,
- *integers[REDIS_SHARED_INTEGERS];
-} shared;
-
-/* Global vars that are actally used as constants. The following double
- * values are used for double on-disk serialization, and are initialized
- * at runtime to avoid strange compiler optimizations. */
-
-static double R_Zero, R_PosInf, R_NegInf, R_Nan;
-
-/* VM threaded I/O request message */
-#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
-#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
-#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
-typedef struct iojob {
- int type; /* Request type, REDIS_IOJOB_* */
- redisDb *db;/* Redis database */
- robj *key; /* This I/O request is about swapping this key */
- robj *id; /* Unique identifier of this job:
- this is the object to swap for REDIS_IOREQ_*_SWAP, or the
- vmpointer objct for REDIS_IOREQ_LOAD. */
- robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
- * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
- off_t page; /* Swap page where to read/write the object */
- off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
- int canceled; /* True if this command was canceled by blocking side of VM */
- pthread_t thread; /* ID of the thread processing this entry */
-} iojob;
-
-/*================================ Prototypes =============================== */
-char *redisGitSHA1(void);
-char *redisGitDirty(void);
-
-static void freeStringObject(robj *o);
-static void freeListObject(robj *o);
-static void freeSetObject(robj *o);
-static void decrRefCount(void *o);
-static robj *createObject(int type, void *ptr);
-static void freeClient(redisClient *c);
-static int rdbLoad(char *filename);
-static void addReply(redisClient *c, robj *obj);
-static void addReplySds(redisClient *c, sds s);
-static void incrRefCount(robj *o);
-static int rdbSaveBackground(char *filename);
-static robj *createStringObject(char *ptr, size_t len);
-static robj *dupStringObject(robj *o);
-static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
-static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
-static void flushAppendOnlyFile(void);
-static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
-static int syncWithMaster(void);
-static robj *tryObjectEncoding(robj *o);
-static robj *getDecodedObject(robj *o);
-static int removeExpire(redisDb *db, robj *key);
-static int expireIfNeeded(redisDb *db, robj *key);
-static int deleteIfVolatile(redisDb *db, robj *key);
-static int dbDelete(redisDb *db, robj *key);
-static time_t getExpire(redisDb *db, robj *key);
-static int setExpire(redisDb *db, robj *key, time_t when);
-static void updateSlavesWaitingBgsave(int bgsaveerr);
-static void freeMemoryIfNeeded(void);
-static int processCommand(redisClient *c);
-static void setupSigSegvAction(void);
-static void rdbRemoveTempFile(pid_t childpid);
-static void aofRemoveTempFile(pid_t childpid);
-static size_t stringObjectLen(robj *o);
-static void processInputBuffer(redisClient *c);
-static zskiplist *zslCreate(void);
-static void zslFree(zskiplist *zsl);
-static void zslInsert(zskiplist *zsl, double score, robj *obj);
-static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
-static void initClientMultiState(redisClient *c);
-static void freeClientMultiState(redisClient *c);
-static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
-static void unblockClientWaitingData(redisClient *c);
-static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
-static void vmInit(void);
-static void vmMarkPagesFree(off_t page, off_t count);
-static robj *vmLoadObject(robj *o);
-static robj *vmPreviewObject(robj *o);
-static int vmSwapOneObjectBlocking(void);
-static int vmSwapOneObjectThreaded(void);
-static int vmCanSwapOut(void);
-static int tryFreeOneObjectFromFreelist(void);
-static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
-static void vmCancelThreadedIOJob(robj *o);
-static void lockThreadedIO(void);
-static void unlockThreadedIO(void);
-static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
-static void freeIOJob(iojob *j);
-static void queueIOJob(iojob *j);
-static int vmWriteObjectOnSwap(robj *o, off_t page);
-static robj *vmReadObjectFromSwap(off_t page, int type);
-static void waitEmptyIOJobsQueue(void);
-static void vmReopenSwapFile(void);
-static int vmFreePage(off_t page);
-static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
-static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
-static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
-static int dontWaitForSwappedKey(redisClient *c, robj *key);
-static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
-static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
-static struct redisCommand *lookupCommand(char *name);
-static void call(redisClient *c, struct redisCommand *cmd);
-static void resetClient(redisClient *c);
-static void convertToRealHash(robj *o);
-static void listTypeConvert(robj *o, int enc);
-static int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
-static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
-static void freePubsubPattern(void *p);
-static int listMatchPubsubPattern(void *a, void *b);
-static int compareStringObjects(robj *a, robj *b);
-static int equalStringObjects(robj *a, robj *b);
-static void usage();
-static int rewriteAppendOnlyFileBackground(void);
-static vmpointer *vmSwapObjectBlocking(robj *val);
-static int prepareForShutdown();
-static void touchWatchedKey(redisDb *db, robj *key);
-static void touchWatchedKeysOnFlush(int dbid);
-static void unwatchAllKeys(redisClient *c);
-
-static void authCommand(redisClient *c);
-static void pingCommand(redisClient *c);
-static void echoCommand(redisClient *c);
-static void setCommand(redisClient *c);
-static void setnxCommand(redisClient *c);
-static void setexCommand(redisClient *c);
-static void getCommand(redisClient *c);
-static void delCommand(redisClient *c);
-static void existsCommand(redisClient *c);
-static void incrCommand(redisClient *c);
-static void decrCommand(redisClient *c);
-static void incrbyCommand(redisClient *c);
-static void decrbyCommand(redisClient *c);
-static void selectCommand(redisClient *c);
-static void randomkeyCommand(redisClient *c);
-static void keysCommand(redisClient *c);
-static void dbsizeCommand(redisClient *c);
-static void lastsaveCommand(redisClient *c);
-static void saveCommand(redisClient *c);
-static void bgsaveCommand(redisClient *c);
-static void bgrewriteaofCommand(redisClient *c);
-static void shutdownCommand(redisClient *c);
-static void moveCommand(redisClient *c);
-static void renameCommand(redisClient *c);
-static void renamenxCommand(redisClient *c);
-static void lpushCommand(redisClient *c);
-static void rpushCommand(redisClient *c);
-static void lpushxCommand(redisClient *c);
-static void rpushxCommand(redisClient *c);
-static void linsertCommand(redisClient *c);
-static void lpopCommand(redisClient *c);
-static void rpopCommand(redisClient *c);
-static void llenCommand(redisClient *c);
-static void lindexCommand(redisClient *c);
-static void lrangeCommand(redisClient *c);
-static void ltrimCommand(redisClient *c);
-static void typeCommand(redisClient *c);
-static void lsetCommand(redisClient *c);
-static void saddCommand(redisClient *c);
-static void sremCommand(redisClient *c);
-static void smoveCommand(redisClient *c);
-static void sismemberCommand(redisClient *c);
-static void scardCommand(redisClient *c);
-static void spopCommand(redisClient *c);
-static void srandmemberCommand(redisClient *c);
-static void sinterCommand(redisClient *c);
-static void sinterstoreCommand(redisClient *c);
-static void sunionCommand(redisClient *c);
-static void sunionstoreCommand(redisClient *c);
-static void sdiffCommand(redisClient *c);
-static void sdiffstoreCommand(redisClient *c);
-static void syncCommand(redisClient *c);
-static void flushdbCommand(redisClient *c);
-static void flushallCommand(redisClient *c);
-static void sortCommand(redisClient *c);
-static void lremCommand(redisClient *c);
-static void rpoplpushcommand(redisClient *c);
-static void infoCommand(redisClient *c);
-static void mgetCommand(redisClient *c);
-static void monitorCommand(redisClient *c);
-static void expireCommand(redisClient *c);
-static void expireatCommand(redisClient *c);
-static void getsetCommand(redisClient *c);
-static void ttlCommand(redisClient *c);
-static void slaveofCommand(redisClient *c);
-static void debugCommand(redisClient *c);
-static void msetCommand(redisClient *c);
-static void msetnxCommand(redisClient *c);
-static void zaddCommand(redisClient *c);
-static void zincrbyCommand(redisClient *c);
-static void zrangeCommand(redisClient *c);
-static void zrangebyscoreCommand(redisClient *c);
-static void zcountCommand(redisClient *c);
-static void zrevrangeCommand(redisClient *c);
-static void zcardCommand(redisClient *c);
-static void zremCommand(redisClient *c);
-static void zscoreCommand(redisClient *c);
-static void zremrangebyscoreCommand(redisClient *c);
-static void multiCommand(redisClient *c);
-static void execCommand(redisClient *c);
-static void discardCommand(redisClient *c);
-static void blpopCommand(redisClient *c);
-static void brpopCommand(redisClient *c);
-static void appendCommand(redisClient *c);
-static void substrCommand(redisClient *c);
-static void zrankCommand(redisClient *c);
-static void zrevrankCommand(redisClient *c);
-static void hsetCommand(redisClient *c);
-static void hsetnxCommand(redisClient *c);
-static void hgetCommand(redisClient *c);
-static void hmsetCommand(redisClient *c);
-static void hmgetCommand(redisClient *c);
-static void hdelCommand(redisClient *c);
-static void hlenCommand(redisClient *c);
-static void zremrangebyrankCommand(redisClient *c);
-static void zunionstoreCommand(redisClient *c);
-static void zinterstoreCommand(redisClient *c);
-static void hkeysCommand(redisClient *c);
-static void hvalsCommand(redisClient *c);
-static void hgetallCommand(redisClient *c);
-static void hexistsCommand(redisClient *c);
-static void configCommand(redisClient *c);
-static void hincrbyCommand(redisClient *c);
-static void subscribeCommand(redisClient *c);
-static void unsubscribeCommand(redisClient *c);
-static void psubscribeCommand(redisClient *c);
-static void punsubscribeCommand(redisClient *c);
-static void publishCommand(redisClient *c);
-static void watchCommand(redisClient *c);
-static void unwatchCommand(redisClient *c);
-
-/*================================= Globals ================================= */
-
-/* Global vars */
-static struct redisServer server; /* server global state */
-static struct redisCommand *commandTable;
-static struct redisCommand readonlyCommandTable[] = {
- {"get",getCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
- {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
- {"setex",setexCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
- {"append",appendCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"substr",substrCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"del",delCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"exists",existsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"mget",mgetCommand,-2,REDIS_CMD_INLINE,NULL,1,-1,1},
- {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"rpushx",rpushxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"lpushx",lpushxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"linsert",linsertCommand,5,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"rpop",rpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"lpop",lpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"brpop",brpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"blpop",blpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"llen",llenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"lindex",lindexCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"lrange",lrangeCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"lrem",lremCommand,4,REDIS_CMD_BULK,NULL,1,1,1},
- {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,2,1},
- {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"srem",sremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"smove",smoveCommand,4,REDIS_CMD_BULK,NULL,1,2,1},
- {"sismember",sismemberCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"scard",scardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"spop",spopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
- {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
- {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
- {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
- {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
- {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
- {"smembers",sinterCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"zrem",zremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zremrangebyrank",zremrangebyrankCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zunionstore",zunionstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
- {"zinterstore",zinterstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
- {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zcount",zcountCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zcard",zcardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"zrank",zrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"zrevrank",zrevrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"hset",hsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"hsetnx",hsetnxCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"hget",hgetCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"hmset",hmsetCommand,-4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"hmget",hmgetCommand,-3,REDIS_CMD_BULK,NULL,1,1,1},
- {"hincrby",hincrbyCommand,4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"hdel",hdelCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"hlen",hlenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"hkeys",hkeysCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"hvals",hvalsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"hgetall",hgetallCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"hexists",hexistsCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
- {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
- {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
- {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"select",selectCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"move",moveCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"rename",renameCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
- {"expire",expireCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
- {"expireat",expireatCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
- {"keys",keysCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"auth",authCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"ping",pingCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"echo",echoCommand,2,REDIS_CMD_BULK,NULL,0,0,0},
- {"save",saveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"type",typeCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"multi",multiCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"exec",execCommand,1,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,execBlockClientOnSwappedKeys,0,0,0},
- {"discard",discardCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"sync",syncCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"flushall",flushallCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
- {"info",infoCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"monitor",monitorCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"ttl",ttlCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
- {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
- {"debug",debugCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"config",configCommand,-2,REDIS_CMD_BULK,NULL,0,0,0},
- {"subscribe",subscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"unsubscribe",unsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"psubscribe",psubscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"punsubscribe",punsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
- {"publish",publishCommand,3,REDIS_CMD_BULK|REDIS_CMD_FORCE_REPLICATION,NULL,0,0,0},
- {"watch",watchCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
- {"unwatch",unwatchCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}
-};
-
-/*============================ Utility functions ============================ */
-
-/* Glob-style pattern matching. */
-static int stringmatchlen(const char *pattern, int patternLen,
- const char *string, int stringLen, int nocase)
-{
- while(patternLen) {
- switch(pattern[0]) {
- case '*':
- while (pattern[1] == '*') {
- pattern++;
- patternLen--;
- }
- if (patternLen == 1)
- return 1; /* match */
- while(stringLen) {
- if (stringmatchlen(pattern+1, patternLen-1,
- string, stringLen, nocase))
- return 1; /* match */
- string++;
- stringLen--;
- }
- return 0; /* no match */
- break;
- case '?':
- if (stringLen == 0)
- return 0; /* no match */
- string++;
- stringLen--;
- break;
- case '[':
- {
- int not, match;
-
- pattern++;
- patternLen--;
- not = pattern[0] == '^';
- if (not) {
- pattern++;
- patternLen--;
- }
- match = 0;
- while(1) {
- if (pattern[0] == '\\') {
- pattern++;
- patternLen--;
- if (pattern[0] == string[0])
- match = 1;
- } else if (pattern[0] == ']') {
- break;
- } else if (patternLen == 0) {
- pattern--;
- patternLen++;
- break;
- } else if (pattern[1] == '-' && patternLen >= 3) {
- int start = pattern[0];
- int end = pattern[2];
- int c = string[0];
- if (start > end) {
- int t = start;
- start = end;
- end = t;
- }
- if (nocase) {
- start = tolower(start);
- end = tolower(end);
- c = tolower(c);
- }
- pattern += 2;
- patternLen -= 2;
- if (c >= start && c <= end)
- match = 1;
- } else {
- if (!nocase) {
- if (pattern[0] == string[0])
- match = 1;
- } else {
- if (tolower((int)pattern[0]) == tolower((int)string[0]))
- match = 1;
- }
- }
- pattern++;
- patternLen--;
- }
- if (not)
- match = !match;
- if (!match)
- return 0; /* no match */
- string++;
- stringLen--;
- break;
- }
- case '\\':
- if (patternLen >= 2) {
- pattern++;
- patternLen--;
- }
- /* fall through */
- default:
- if (!nocase) {
- if (pattern[0] != string[0])
- return 0; /* no match */
- } else {
- if (tolower((int)pattern[0]) != tolower((int)string[0]))
- return 0; /* no match */
- }
- string++;
- stringLen--;
- break;
- }
- pattern++;
- patternLen--;
- if (stringLen == 0) {
- while(*pattern == '*') {
- pattern++;
- patternLen--;
- }
- break;
- }
- }
- if (patternLen == 0 && stringLen == 0)
- return 1;
- return 0;
-}
-
-static int stringmatch(const char *pattern, const char *string, int nocase) {
- return stringmatchlen(pattern,strlen(pattern),string,strlen(string),nocase);
-}
-
-/* Convert a string representing an amount of memory into the number of
- * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
- * (1024*1024*1024).
- *
- * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
- * set to 0 */
-static long long memtoll(const char *p, int *err) {
- const char *u;
- char buf[128];
- long mul; /* unit multiplier */
- long long val;
- unsigned int digits;
-
- if (err) *err = 0;
- /* Search the first non digit character. */
- u = p;
- if (*u == '-') u++;
- while(*u && isdigit(*u)) u++;
- if (*u == '\0' || !strcasecmp(u,"b")) {
- mul = 1;
- } else if (!strcasecmp(u,"k")) {
- mul = 1000;
- } else if (!strcasecmp(u,"kb")) {
- mul = 1024;
- } else if (!strcasecmp(u,"m")) {
- mul = 1000*1000;
- } else if (!strcasecmp(u,"mb")) {
- mul = 1024*1024;
- } else if (!strcasecmp(u,"g")) {
- mul = 1000L*1000*1000;
- } else if (!strcasecmp(u,"gb")) {
- mul = 1024L*1024*1024;
- } else {
- if (err) *err = 1;
- mul = 1;
- }
- digits = u-p;
- if (digits >= sizeof(buf)) {
- if (err) *err = 1;
- return LLONG_MAX;
- }
- memcpy(buf,p,digits);
- buf[digits] = '\0';
- val = strtoll(buf,NULL,10);
- return val*mul;
-}
-
-/* Convert a long long into a string. Returns the number of
- * characters needed to represent the number, that can be shorter if passed
- * buffer length is not enough to store the whole number. */
-static int ll2string(char *s, size_t len, long long value) {
- char buf[32], *p;
- unsigned long long v;
- size_t l;
-
- if (len == 0) return 0;
- v = (value < 0) ? -value : value;
- p = buf+31; /* point to the last character */
- do {
- *p-- = '0'+(v%10);
- v /= 10;
- } while(v);
- if (value < 0) *p-- = '-';
- p++;
- l = 32-(p-buf);
- if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
- memcpy(s,p,l);
- s[l] = '\0';
- return l;
-}
-
-static void redisLog(int level, const char *fmt, ...) {
- va_list ap;
- FILE *fp;
-
- fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
- if (!fp) return;
-
- va_start(ap, fmt);
- if (level >= server.verbosity) {
- char *c = ".-*#";
- char buf[64];
- time_t now;
-
- now = time(NULL);
- strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
- fprintf(fp,"[%d] %s %c ",(int)getpid(),buf,c[level]);
- vfprintf(fp, fmt, ap);
- fprintf(fp,"\n");
- fflush(fp);
- }
- va_end(ap);
-
- if (server.logfile) fclose(fp);
-}
-
-/*====================== Hash table type implementation ==================== */
-
-/* This is an hash table type that uses the SDS dynamic strings libary as
- * keys and radis objects as values (objects can hold SDS strings,
- * lists, sets). */
-
-static void dictVanillaFree(void *privdata, void *val)
-{
- DICT_NOTUSED(privdata);
- zfree(val);
-}
-
-static void dictListDestructor(void *privdata, void *val)
-{
- DICT_NOTUSED(privdata);
- listRelease((list*)val);
-}
-
-static int dictSdsKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
- int l1,l2;
- DICT_NOTUSED(privdata);
-
- l1 = sdslen((sds)key1);
- l2 = sdslen((sds)key2);
- if (l1 != l2) return 0;
- return memcmp(key1, key2, l1) == 0;
-}
-
-static void dictRedisObjectDestructor(void *privdata, void *val)
-{
- DICT_NOTUSED(privdata);
-
- if (val == NULL) return; /* Values of swapped out keys as set to NULL */
- decrRefCount(val);
-}
-
-static void dictSdsDestructor(void *privdata, void *val)
-{
- DICT_NOTUSED(privdata);
-
- sdsfree(val);
-}
-
-static int dictObjKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
- const robj *o1 = key1, *o2 = key2;
- return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
-}
-
-static unsigned int dictObjHash(const void *key) {
- const robj *o = key;
- return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
-}
-
-static unsigned int dictSdsHash(const void *key) {
- return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
-}
-
-static int dictEncObjKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
- robj *o1 = (robj*) key1, *o2 = (robj*) key2;
- int cmp;
-
- if (o1->encoding == REDIS_ENCODING_INT &&
- o2->encoding == REDIS_ENCODING_INT)
- return o1->ptr == o2->ptr;
-
- o1 = getDecodedObject(o1);
- o2 = getDecodedObject(o2);
- cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
- decrRefCount(o1);
- decrRefCount(o2);
- return cmp;
-}
-
-static unsigned int dictEncObjHash(const void *key) {
- robj *o = (robj*) key;
-
- if (o->encoding == REDIS_ENCODING_RAW) {
- return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
- } else {
- if (o->encoding == REDIS_ENCODING_INT) {
- char buf[32];
- int len;
-
- len = ll2string(buf,32,(long)o->ptr);
- return dictGenHashFunction((unsigned char*)buf, len);
- } else {
- unsigned int hash;
-
- o = getDecodedObject(o);
- hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
- decrRefCount(o);
- return hash;
- }
- }
-}
-
-/* Sets type */
-static dictType setDictType = {
- dictEncObjHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- NULL /* val destructor */
-};
-
-/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
-static dictType zsetDictType = {
- dictEncObjHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- dictVanillaFree /* val destructor of malloc(sizeof(double)) */
-};
-
-/* Db->dict, keys are sds strings, vals are Redis objects. */
-static dictType dbDictType = {
- dictSdsHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCompare, /* key compare */
- dictSdsDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
-};
-
-/* Db->expires */
-static dictType keyptrDictType = {
- dictSdsHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCompare, /* key compare */
- NULL, /* key destructor */
- NULL /* val destructor */
-};
-
-/* Hash type hash table (note that small hashes are represented with zimpaps) */
-static dictType hashDictType = {
- dictEncObjHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
-};
-
-/* Keylist hash table type has unencoded redis objects as keys and
- * lists as values. It's used for blocking operations (BLPOP) and to
- * map swapped keys to a list of clients waiting for this keys to be loaded. */
-static dictType keylistDictType = {
- dictObjHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- dictListDestructor /* val destructor */
-};
-
-static void version();
-
-/* ========================= Random utility functions ======================= */
-
-/* Redis generally does not try to recover from out of memory conditions
- * when allocating objects or strings, it is not clear if it will be possible
- * to report this condition to the client since the networking layer itself
- * is based on heap allocation for send buffers, so we simply abort.
- * At least the code will be simpler to read... */
-static void oom(const char *msg) {
- redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
- sleep(1);
- abort();
-}
-
-/* ====================== Redis server networking stuff ===================== */
-static void closeTimedoutClients(void) {
- redisClient *c;
- listNode *ln;
- time_t now = time(NULL);
- listIter li;
-
- listRewind(server.clients,&li);
- while ((ln = listNext(&li)) != NULL) {
- c = listNodeValue(ln);
- if (server.maxidletime &&
- !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
- !(c->flags & REDIS_MASTER) && /* no timeout for masters */
- dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
- listLength(c->pubsub_patterns) == 0 &&
- (now - c->lastinteraction > server.maxidletime))
- {
- redisLog(REDIS_VERBOSE,"Closing idle client");
- freeClient(c);
- } else if (c->flags & REDIS_BLOCKED) {
- if (c->blockingto != 0 && c->blockingto < now) {
- addReply(c,shared.nullmultibulk);
- unblockClientWaitingData(c);
- }
- }
- }
-}
-
-static int htNeedsResize(dict *dict) {
- long long size, used;
-
- size = dictSlots(dict);
- used = dictSize(dict);
- return (size && used && size > DICT_HT_INITIAL_SIZE &&
- (used*100/size < REDIS_HT_MINFILL));
-}
-
-/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
- * we resize the hash table to save memory */
-static void tryResizeHashTables(void) {
- int j;
-
- for (j = 0; j < server.dbnum; j++) {
- if (htNeedsResize(server.db[j].dict))
- dictResize(server.db[j].dict);
- if (htNeedsResize(server.db[j].expires))
- dictResize(server.db[j].expires);
- }
-}
-
-/* Our hash table implementation performs rehashing incrementally while
- * we write/read from the hash table. Still if the server is idle, the hash
- * table will use two tables for a long time. So we try to use 1 millisecond
- * of CPU time at every serverCron() loop in order to rehash some key. */
-static void incrementallyRehash(void) {
- int j;
-
- for (j = 0; j < server.dbnum; j++) {
- if (dictIsRehashing(server.db[j].dict)) {
- dictRehashMilliseconds(server.db[j].dict,1);
- break; /* already used our millisecond for this loop... */
- }
- }
-}
-
-/* A background saving child (BGSAVE) terminated its work. Handle this. */
-void backgroundSaveDoneHandler(int statloc) {
- int exitcode = WEXITSTATUS(statloc);
- int bysignal = WIFSIGNALED(statloc);
-
- if (!bysignal && exitcode == 0) {
- redisLog(REDIS_NOTICE,
- "Background saving terminated with success");
- server.dirty = 0;
- server.lastsave = time(NULL);
- } else if (!bysignal && exitcode != 0) {
- redisLog(REDIS_WARNING, "Background saving error");
- } else {
- redisLog(REDIS_WARNING,
- "Background saving terminated by signal %d", WTERMSIG(statloc));
- rdbRemoveTempFile(server.bgsavechildpid);
- }
- server.bgsavechildpid = -1;
- /* Possibly there are slaves waiting for a BGSAVE in order to be served
- * (the first stage of SYNC is a bulk transfer of dump.rdb) */
- updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
-}
-
-/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
- * Handle this. */
-void backgroundRewriteDoneHandler(int statloc) {
- int exitcode = WEXITSTATUS(statloc);
- int bysignal = WIFSIGNALED(statloc);
-
- if (!bysignal && exitcode == 0) {
- int fd;
- char tmpfile[256];
-
- redisLog(REDIS_NOTICE,
- "Background append only file rewriting terminated with success");
- /* Now it's time to flush the differences accumulated by the parent */
- snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
- fd = open(tmpfile,O_WRONLY|O_APPEND);
- if (fd == -1) {
- redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
- goto cleanup;
- }
- /* Flush our data... */
- if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
- (signed) sdslen(server.bgrewritebuf)) {
- redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
- close(fd);
- goto cleanup;
- }
- redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
- /* Now our work is to rename the temp file into the stable file. And
- * switch the file descriptor used by the server for append only. */
- if (rename(tmpfile,server.appendfilename) == -1) {
- redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
- close(fd);
- goto cleanup;
- }
- /* Mission completed... almost */
- redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
- if (server.appendfd != -1) {
- /* If append only is actually enabled... */
- close(server.appendfd);
- server.appendfd = fd;
- if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd);
- server.appendseldb = -1; /* Make sure it will issue SELECT */
- redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
- } else {
- /* If append only is disabled we just generate a dump in this
- * format. Why not? */
- close(fd);
- }
- } else if (!bysignal && exitcode != 0) {
- redisLog(REDIS_WARNING, "Background append only file rewriting error");
- } else {
- redisLog(REDIS_WARNING,
- "Background append only file rewriting terminated by signal %d",
- WTERMSIG(statloc));
- }
-cleanup:
- sdsfree(server.bgrewritebuf);
- server.bgrewritebuf = sdsempty();
- aofRemoveTempFile(server.bgrewritechildpid);
- server.bgrewritechildpid = -1;
-}
-
-/* This function is called once a background process of some kind terminates,
- * as we want to avoid resizing the hash tables when there is a child in order
- * to play well with copy-on-write (otherwise when a resize happens lots of
- * memory pages are copied). The goal of this function is to update the ability
- * for dict.c to resize the hash tables accordingly to the fact we have o not
- * running childs. */
-static void updateDictResizePolicy(void) {
- if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1)
- dictEnableResize();
- else
- dictDisableResize();
-}
-
-static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
- int j, loops = server.cronloops++;
- REDIS_NOTUSED(eventLoop);
- REDIS_NOTUSED(id);
- REDIS_NOTUSED(clientData);
-
- /* We take a cached value of the unix time in the global state because
- * with virtual memory and aging there is to store the current time
- * in objects at every object access, and accuracy is not needed.
- * To access a global var is faster than calling time(NULL) */
- server.unixtime = time(NULL);
- /* We have just 21 bits per object for LRU information.
- * So we use an (eventually wrapping) LRU clock with minutes resolution.
- *
- * When we need to select what object to swap, we compute the minimum
- * time distance between the current lruclock and the object last access
- * lruclock info. Even if clocks will wrap on overflow, there is
- * the interesting property that we are sure that at least
- * ABS(A-B) minutes passed between current time and timestamp B.
- *
- * This is not precise but we don't need at all precision, but just
- * something statistically reasonable.
- */
- server.lruclock = (time(NULL)/60)&((1<<21)-1);
-
- /* We received a SIGTERM, shutting down here in a safe way, as it is
- * not ok doing so inside the signal handler. */
- if (server.shutdown_asap) {
- if (prepareForShutdown() == REDIS_OK) exit(0);
- redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
- }
-
- /* Show some info about non-empty databases */
- for (j = 0; j < server.dbnum; j++) {
- long long size, used, vkeys;
-
- size = dictSlots(server.db[j].dict);
- used = dictSize(server.db[j].dict);
- vkeys = dictSize(server.db[j].expires);
- if (!(loops % 50) && (used || vkeys)) {
- redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
- /* dictPrintStats(server.dict); */
- }
- }
-
- /* We don't want to resize the hash tables while a bacground saving
- * is in progress: the saving child is created using fork() that is
- * implemented with a copy-on-write semantic in most modern systems, so
- * if we resize the HT while there is the saving child at work actually
- * a lot of memory movements in the parent will cause a lot of pages
- * copied. */
- if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {
- if (!(loops % 10)) tryResizeHashTables();
- if (server.activerehashing) incrementallyRehash();
- }
-
- /* Show information about connected clients */
- if (!(loops % 50)) {
- redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
- listLength(server.clients)-listLength(server.slaves),
- listLength(server.slaves),
- zmalloc_used_memory());
- }
-
- /* Close connections of timedout clients */
- if ((server.maxidletime && !(loops % 100)) || server.blpop_blocked_clients)
- closeTimedoutClients();
-
- /* Check if a background saving or AOF rewrite in progress terminated */
- if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
- int statloc;
- pid_t pid;
-
- if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
- if (pid == server.bgsavechildpid) {
- backgroundSaveDoneHandler(statloc);
- } else {
- backgroundRewriteDoneHandler(statloc);
- }
- updateDictResizePolicy();
- }
- } else {
- /* If there is not a background saving in progress check if
- * we have to save now */
- time_t now = time(NULL);
- for (j = 0; j < server.saveparamslen; j++) {
- struct saveparam *sp = server.saveparams+j;
-
- if (server.dirty >= sp->changes &&
- now-server.lastsave > sp->seconds) {
- redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
- sp->changes, sp->seconds);
- rdbSaveBackground(server.dbfilename);
- break;
- }
- }
- }
-
- /* Try to expire a few timed out keys. The algorithm used is adaptive and
- * will use few CPU cycles if there are few expiring keys, otherwise
- * it will get more aggressive to avoid that too much memory is used by
- * keys that can be removed from the keyspace. */
- for (j = 0; j < server.dbnum; j++) {
- int expired;
- redisDb *db = server.db+j;
-
- /* Continue to expire if at the end of the cycle more than 25%
- * of the keys were expired. */
- do {
- long num = dictSize(db->expires);
- time_t now = time(NULL);
-
- expired = 0;
- if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
- num = REDIS_EXPIRELOOKUPS_PER_CRON;
- while (num--) {
- dictEntry *de;
- time_t t;
-
- if ((de = dictGetRandomKey(db->expires)) == NULL) break;
- t = (time_t) dictGetEntryVal(de);
- if (now > t) {
- sds key = dictGetEntryKey(de);
- robj *keyobj = createStringObject(key,sdslen(key));
-
- dbDelete(db,keyobj);
- decrRefCount(keyobj);
- expired++;
- server.stat_expiredkeys++;
- }
- }
- } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
- }
-
- /* Swap a few keys on disk if we are over the memory limit and VM
- * is enbled. Try to free objects from the free list first. */
- if (vmCanSwapOut()) {
- while (server.vm_enabled && zmalloc_used_memory() >
- server.vm_max_memory)
- {
- int retval;
-
- if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
- retval = (server.vm_max_threads == 0) ?
- vmSwapOneObjectBlocking() :
- vmSwapOneObjectThreaded();
- if (retval == REDIS_ERR && !(loops % 300) &&
- zmalloc_used_memory() >
- (server.vm_max_memory+server.vm_max_memory/10))
- {
- redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
- }
- /* Note that when using threade I/O we free just one object,
- * because anyway when the I/O thread in charge to swap this
- * object out will finish, the handler of completed jobs
- * will try to swap more objects if we are still out of memory. */
- if (retval == REDIS_ERR || server.vm_max_threads > 0) break;
- }
- }
-
- /* Check if we should connect to a MASTER */
- if (server.replstate == REDIS_REPL_CONNECT && !(loops % 10)) {
- redisLog(REDIS_NOTICE,"Connecting to MASTER...");
- if (syncWithMaster() == REDIS_OK) {
- redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
- if (server.appendonly) rewriteAppendOnlyFileBackground();
- }
- }
- return 100;
-}
-
-/* This function gets called every time Redis is entering the
- * main loop of the event driven library, that is, before to sleep
- * for ready file descriptors. */
-static void beforeSleep(struct aeEventLoop *eventLoop) {
- REDIS_NOTUSED(eventLoop);
-
- /* Awake clients that got all the swapped keys they requested */
- if (server.vm_enabled && listLength(server.io_ready_clients)) {
- listIter li;
- listNode *ln;
-
- listRewind(server.io_ready_clients,&li);
- while((ln = listNext(&li))) {
- redisClient *c = ln->value;
- struct redisCommand *cmd;
-
- /* Resume the client. */
- listDelNode(server.io_ready_clients,ln);
- c->flags &= (~REDIS_IO_WAIT);
- server.vm_blocked_clients--;
- aeCreateFileEvent(server.el, c->fd, AE_READABLE,
- readQueryFromClient, c);
- cmd = lookupCommand(c->argv[0]->ptr);
- assert(cmd != NULL);
- call(c,cmd);
- resetClient(c);
- /* There may be more data to process in the input buffer. */
- if (c->querybuf && sdslen(c->querybuf) > 0)
- processInputBuffer(c);
- }
- }
- /* Write the AOF buffer on disk */
- flushAppendOnlyFile();
-}
-
-static void createSharedObjects(void) {
- int j;
-
- shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
- shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
- shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
- shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
- shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
- shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
- shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
- shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
- shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
- shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
- shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
- shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
- shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
- "-ERR Operation against a key holding the wrong kind of value\r\n"));
- shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
- "-ERR no such key\r\n"));
- shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
- "-ERR syntax error\r\n"));
- shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
- "-ERR source and destination objects are the same\r\n"));
- shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
- "-ERR index out of range\r\n"));
- shared.space = createObject(REDIS_STRING,sdsnew(" "));
- shared.colon = createObject(REDIS_STRING,sdsnew(":"));
- shared.plus = createObject(REDIS_STRING,sdsnew("+"));
- shared.select0 = createStringObject("select 0\r\n",10);
- shared.select1 = createStringObject("select 1\r\n",10);
- shared.select2 = createStringObject("select 2\r\n",10);
- shared.select3 = createStringObject("select 3\r\n",10);
- shared.select4 = createStringObject("select 4\r\n",10);
- shared.select5 = createStringObject("select 5\r\n",10);
- shared.select6 = createStringObject("select 6\r\n",10);
- shared.select7 = createStringObject("select 7\r\n",10);
- shared.select8 = createStringObject("select 8\r\n",10);
- shared.select9 = createStringObject("select 9\r\n",10);
- shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
- shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
- shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
- shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
- shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
- shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
- shared.mbulk3 = createStringObject("*3\r\n",4);
- shared.mbulk4 = createStringObject("*4\r\n",4);
- for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
- shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
- shared.integers[j]->encoding = REDIS_ENCODING_INT;
- }
-}
-
-static void appendServerSaveParams(time_t seconds, int changes) {
- server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
- server.saveparams[server.saveparamslen].seconds = seconds;
- server.saveparams[server.saveparamslen].changes = changes;
- server.saveparamslen++;
-}
-
-static void resetServerSaveParams() {
- zfree(server.saveparams);
- server.saveparams = NULL;
- server.saveparamslen = 0;
-}
-
-static void initServerConfig() {
- server.dbnum = REDIS_DEFAULT_DBNUM;
- server.port = REDIS_SERVERPORT;
- server.verbosity = REDIS_VERBOSE;
- server.maxidletime = REDIS_MAXIDLETIME;
- server.saveparams = NULL;
- server.logfile = NULL; /* NULL = log on standard output */
- server.bindaddr = NULL;
- server.glueoutputbuf = 1;
- server.daemonize = 0;
- server.appendonly = 0;
- server.appendfsync = APPENDFSYNC_EVERYSEC;
- server.no_appendfsync_on_rewrite = 0;
- server.lastfsync = time(NULL);
- server.appendfd = -1;
- server.appendseldb = -1; /* Make sure the first time will not match */
- server.pidfile = zstrdup("/var/run/redis.pid");
- server.dbfilename = zstrdup("dump.rdb");
- server.appendfilename = zstrdup("appendonly.aof");
- server.requirepass = NULL;
- server.rdbcompression = 1;
- server.activerehashing = 1;
- server.maxclients = 0;
- server.blpop_blocked_clients = 0;
- server.maxmemory = 0;
- server.vm_enabled = 0;
- server.vm_swap_file = zstrdup("/tmp/redis-%p.vm");
- server.vm_page_size = 256; /* 256 bytes per page */
- server.vm_pages = 1024*1024*100; /* 104 millions of pages */
- server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
- server.vm_max_threads = 4;
- server.vm_blocked_clients = 0;
- server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES;
- server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE;
- server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
- server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
- server.shutdown_asap = 0;
-
- resetServerSaveParams();
-
- appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
- appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
- appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
- /* Replication related */
- server.isslave = 0;
- server.masterauth = NULL;
- server.masterhost = NULL;
- server.masterport = 6379;
- server.master = NULL;
- server.replstate = REDIS_REPL_NONE;
-
- /* Double constants initialization */
- R_Zero = 0.0;
- R_PosInf = 1.0/R_Zero;
- R_NegInf = -1.0/R_Zero;
- R_Nan = R_Zero/R_Zero;
-}
-
-static void initServer() {
- int j;
-
- signal(SIGHUP, SIG_IGN);
- signal(SIGPIPE, SIG_IGN);
- setupSigSegvAction();
-
- server.devnull = fopen("/dev/null","w");
- if (server.devnull == NULL) {
- redisLog(REDIS_WARNING, "Can't open /dev/null: %s", server.neterr);
- exit(1);
- }
- server.clients = listCreate();
- server.slaves = listCreate();
- server.monitors = listCreate();
- server.objfreelist = listCreate();
- createSharedObjects();
- server.el = aeCreateEventLoop();
- server.db = zmalloc(sizeof(redisDb)*server.dbnum);
- server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
- if (server.fd == -1) {
- redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
- exit(1);
- }
- for (j = 0; j < server.dbnum; j++) {
- server.db[j].dict = dictCreate(&dbDictType,NULL);
- server.db[j].expires = dictCreate(&keyptrDictType,NULL);
- server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
- if (server.vm_enabled)
- server.db[j].io_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].id = j;
- }
- server.pubsub_channels = dictCreate(&keylistDictType,NULL);
- server.pubsub_patterns = listCreate();
- listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
- listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
- server.cronloops = 0;
- server.bgsavechildpid = -1;
- server.bgrewritechildpid = -1;
- server.bgrewritebuf = sdsempty();
- server.aofbuf = sdsempty();
- server.lastsave = time(NULL);
- server.dirty = 0;
- server.stat_numcommands = 0;
- server.stat_numconnections = 0;
- server.stat_expiredkeys = 0;
- server.stat_starttime = time(NULL);
- server.unixtime = time(NULL);
- aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
- if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
- acceptHandler, NULL) == AE_ERR) oom("creating file event");
-
- if (server.appendonly) {
- server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
- if (server.appendfd == -1) {
- redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
- strerror(errno));
- exit(1);
- }
- }
-
- if (server.vm_enabled) vmInit();
-}
-
-/* Empty the whole database */
-static long long emptyDb() {
- int j;
- long long removed = 0;
-
- for (j = 0; j < server.dbnum; j++) {
- removed += dictSize(server.db[j].dict);
- dictEmpty(server.db[j].dict);
- dictEmpty(server.db[j].expires);
- }
- return removed;
-}
-
-static int yesnotoi(char *s) {
- if (!strcasecmp(s,"yes")) return 1;
- else if (!strcasecmp(s,"no")) return 0;
- else return -1;
-}
-
-/* I agree, this is a very rudimental way to load a configuration...
- will improve later if the config gets more complex */
-static void loadServerConfig(char *filename) {
- FILE *fp;
- char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
- int linenum = 0;
- sds line = NULL;
-
- if (filename[0] == '-' && filename[1] == '\0')
- fp = stdin;
- else {
- if ((fp = fopen(filename,"r")) == NULL) {
- redisLog(REDIS_WARNING, "Fatal error, can't open config file '%s'", filename);
- exit(1);
- }
- }
-
- while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
- sds *argv;
- int argc, j;
-
- linenum++;
- line = sdsnew(buf);
- line = sdstrim(line," \t\r\n");
-
- /* Skip comments and blank lines*/
- if (line[0] == '#' || line[0] == '\0') {
- sdsfree(line);
- continue;
- }
-
- /* Split into arguments */
- argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
- sdstolower(argv[0]);
-
- /* Execute config directives */
- if (!strcasecmp(argv[0],"timeout") && argc == 2) {
- server.maxidletime = atoi(argv[1]);
- if (server.maxidletime < 0) {
- err = "Invalid timeout value"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"port") && argc == 2) {
- server.port = atoi(argv[1]);
- if (server.port < 1 || server.port > 65535) {
- err = "Invalid port"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
- server.bindaddr = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"save") && argc == 3) {
- int seconds = atoi(argv[1]);
- int changes = atoi(argv[2]);
- if (seconds < 1 || changes < 0) {
- err = "Invalid save parameters"; goto loaderr;
- }
- appendServerSaveParams(seconds,changes);
- } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
- if (chdir(argv[1]) == -1) {
- redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
- argv[1], strerror(errno));
- exit(1);
- }
- } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
- if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
- else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
- else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
- else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
- else {
- err = "Invalid log level. Must be one of debug, notice, warning";
- goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
- FILE *logfp;
-
- server.logfile = zstrdup(argv[1]);
- if (!strcasecmp(server.logfile,"stdout")) {
- zfree(server.logfile);
- server.logfile = NULL;
- }
- if (server.logfile) {
- /* Test if we are able to open the file. The server will not
- * be able to abort just for this problem later... */
- logfp = fopen(server.logfile,"a");
- if (logfp == NULL) {
- err = sdscatprintf(sdsempty(),
- "Can't open the log file: %s", strerror(errno));
- goto loaderr;
- }
- fclose(logfp);
- }
- } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
- server.dbnum = atoi(argv[1]);
- if (server.dbnum < 1) {
- err = "Invalid number of databases"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"include") && argc == 2) {
- loadServerConfig(argv[1]);
- } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
- server.maxclients = atoi(argv[1]);
- } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
- server.maxmemory = memtoll(argv[1],NULL);
- } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
- server.masterhost = sdsnew(argv[1]);
- server.masterport = atoi(argv[2]);
- server.replstate = REDIS_REPL_CONNECT;
- } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
- server.masterauth = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
- if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
- if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) {
- if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
- if ((server.daemonize = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
- if ((server.appendonly = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
- zfree(server.appendfilename);
- server.appendfilename = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite")
- && argc == 2) {
- if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
- if (!strcasecmp(argv[1],"no")) {
- server.appendfsync = APPENDFSYNC_NO;
- } else if (!strcasecmp(argv[1],"always")) {
- server.appendfsync = APPENDFSYNC_ALWAYS;
- } else if (!strcasecmp(argv[1],"everysec")) {
- server.appendfsync = APPENDFSYNC_EVERYSEC;
- } else {
- err = "argument must be 'no', 'always' or 'everysec'";
- goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
- server.requirepass = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
- zfree(server.pidfile);
- server.pidfile = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
- zfree(server.dbfilename);
- server.dbfilename = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
- if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
- err = "argument must be 'yes' or 'no'"; goto loaderr;
- }
- } else if (!strcasecmp(argv[0],"vm-swap-file") && argc == 2) {
- zfree(server.vm_swap_file);
- server.vm_swap_file = zstrdup(argv[1]);
- } else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
- server.vm_max_memory = memtoll(argv[1],NULL);
- } else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
- server.vm_page_size = memtoll(argv[1], NULL);
- } else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
- server.vm_pages = memtoll(argv[1], NULL);
- } else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
- server.vm_max_threads = strtoll(argv[1], NULL, 10);
- } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2){
- server.hash_max_zipmap_entries = memtoll(argv[1], NULL);
- } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2){
- server.hash_max_zipmap_value = memtoll(argv[1], NULL);
- } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
- server.list_max_ziplist_entries = memtoll(argv[1], NULL);
- } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2){
- server.list_max_ziplist_value = memtoll(argv[1], NULL);
- } else {
- err = "Bad directive or wrong number of arguments"; goto loaderr;
- }
- for (j = 0; j < argc; j++)
- sdsfree(argv[j]);
- zfree(argv);
- sdsfree(line);
- }
- if (fp != stdin) fclose(fp);
- return;
-
-loaderr:
- fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
- fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
- fprintf(stderr, ">>> '%s'\n", line);
- fprintf(stderr, "%s\n", err);
- exit(1);
-}
-
-static void freeClientArgv(redisClient *c) {
- int j;
-
- for (j = 0; j < c->argc; j++)
- decrRefCount(c->argv[j]);
- for (j = 0; j < c->mbargc; j++)
- decrRefCount(c->mbargv[j]);
- c->argc = 0;
- c->mbargc = 0;
-}
-
-static void freeClient(redisClient *c) {
- listNode *ln;
-
- /* Note that if the client we are freeing is blocked into a blocking
- * call, we have to set querybuf to NULL *before* to call
- * unblockClientWaitingData() to avoid processInputBuffer() will get
- * called. Also it is important to remove the file events after
- * this, because this call adds the READABLE event. */
- sdsfree(c->querybuf);
- c->querybuf = NULL;
- if (c->flags & REDIS_BLOCKED)
- unblockClientWaitingData(c);
-
- /* UNWATCH all the keys */
- unwatchAllKeys(c);
- listRelease(c->watched_keys);
- /* Unsubscribe from all the pubsub channels */
- pubsubUnsubscribeAllChannels(c,0);
- pubsubUnsubscribeAllPatterns(c,0);
- dictRelease(c->pubsub_channels);
- listRelease(c->pubsub_patterns);
- /* Obvious cleanup */
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- listRelease(c->reply);
- freeClientArgv(c);
- close(c->fd);
- /* Remove from the list of clients */
- ln = listSearchKey(server.clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.clients,ln);
- /* Remove from the list of clients that are now ready to be restarted
- * after waiting for swapped keys */
- if (c->flags & REDIS_IO_WAIT && listLength(c->io_keys) == 0) {
- ln = listSearchKey(server.io_ready_clients,c);
- if (ln) {
- listDelNode(server.io_ready_clients,ln);
- server.vm_blocked_clients--;
- }
- }
- /* Remove from the list of clients waiting for swapped keys */
- while (server.vm_enabled && listLength(c->io_keys)) {
- ln = listFirst(c->io_keys);
- dontWaitForSwappedKey(c,ln->value);
- }
- listRelease(c->io_keys);
- /* Master/slave cleanup */
- if (c->flags & REDIS_SLAVE) {
- if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
- close(c->repldbfd);
- list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
- ln = listSearchKey(l,c);
- redisAssert(ln != NULL);
- listDelNode(l,ln);
- }
- if (c->flags & REDIS_MASTER) {
- server.master = NULL;
- server.replstate = REDIS_REPL_CONNECT;
- }
- /* Release memory */
- zfree(c->argv);
- zfree(c->mbargv);
- freeClientMultiState(c);
- zfree(c);
-}
-
-#define GLUEREPLY_UP_TO (1024)
-static void glueReplyBuffersIfNeeded(redisClient *c) {
- int copylen = 0;
- char buf[GLUEREPLY_UP_TO];
- listNode *ln;
- listIter li;
- robj *o;
-
- listRewind(c->reply,&li);
- while((ln = listNext(&li))) {
- int objlen;
-
- o = ln->value;
- objlen = sdslen(o->ptr);
- if (copylen + objlen <= GLUEREPLY_UP_TO) {
- memcpy(buf+copylen,o->ptr,objlen);
- copylen += objlen;
- listDelNode(c->reply,ln);
- } else {
- if (copylen == 0) return;
- break;
- }
- }
- /* Now the output buffer is empty, add the new single element */
- o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
- listAddNodeHead(c->reply,o);
-}
-
-static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen;
- robj *o;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- /* Use writev() if we have enough buffers to send */
- if (!server.glueoutputbuf &&
- listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
- !(c->flags & REDIS_MASTER))
- {
- sendReplyToClientWritev(el, fd, privdata, mask);
- return;
- }
-
- while(listLength(c->reply)) {
- if (server.glueoutputbuf && listLength(c->reply) > 1)
- glueReplyBuffersIfNeeded(c);
-
- o = listNodeValue(listFirst(c->reply));
- objlen = sdslen(o->ptr);
-
- if (objlen == 0) {
- listDelNode(c->reply,listFirst(c->reply));
- continue;
- }
-
- if (c->flags & REDIS_MASTER) {
- /* Don't reply to a master */
- nwritten = objlen - c->sentlen;
- } else {
- nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
- if (nwritten <= 0) break;
- }
- c->sentlen += nwritten;
- totwritten += nwritten;
- /* If we fully sent the object on head go to the next one */
- if (c->sentlen == objlen) {
- listDelNode(c->reply,listFirst(c->reply));
- c->sentlen = 0;
- }
- /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
- * bytes, in a single threaded server it's a good idea to serve
- * other clients as well, even if a very large request comes from
- * super fast link that is always able to accept data (in real world
- * scenario think about 'KEYS *' against the loopback interfae) */
- if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
- }
- if (nwritten == -1) {
- if (errno == EAGAIN) {
- nwritten = 0;
- } else {
- redisLog(REDIS_VERBOSE,
- "Error writing to client: %s", strerror(errno));
- freeClient(c);
- return;
- }
- }
- if (totwritten > 0) c->lastinteraction = time(NULL);
- if (listLength(c->reply) == 0) {
- c->sentlen = 0;
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- }
-}
-
-static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
-{
- redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen, willwrite;
- robj *o;
- struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
- int offset, ion = 0;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- listNode *node;
- while (listLength(c->reply)) {
- offset = c->sentlen;
- ion = 0;
- willwrite = 0;
-
- /* fill-in the iov[] array */
- for(node = listFirst(c->reply); node; node = listNextNode(node)) {
- o = listNodeValue(node);
- objlen = sdslen(o->ptr);
-
- if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
- break;
-
- if(ion == REDIS_WRITEV_IOVEC_COUNT)
- break; /* no more iovecs */
-
- iov[ion].iov_base = ((char*)o->ptr) + offset;
- iov[ion].iov_len = objlen - offset;
- willwrite += objlen - offset;
- offset = 0; /* just for the first item */
- ion++;
- }
-
- if(willwrite == 0)
- break;
-
- /* write all collected blocks at once */
- if((nwritten = writev(fd, iov, ion)) < 0) {
- if (errno != EAGAIN) {
- redisLog(REDIS_VERBOSE,
- "Error writing to client: %s", strerror(errno));
- freeClient(c);
- return;
- }
- break;
- }
-
- totwritten += nwritten;
- offset = c->sentlen;
-
- /* remove written robjs from c->reply */
- while (nwritten && listLength(c->reply)) {
- o = listNodeValue(listFirst(c->reply));
- objlen = sdslen(o->ptr);
-
- if(nwritten >= objlen - offset) {
- listDelNode(c->reply, listFirst(c->reply));
- nwritten -= objlen - offset;
- c->sentlen = 0;
- } else {
- /* partial write */
- c->sentlen += nwritten;
- break;
- }
- offset = 0;
- }
- }
-
- if (totwritten > 0)
- c->lastinteraction = time(NULL);
-
- if (listLength(c->reply) == 0) {
- c->sentlen = 0;
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- }
-}
-
-static int qsortRedisCommands(const void *r1, const void *r2) {
- return strcasecmp(
- ((struct redisCommand*)r1)->name,
- ((struct redisCommand*)r2)->name);
-}
-
-static void sortCommandTable() {
- /* Copy and sort the read-only version of the command table */
- commandTable = (struct redisCommand*)malloc(sizeof(readonlyCommandTable));
- memcpy(commandTable,readonlyCommandTable,sizeof(readonlyCommandTable));
- qsort(commandTable,
- sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
- sizeof(struct redisCommand),qsortRedisCommands);
-}
-
-static struct redisCommand *lookupCommand(char *name) {
- struct redisCommand tmp = {name,NULL,0,0,NULL,0,0,0};
- return bsearch(
- &tmp,
- commandTable,
- sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
- sizeof(struct redisCommand),
- qsortRedisCommands);
-}
-
-/* resetClient prepare the client to process the next command */
-static void resetClient(redisClient *c) {
- freeClientArgv(c);
- c->bulklen = -1;
- c->multibulk = 0;
-}
-
-/* Call() is the core of Redis execution of a command */
-static void call(redisClient *c, struct redisCommand *cmd) {
- long long dirty;
-
- dirty = server.dirty;
- cmd->proc(c);
- dirty = server.dirty-dirty;
-
- if (server.appendonly && dirty)
- feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
- if ((dirty || cmd->flags & REDIS_CMD_FORCE_REPLICATION) &&
- listLength(server.slaves))
- replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc);
- if (listLength(server.monitors))
- replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc);
- server.stat_numcommands++;
-}
-
-/* If this function gets called we already read a whole
- * command, argments are in the client argv/argc fields.
- * processCommand() execute the command or prepare the
- * server for a bulk read from the client.
- *
- * If 1 is returned the client is still alive and valid and
- * and other operations can be performed by the caller. Otherwise
- * if 0 is returned the client was destroied (i.e. after QUIT). */
-static int processCommand(redisClient *c) {
- struct redisCommand *cmd;
-
- /* Free some memory if needed (maxmemory setting) */
- if (server.maxmemory) freeMemoryIfNeeded();
-
- /* Handle the multi bulk command type. This is an alternative protocol
- * supported by Redis in order to receive commands that are composed of
- * multiple binary-safe "bulk" arguments. The latency of processing is
- * a bit higher but this allows things like multi-sets, so if this
- * protocol is used only for MSET and similar commands this is a big win. */
- if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
- c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
- if (c->multibulk <= 0) {
- resetClient(c);
- return 1;
- } else {
- decrRefCount(c->argv[c->argc-1]);
- c->argc--;
- return 1;
- }
- } else if (c->multibulk) {
- if (c->bulklen == -1) {
- if (((char*)c->argv[0]->ptr)[0] != '$') {
- addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
- resetClient(c);
- return 1;
- } else {
- int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
- decrRefCount(c->argv[0]);
- if (bulklen < 0 || bulklen > 1024*1024*1024) {
- c->argc--;
- addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
- resetClient(c);
- return 1;
- }
- c->argc--;
- c->bulklen = bulklen+2; /* add two bytes for CR+LF */
- return 1;
- }
- } else {
- c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
- c->mbargv[c->mbargc] = c->argv[0];
- c->mbargc++;
- c->argc--;
- c->multibulk--;
- if (c->multibulk == 0) {
- robj **auxargv;
- int auxargc;
-
- /* Here we need to swap the multi-bulk argc/argv with the
- * normal argc/argv of the client structure. */
- auxargv = c->argv;
- c->argv = c->mbargv;
- c->mbargv = auxargv;
-
- auxargc = c->argc;
- c->argc = c->mbargc;
- c->mbargc = auxargc;
-
- /* We need to set bulklen to something different than -1
- * in order for the code below to process the command without
- * to try to read the last argument of a bulk command as
- * a special argument. */
- c->bulklen = 0;
- /* continue below and process the command */
- } else {
- c->bulklen = -1;
- return 1;
- }
- }
- }
- /* -- end of multi bulk commands processing -- */
-
- /* The QUIT command is handled as a special case. Normal command
- * procs are unable to close the client connection safely */
- if (!strcasecmp(c->argv[0]->ptr,"quit")) {
- freeClient(c);
- return 0;
- }
-
- /* Now lookup the command and check ASAP about trivial error conditions
- * such wrong arity, bad command name and so forth. */
- cmd = lookupCommand(c->argv[0]->ptr);
- if (!cmd) {
- addReplySds(c,
- sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
- (char*)c->argv[0]->ptr));
- resetClient(c);
- return 1;
- } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
- (c->argc < -cmd->arity)) {
- addReplySds(c,
- sdscatprintf(sdsempty(),
- "-ERR wrong number of arguments for '%s' command\r\n",
- cmd->name));
- resetClient(c);
- return 1;
- } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
- /* This is a bulk command, we have to read the last argument yet. */
- int bulklen = atoi(c->argv[c->argc-1]->ptr);
-
- decrRefCount(c->argv[c->argc-1]);
- if (bulklen < 0 || bulklen > 1024*1024*1024) {
- c->argc--;
- addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
- resetClient(c);
- return 1;
- }
- c->argc--;
- c->bulklen = bulklen+2; /* add two bytes for CR+LF */
- /* It is possible that the bulk read is already in the
- * buffer. Check this condition and handle it accordingly.
- * This is just a fast path, alternative to call processInputBuffer().
- * It's a good idea since the code is small and this condition
- * happens most of the times. */
- if ((signed)sdslen(c->querybuf) >= c->bulklen) {
- c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
- c->argc++;
- c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
- } else {
- /* Otherwise return... there is to read the last argument
- * from the socket. */
- return 1;
- }
- }
- /* Let's try to encode the bulk object to save space. */
- if (cmd->flags & REDIS_CMD_BULK)
- c->argv[c->argc-1] = tryObjectEncoding(c->argv[c->argc-1]);
-
- /* Check if the user is authenticated */
- if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
- addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
- resetClient(c);
- return 1;
- }
-
- /* Handle the maxmemory directive */
- if (server.maxmemory && (cmd->flags & REDIS_CMD_DENYOOM) &&
- zmalloc_used_memory() > server.maxmemory)
- {
- addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
- resetClient(c);
- return 1;
- }
-
- /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
- if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
- &&
- cmd->proc != subscribeCommand && cmd->proc != unsubscribeCommand &&
- cmd->proc != psubscribeCommand && cmd->proc != punsubscribeCommand) {
- addReplySds(c,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
- resetClient(c);
- return 1;
- }
-
- /* Exec the command */
- if (c->flags & REDIS_MULTI &&
- cmd->proc != execCommand && cmd->proc != discardCommand &&
- cmd->proc != multiCommand && cmd->proc != watchCommand)
- {
- queueMultiCommand(c,cmd);
- addReply(c,shared.queued);
- } else {
- if (server.vm_enabled && server.vm_max_threads > 0 &&
- blockClientOnSwappedKeys(c,cmd)) return 1;
- call(c,cmd);
- }
-
- /* Prepare the client for the next command */
- resetClient(c);
- return 1;
-}
-
-static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
- listNode *ln;
- listIter li;
- int outc = 0, j;
- robj **outv;
- /* We need 1+(ARGS*3) objects since commands are using the new protocol
- * and we one 1 object for the first "*<count>\r\n" multibulk count, then
- * for every additional object we have "$<count>\r\n" + object + "\r\n". */
- robj *static_outv[REDIS_STATIC_ARGS*3+1];
- robj *lenobj;
-
- if (argc <= REDIS_STATIC_ARGS) {
- outv = static_outv;
- } else {
- outv = zmalloc(sizeof(robj*)*(argc*3+1));
- }
-
- lenobj = createObject(REDIS_STRING,
- sdscatprintf(sdsempty(), "*%d\r\n", argc));
- lenobj->refcount = 0;
- outv[outc++] = lenobj;
- for (j = 0; j < argc; j++) {
- lenobj = createObject(REDIS_STRING,
- sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long) stringObjectLen(argv[j])));
- lenobj->refcount = 0;
- outv[outc++] = lenobj;
- outv[outc++] = argv[j];
- outv[outc++] = shared.crlf;
- }
-
- /* Increment all the refcounts at start and decrement at end in order to
- * be sure to free objects if there is no slave in a replication state
- * able to be feed with commands */
- for (j = 0; j < outc; j++) incrRefCount(outv[j]);
- listRewind(slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
-
- /* Don't feed slaves that are still waiting for BGSAVE to start */
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
-
- /* Feed all the other slaves, MONITORs and so on */
- if (slave->slaveseldb != dictid) {
- robj *selectcmd;
-
- switch(dictid) {
- case 0: selectcmd = shared.select0; break;
- case 1: selectcmd = shared.select1; break;
- case 2: selectcmd = shared.select2; break;
- case 3: selectcmd = shared.select3; break;
- case 4: selectcmd = shared.select4; break;
- case 5: selectcmd = shared.select5; break;
- case 6: selectcmd = shared.select6; break;
- case 7: selectcmd = shared.select7; break;
- case 8: selectcmd = shared.select8; break;
- case 9: selectcmd = shared.select9; break;
- default:
- selectcmd = createObject(REDIS_STRING,
- sdscatprintf(sdsempty(),"select %d\r\n",dictid));
- selectcmd->refcount = 0;
- break;
- }
- addReply(slave,selectcmd);
- slave->slaveseldb = dictid;
- }
- for (j = 0; j < outc; j++) addReply(slave,outv[j]);
- }
- for (j = 0; j < outc; j++) decrRefCount(outv[j]);
- if (outv != static_outv) zfree(outv);
-}
-
-static sds sdscatrepr(sds s, char *p, size_t len) {
- s = sdscatlen(s,"\"",1);
- while(len--) {
- switch(*p) {
- case '\\':
- case '"':
- s = sdscatprintf(s,"\\%c",*p);
- break;
- case '\n': s = sdscatlen(s,"\\n",1); break;
- case '\r': s = sdscatlen(s,"\\r",1); break;
- case '\t': s = sdscatlen(s,"\\t",1); break;
- case '\a': s = sdscatlen(s,"\\a",1); break;
- case '\b': s = sdscatlen(s,"\\b",1); break;
- default:
- if (isprint(*p))
- s = sdscatprintf(s,"%c",*p);
- else
- s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
- break;
- }
- p++;
- }
- return sdscatlen(s,"\"",1);
-}
-
-static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) {
- listNode *ln;
- listIter li;
- int j;
- sds cmdrepr = sdsnew("+");
- robj *cmdobj;
- struct timeval tv;
-
- gettimeofday(&tv,NULL);
- cmdrepr = sdscatprintf(cmdrepr,"%ld.%ld ",(long)tv.tv_sec,(long)tv.tv_usec);
- if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid);
-
- for (j = 0; j < argc; j++) {
- if (argv[j]->encoding == REDIS_ENCODING_INT) {
- cmdrepr = sdscatprintf(cmdrepr, "%ld", (long)argv[j]->ptr);
- } else {
- cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
- sdslen(argv[j]->ptr));
- }
- if (j != argc-1)
- cmdrepr = sdscatlen(cmdrepr," ",1);
- }
- cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
- cmdobj = createObject(REDIS_STRING,cmdrepr);
-
- listRewind(monitors,&li);
- while((ln = listNext(&li))) {
- redisClient *monitor = ln->value;
- addReply(monitor,cmdobj);
- }
- decrRefCount(cmdobj);
-}
-
-static void processInputBuffer(redisClient *c) {
-again:
- /* Before to process the input buffer, make sure the client is not
- * waitig for a blocking operation such as BLPOP. Note that the first
- * iteration the client is never blocked, otherwise the processInputBuffer
- * would not be called at all, but after the execution of the first commands
- * in the input buffer the client may be blocked, and the "goto again"
- * will try to reiterate. The following line will make it return asap. */
- if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
- if (c->bulklen == -1) {
- /* Read the first line of the query */
- char *p = strchr(c->querybuf,'\n');
- size_t querylen;
-
- if (p) {
- sds query, *argv;
- int argc, j;
-
- query = c->querybuf;
- c->querybuf = sdsempty();
- querylen = 1+(p-(query));
- if (sdslen(query) > querylen) {
- /* leave data after the first line of the query in the buffer */
- c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
- }
- *p = '\0'; /* remove "\n" */
- if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
- sdsupdatelen(query);
-
- /* Now we can split the query in arguments */
- argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
- sdsfree(query);
-
- if (c->argv) zfree(c->argv);
- c->argv = zmalloc(sizeof(robj*)*argc);
-
- for (j = 0; j < argc; j++) {
- if (sdslen(argv[j])) {
- c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
- c->argc++;
- } else {
- sdsfree(argv[j]);
- }
- }
- zfree(argv);
- if (c->argc) {
- /* Execute the command. If the client is still valid
- * after processCommand() return and there is something
- * on the query buffer try to process the next command. */
- if (processCommand(c) && sdslen(c->querybuf)) goto again;
- } else {
- /* Nothing to process, argc == 0. Just process the query
- * buffer if it's not empty or return to the caller */
- if (sdslen(c->querybuf)) goto again;
- }
- return;
- } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
- redisLog(REDIS_VERBOSE, "Client protocol error");
- freeClient(c);
- return;
- }
- } else {
- /* Bulk read handling. Note that if we are at this point
- the client already sent a command terminated with a newline,
- we are reading the bulk data that is actually the last
- argument of the command. */
- int qbl = sdslen(c->querybuf);
-
- if (c->bulklen <= qbl) {
- /* Copy everything but the final CRLF as final argument */
- c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
- c->argc++;
- c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
- /* Process the command. If the client is still valid after
- * the processing and there is more data in the buffer
- * try to parse it. */
- if (processCommand(c) && sdslen(c->querybuf)) goto again;
- return;
- }
- }
-}
-
-static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = (redisClient*) privdata;
- char buf[REDIS_IOBUF_LEN];
- int nread;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- nread = read(fd, buf, REDIS_IOBUF_LEN);
- if (nread == -1) {
- if (errno == EAGAIN) {
- nread = 0;
- } else {
- redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
- freeClient(c);
- return;
- }
- } else if (nread == 0) {
- redisLog(REDIS_VERBOSE, "Client closed connection");
- freeClient(c);
- return;
- }
- if (nread) {
- c->querybuf = sdscatlen(c->querybuf, buf, nread);
- c->lastinteraction = time(NULL);
- } else {
- return;
- }
- processInputBuffer(c);
-}
-
-static int selectDb(redisClient *c, int id) {
- if (id < 0 || id >= server.dbnum)
- return REDIS_ERR;
- c->db = &server.db[id];
- return REDIS_OK;
-}
-
-static void *dupClientReplyValue(void *o) {
- incrRefCount((robj*)o);
- return o;
-}
-
-static int listMatchObjects(void *a, void *b) {
- return equalStringObjects(a,b);
-}
-
-static redisClient *createClient(int fd) {
- redisClient *c = zmalloc(sizeof(*c));
-
- anetNonBlock(NULL,fd);
- anetTcpNoDelay(NULL,fd);
- if (!c) return NULL;
- selectDb(c,0);
- c->fd = fd;
- c->querybuf = sdsempty();
- c->argc = 0;
- c->argv = NULL;
- c->bulklen = -1;
- c->multibulk = 0;
- c->mbargc = 0;
- c->mbargv = NULL;
- c->sentlen = 0;
- c->flags = 0;
- c->lastinteraction = time(NULL);
- c->authenticated = 0;
- c->replstate = REDIS_REPL_NONE;
- c->reply = listCreate();
- listSetFreeMethod(c->reply,decrRefCount);
- listSetDupMethod(c->reply,dupClientReplyValue);
- c->blocking_keys = NULL;
- c->blocking_keys_num = 0;
- c->io_keys = listCreate();
- c->watched_keys = listCreate();
- listSetFreeMethod(c->io_keys,decrRefCount);
- c->pubsub_channels = dictCreate(&setDictType,NULL);
- c->pubsub_patterns = listCreate();
- listSetFreeMethod(c->pubsub_patterns,decrRefCount);
- listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
- if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
- readQueryFromClient, c) == AE_ERR) {
- freeClient(c);
- return NULL;
- }
- listAddNodeTail(server.clients,c);
- initClientMultiState(c);
- return c;
-}
-
-static void addReply(redisClient *c, robj *obj) {
- if (listLength(c->reply) == 0 &&
- (c->replstate == REDIS_REPL_NONE ||
- c->replstate == REDIS_REPL_ONLINE) &&
- aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c) == AE_ERR) return;
-
- if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
- obj = dupStringObject(obj);
- obj->refcount = 0; /* getDecodedObject() will increment the refcount */
- }
- listAddNodeTail(c->reply,getDecodedObject(obj));
-}
-
-static void addReplySds(redisClient *c, sds s) {
- robj *o = createObject(REDIS_STRING,s);
- addReply(c,o);
- decrRefCount(o);
-}
-
-static void addReplyDouble(redisClient *c, double d) {
- char buf[128];
-
- snprintf(buf,sizeof(buf),"%.17g",d);
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
- (unsigned long) strlen(buf),buf));
-}
-
-static void addReplyLongLong(redisClient *c, long long ll) {
- char buf[128];
- size_t len;
-
- if (ll == 0) {
- addReply(c,shared.czero);
- return;
- } else if (ll == 1) {
- addReply(c,shared.cone);
- return;
- }
- buf[0] = ':';
- len = ll2string(buf+1,sizeof(buf)-1,ll);
- buf[len+1] = '\r';
- buf[len+2] = '\n';
- addReplySds(c,sdsnewlen(buf,len+3));
-}
-
-static void addReplyUlong(redisClient *c, unsigned long ul) {
- char buf[128];
- size_t len;
-
- if (ul == 0) {
- addReply(c,shared.czero);
- return;
- } else if (ul == 1) {
- addReply(c,shared.cone);
- return;
- }
- len = snprintf(buf,sizeof(buf),":%lu\r\n",ul);
- addReplySds(c,sdsnewlen(buf,len));
-}
-
-static void addReplyBulkLen(redisClient *c, robj *obj) {
- size_t len, intlen;
- char buf[128];
-
- if (obj->encoding == REDIS_ENCODING_RAW) {
- len = sdslen(obj->ptr);
- } else {
- long n = (long)obj->ptr;
-
- /* Compute how many bytes will take this integer as a radix 10 string */
- len = 1;
- if (n < 0) {
- len++;
- n = -n;
- }
- while((n = n/10) != 0) {
- len++;
- }
- }
- buf[0] = '$';
- intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len);
- buf[intlen+1] = '\r';
- buf[intlen+2] = '\n';
- addReplySds(c,sdsnewlen(buf,intlen+3));
-}
-
-static void addReplyBulk(redisClient *c, robj *obj) {
- addReplyBulkLen(c,obj);
- addReply(c,obj);
- addReply(c,shared.crlf);
-}
-
-static void addReplyBulkSds(redisClient *c, sds s) {
- robj *o = createStringObject(s, sdslen(s));
- addReplyBulk(c,o);
- decrRefCount(o);
-}
-
-/* In the CONFIG command we need to add vanilla C string as bulk replies */
-static void addReplyBulkCString(redisClient *c, char *s) {
- if (s == NULL) {
- addReply(c,shared.nullbulk);
- } else {
- robj *o = createStringObject(s,strlen(s));
- addReplyBulk(c,o);
- decrRefCount(o);
- }
-}
-
-static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
- int cport, cfd;
- char cip[128];
- redisClient *c;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
-
- cfd = anetAccept(server.neterr, fd, cip, &cport);
- if (cfd == AE_ERR) {
- redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
- return;
- }
- redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
- if ((c = createClient(cfd)) == NULL) {
- redisLog(REDIS_WARNING,"Error allocating resoures for the client");
- close(cfd); /* May be already closed, just ingore errors */
- return;
- }
- /* If maxclient directive is set and this is one client more... close the
- * connection. Note that we create the client instead to check before
- * for this condition, since now the socket is already set in nonblocking
- * mode and we can send an error for free using the Kernel I/O */
- if (server.maxclients && listLength(server.clients) > server.maxclients) {
- char *err = "-ERR max number of clients reached\r\n";
-
- /* That's a best effort error message, don't check write errors */
- if (write(c->fd,err,strlen(err)) == -1) {
- /* Nothing to do, Just to avoid the warning... */
- }
- freeClient(c);
- return;
- }
- server.stat_numconnections++;
-}
-
-/* ======================= Redis objects implementation ===================== */
-
-static robj *createObject(int type, void *ptr) {
- robj *o;
-
- if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
- if (listLength(server.objfreelist)) {
- listNode *head = listFirst(server.objfreelist);
- o = listNodeValue(head);
- listDelNode(server.objfreelist,head);
- if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
- } else {
- if (server.vm_enabled)
- pthread_mutex_unlock(&server.obj_freelist_mutex);
- o = zmalloc(sizeof(*o));
- }
- o->type = type;
- o->encoding = REDIS_ENCODING_RAW;
- o->ptr = ptr;
- o->refcount = 1;
- if (server.vm_enabled) {
- /* Note that this code may run in the context of an I/O thread
- * and accessing server.lruclock in theory is an error
- * (no locks). But in practice this is safe, and even if we read
- * garbage Redis will not fail. */
- o->lru = server.lruclock;
- o->storage = REDIS_VM_MEMORY;
- }
- return o;
-}
-
-static robj *createStringObject(char *ptr, size_t len) {
- return createObject(REDIS_STRING,sdsnewlen(ptr,len));
-}
-
-static robj *createStringObjectFromLongLong(long long value) {
- robj *o;
- if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
- incrRefCount(shared.integers[value]);
- o = shared.integers[value];
- } else {
- if (value >= LONG_MIN && value <= LONG_MAX) {
- o = createObject(REDIS_STRING, NULL);
- o->encoding = REDIS_ENCODING_INT;
- o->ptr = (void*)((long)value);
- } else {
- o = createObject(REDIS_STRING,sdsfromlonglong(value));
- }
- }
- return o;
-}
-
-static robj *dupStringObject(robj *o) {
- assert(o->encoding == REDIS_ENCODING_RAW);
- return createStringObject(o->ptr,sdslen(o->ptr));
-}
-
-static robj *createListObject(void) {
- list *l = listCreate();
- robj *o = createObject(REDIS_LIST,l);
- listSetFreeMethod(l,decrRefCount);
- o->encoding = REDIS_ENCODING_LINKEDLIST;
- return o;
-}
-
-static robj *createZiplistObject(void) {
- unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_LIST,zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
- return o;
-}
-
-static robj *createSetObject(void) {
- dict *d = dictCreate(&setDictType,NULL);
- return createObject(REDIS_SET,d);
-}
-
-static robj *createHashObject(void) {
- /* All the Hashes start as zipmaps. Will be automatically converted
- * into hash tables if there are enough elements or big elements
- * inside. */
- unsigned char *zm = zipmapNew();
- robj *o = createObject(REDIS_HASH,zm);
- o->encoding = REDIS_ENCODING_ZIPMAP;
- return o;
-}
-
-static robj *createZsetObject(void) {
- zset *zs = zmalloc(sizeof(*zs));
-
- zs->dict = dictCreate(&zsetDictType,NULL);
- zs->zsl = zslCreate();
- return createObject(REDIS_ZSET,zs);
-}
-
-static void freeStringObject(robj *o) {
- if (o->encoding == REDIS_ENCODING_RAW) {
- sdsfree(o->ptr);
- }
-}
-
-static void freeListObject(robj *o) {
- switch (o->encoding) {
- case REDIS_ENCODING_LINKEDLIST:
- listRelease((list*) o->ptr);
- break;
- case REDIS_ENCODING_ZIPLIST:
- zfree(o->ptr);
- break;
- default:
- redisPanic("Unknown list encoding type");
- }
-}
-
-static void freeSetObject(robj *o) {
- dictRelease((dict*) o->ptr);
-}
-
-static void freeZsetObject(robj *o) {
- zset *zs = o->ptr;
-
- dictRelease(zs->dict);
- zslFree(zs->zsl);
- zfree(zs);
-}
-
-static void freeHashObject(robj *o) {
- switch (o->encoding) {
- case REDIS_ENCODING_HT:
- dictRelease((dict*) o->ptr);
- break;
- case REDIS_ENCODING_ZIPMAP:
- zfree(o->ptr);
- break;
- default:
- redisPanic("Unknown hash encoding type");
- break;
- }
-}
-
-static void incrRefCount(robj *o) {
- o->refcount++;
-}
-
-static void decrRefCount(void *obj) {
- robj *o = obj;
-
- /* Object is a swapped out value, or in the process of being loaded. */
- if (server.vm_enabled &&
- (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING))
- {
- vmpointer *vp = obj;
- if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o);
- vmMarkPagesFree(vp->page,vp->usedpages);
- server.vm_stats_swapped_objects--;
- zfree(vp);
- return;
- }
-
- if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
- /* Object is in memory, or in the process of being swapped out.
- *
- * If the object is being swapped out, abort the operation on
- * decrRefCount even if the refcount does not drop to 0: the object
- * is referenced at least two times, as value of the key AND as
- * job->val in the iojob. So if we don't invalidate the iojob, when it is
- * done but the relevant key was removed in the meantime, the
- * complete jobs handler will not find the key about the job and the
- * assert will fail. */
- if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING)
- vmCancelThreadedIOJob(o);
- if (--(o->refcount) == 0) {
- switch(o->type) {
- case REDIS_STRING: freeStringObject(o); break;
- case REDIS_LIST: freeListObject(o); break;
- case REDIS_SET: freeSetObject(o); break;
- case REDIS_ZSET: freeZsetObject(o); break;
- case REDIS_HASH: freeHashObject(o); break;
- default: redisPanic("Unknown object type"); break;
- }
- if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
- if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
- !listAddNodeHead(server.objfreelist,o))
- zfree(o);
- if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
- }
-}
-
-static int checkType(redisClient *c, robj *o, int type) {
- if (o->type != type) {
- addReply(c,shared.wrongtypeerr);
- return 1;
- }
- return 0;
-}
-
-/* Check if the nul-terminated string 's' can be represented by a long
- * (that is, is a number that fits into long without any other space or
- * character before or after the digits).
- *
- * If so, the function returns REDIS_OK and *longval is set to the value
- * of the number. Otherwise REDIS_ERR is returned */
-static int isStringRepresentableAsLong(sds s, long *longval) {
- char buf[32], *endptr;
- long value;
- int slen;
-
- value = strtol(s, &endptr, 10);
- if (endptr[0] != '\0') return REDIS_ERR;
- slen = ll2string(buf,32,value);
-
- /* If the number converted back into a string is not identical
- * then it's not possible to encode the string as integer */
- if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
- if (longval) *longval = value;
- return REDIS_OK;
-}
-
-/* Try to encode a string object in order to save space */
-static robj *tryObjectEncoding(robj *o) {
- long value;
- sds s = o->ptr;
-
- if (o->encoding != REDIS_ENCODING_RAW)
- return o; /* Already encoded */
-
- /* It's not safe to encode shared objects: shared objects can be shared
- * everywhere in the "object space" of Redis. Encoded objects can only
- * appear as "values" (and not, for instance, as keys) */
- if (o->refcount > 1) return o;
-
- /* Currently we try to encode only strings */
- redisAssert(o->type == REDIS_STRING);
-
- /* Check if we can represent this string as a long integer */
- if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return o;
-
- /* Ok, this object can be encoded */
- if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
- decrRefCount(o);
- incrRefCount(shared.integers[value]);
- return shared.integers[value];
- } else {
- o->encoding = REDIS_ENCODING_INT;
- sdsfree(o->ptr);
- o->ptr = (void*) value;
- return o;
- }
-}
-
-/* Get a decoded version of an encoded object (returned as a new object).
- * If the object is already raw-encoded just increment the ref count. */
-static robj *getDecodedObject(robj *o) {
- robj *dec;
-
- if (o->encoding == REDIS_ENCODING_RAW) {
- incrRefCount(o);
- return o;
- }
- if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
- char buf[32];
-
- ll2string(buf,32,(long)o->ptr);
- dec = createStringObject(buf,strlen(buf));
- return dec;
- } else {
- redisPanic("Unknown encoding type");
- }
-}
-
-/* Compare two string objects via strcmp() or alike.
- * Note that the objects may be integer-encoded. In such a case we
- * use ll2string() to get a string representation of the numbers on the stack
- * and compare the strings, it's much faster than calling getDecodedObject().
- *
- * Important note: if objects are not integer encoded, but binary-safe strings,
- * sdscmp() from sds.c will apply memcmp() so this function ca be considered
- * binary safe. */
-static int compareStringObjects(robj *a, robj *b) {
- redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
- char bufa[128], bufb[128], *astr, *bstr;
- int bothsds = 1;
-
- if (a == b) return 0;
- if (a->encoding != REDIS_ENCODING_RAW) {
- ll2string(bufa,sizeof(bufa),(long) a->ptr);
- astr = bufa;
- bothsds = 0;
- } else {
- astr = a->ptr;
- }
- if (b->encoding != REDIS_ENCODING_RAW) {
- ll2string(bufb,sizeof(bufb),(long) b->ptr);
- bstr = bufb;
- bothsds = 0;
- } else {
- bstr = b->ptr;
- }
- return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
-}
-
-/* Equal string objects return 1 if the two objects are the same from the
- * point of view of a string comparison, otherwise 0 is returned. Note that
- * this function is faster then checking for (compareStringObject(a,b) == 0)
- * because it can perform some more optimization. */
-static int equalStringObjects(robj *a, robj *b) {
- if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){
- return a->ptr == b->ptr;
- } else {
- return compareStringObjects(a,b) == 0;
- }
-}
-
-static size_t stringObjectLen(robj *o) {
- redisAssert(o->type == REDIS_STRING);
- if (o->encoding == REDIS_ENCODING_RAW) {
- return sdslen(o->ptr);
- } else {
- char buf[32];
-
- return ll2string(buf,32,(long)o->ptr);
- }
-}
-
-static int getDoubleFromObject(robj *o, double *target) {
- double value;
- char *eptr;
-
- if (o == NULL) {
- value = 0;
- } else {
- redisAssert(o->type == REDIS_STRING);
- if (o->encoding == REDIS_ENCODING_RAW) {
- value = strtod(o->ptr, &eptr);
- if (eptr[0] != '\0') return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
- value = (long)o->ptr;
- } else {
- redisPanic("Unknown string encoding");
- }
- }
-
- *target = value;
- return REDIS_OK;
-}
-
-static int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
- double value;
- if (getDoubleFromObject(o, &value) != REDIS_OK) {
- if (msg != NULL) {
- addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
- } else {
- addReplySds(c, sdsnew("-ERR value is not a double\r\n"));
- }
- return REDIS_ERR;
- }
-
- *target = value;
- return REDIS_OK;
-}
-
-static int getLongLongFromObject(robj *o, long long *target) {
- long long value;
- char *eptr;
-
- if (o == NULL) {
- value = 0;
- } else {
- redisAssert(o->type == REDIS_STRING);
- if (o->encoding == REDIS_ENCODING_RAW) {
- value = strtoll(o->ptr, &eptr, 10);
- if (eptr[0] != '\0') return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
- value = (long)o->ptr;
- } else {
- redisPanic("Unknown string encoding");
- }
- }
-
- *target = value;
- return REDIS_OK;
-}
-
-static int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
- long long value;
- if (getLongLongFromObject(o, &value) != REDIS_OK) {
- if (msg != NULL) {
- addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
- } else {
- addReplySds(c, sdsnew("-ERR value is not an integer\r\n"));
- }
- return REDIS_ERR;
- }
-
- *target = value;
- return REDIS_OK;
-}
-
-static int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
- long long value;
-
- if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
- if (value < LONG_MIN || value > LONG_MAX) {
- if (msg != NULL) {
- addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
- } else {
- addReplySds(c, sdsnew("-ERR value is out of range\r\n"));
- }
- return REDIS_ERR;
- }
-
- *target = value;
- return REDIS_OK;
-}
-
-/* =========================== Keyspace access API ========================== */
-
-static robj *lookupKey(redisDb *db, robj *key) {
- dictEntry *de = dictFind(db->dict,key->ptr);
- if (de) {
- robj *val = dictGetEntryVal(de);
-
- if (server.vm_enabled) {
- if (val->storage == REDIS_VM_MEMORY ||
- val->storage == REDIS_VM_SWAPPING)
- {
- /* If we were swapping the object out, cancel the operation */
- if (val->storage == REDIS_VM_SWAPPING)
- vmCancelThreadedIOJob(val);
- /* Update the access time for the aging algorithm. */
- val->lru = server.lruclock;
- } else {
- int notify = (val->storage == REDIS_VM_LOADING);
-
- /* Our value was swapped on disk. Bring it at home. */
- redisAssert(val->type == REDIS_VMPOINTER);
- val = vmLoadObject(val);
- dictGetEntryVal(de) = val;
-
- /* Clients blocked by the VM subsystem may be waiting for
- * this key... */
- if (notify) handleClientsBlockedOnSwappedKey(db,key);
- }
- }
- return val;
- } else {
- return NULL;
- }
-}
-
-static robj *lookupKeyRead(redisDb *db, robj *key) {
- expireIfNeeded(db,key);
- return lookupKey(db,key);
-}
-
-static robj *lookupKeyWrite(redisDb *db, robj *key) {
- deleteIfVolatile(db,key);
- touchWatchedKey(db,key);
- return lookupKey(db,key);
-}
-
-static robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
- robj *o = lookupKeyRead(c->db, key);
- if (!o) addReply(c,reply);
- return o;
-}
-
-static robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
- robj *o = lookupKeyWrite(c->db, key);
- if (!o) addReply(c,reply);
- return o;
-}
-
-/* Add the key to the DB. If the key already exists REDIS_ERR is returned,
- * otherwise REDIS_OK is returned, and the caller should increment the
- * refcount of 'val'. */
-static int dbAdd(redisDb *db, robj *key, robj *val) {
- /* Perform a lookup before adding the key, as we need to copy the
- * key value. */
- if (dictFind(db->dict, key->ptr) != NULL) {
- return REDIS_ERR;
- } else {
- sds copy = sdsdup(key->ptr);
- dictAdd(db->dict, copy, val);
- return REDIS_OK;
- }
-}
-
-/* If the key does not exist, this is just like dbAdd(). Otherwise
- * the value associated to the key is replaced with the new one.
- *
- * On update (key already existed) 0 is returned. Otherwise 1. */
-static int dbReplace(redisDb *db, robj *key, robj *val) {
- if (dictFind(db->dict,key->ptr) == NULL) {
- sds copy = sdsdup(key->ptr);
- dictAdd(db->dict, copy, val);
- return 1;
- } else {
- dictReplace(db->dict, key->ptr, val);
- return 0;
- }
-}
-
-static int dbExists(redisDb *db, robj *key) {
- return dictFind(db->dict,key->ptr) != NULL;
-}
-
-/* Return a random key, in form of a Redis object.
- * If there are no keys, NULL is returned.
- *
- * The function makes sure to return keys not already expired. */
-static robj *dbRandomKey(redisDb *db) {
- struct dictEntry *de;
-
- while(1) {
- sds key;
- robj *keyobj;
-
- de = dictGetRandomKey(db->dict);
- if (de == NULL) return NULL;
-
- key = dictGetEntryKey(de);
- keyobj = createStringObject(key,sdslen(key));
- if (dictFind(db->expires,key)) {
- if (expireIfNeeded(db,keyobj)) {
- decrRefCount(keyobj);
- continue; /* search for another key. This expired. */
- }
- }
- return keyobj;
- }
-}
-
-/* Delete a key, value, and associated expiration entry if any, from the DB */
-static int dbDelete(redisDb *db, robj *key) {
- /* Deleting an entry from the expires dict will not free the sds of
- * the key, because it is shared with the main dictionary. */
- if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
- return dictDelete(db->dict,key->ptr) == DICT_OK;
-}
-
-/*============================ RDB saving/loading =========================== */
-
-static int rdbSaveType(FILE *fp, unsigned char type) {
- if (fwrite(&type,1,1,fp) == 0) return -1;
- return 0;
-}
-
-static int rdbSaveTime(FILE *fp, time_t t) {
- int32_t t32 = (int32_t) t;
- if (fwrite(&t32,4,1,fp) == 0) return -1;
- return 0;
-}
-
-/* check rdbLoadLen() comments for more info */
-static int rdbSaveLen(FILE *fp, uint32_t len) {
- unsigned char buf[2];
-
- if (len < (1<<6)) {
- /* Save a 6 bit len */
- buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
- if (fwrite(buf,1,1,fp) == 0) return -1;
- } else if (len < (1<<14)) {
- /* Save a 14 bit len */
- buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
- buf[1] = len&0xFF;
- if (fwrite(buf,2,1,fp) == 0) return -1;
- } else {
- /* Save a 32 bit len */
- buf[0] = (REDIS_RDB_32BITLEN<<6);
- if (fwrite(buf,1,1,fp) == 0) return -1;
- len = htonl(len);
- if (fwrite(&len,4,1,fp) == 0) return -1;
- }
- return 0;
-}
-
-/* Encode 'value' as an integer if possible (if integer will fit the
- * supported range). If the function sucessful encoded the integer
- * then the (up to 5 bytes) encoded representation is written in the
- * string pointed by 'enc' and the length is returned. Otherwise
- * 0 is returned. */
-static int rdbEncodeInteger(long long value, unsigned char *enc) {
- /* Finally check if it fits in our ranges */
- if (value >= -(1<<7) && value <= (1<<7)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
- enc[1] = value&0xFF;
- return 2;
- } else if (value >= -(1<<15) && value <= (1<<15)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
- enc[1] = value&0xFF;
- enc[2] = (value>>8)&0xFF;
- return 3;
- } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
- enc[1] = value&0xFF;
- enc[2] = (value>>8)&0xFF;
- enc[3] = (value>>16)&0xFF;
- enc[4] = (value>>24)&0xFF;
- return 5;
- } else {
- return 0;
- }
-}
-
-/* String objects in the form "2391" "-100" without any space and with a
- * range of values that can fit in an 8, 16 or 32 bit signed value can be
- * encoded as integers to save space */
-static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
- long long value;
- char *endptr, buf[32];
-
- /* Check if it's possible to encode this value as a number */
- value = strtoll(s, &endptr, 10);
- if (endptr[0] != '\0') return 0;
- ll2string(buf,32,value);
-
- /* If the number converted back into a string is not identical
- * then it's not possible to encode the string as integer */
- if (strlen(buf) != len || memcmp(buf,s,len)) return 0;
-
- return rdbEncodeInteger(value,enc);
-}
-
-static int rdbSaveLzfStringObject(FILE *fp, unsigned char *s, size_t len) {
- size_t comprlen, outlen;
- unsigned char byte;
- void *out;
-
- /* We require at least four bytes compression for this to be worth it */
- if (len <= 4) return 0;
- outlen = len-4;
- if ((out = zmalloc(outlen+1)) == NULL) return 0;
- comprlen = lzf_compress(s, len, out, outlen);
- if (comprlen == 0) {
- zfree(out);
- return 0;
- }
- /* Data compressed! Let's save it on disk */
- byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
- if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
- if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
- if (rdbSaveLen(fp,len) == -1) goto writeerr;
- if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
- zfree(out);
- return comprlen;
-
-writeerr:
- zfree(out);
- return -1;
-}
-
-/* Save a string objet as [len][data] on disk. If the object is a string
- * representation of an integer value we try to safe it in a special form */
-static int rdbSaveRawString(FILE *fp, unsigned char *s, size_t len) {
- int enclen;
-
- /* Try integer encoding */
- if (len <= 11) {
- unsigned char buf[5];
- if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
- if (fwrite(buf,enclen,1,fp) == 0) return -1;
- return 0;
- }
- }
-
- /* Try LZF compression - under 20 bytes it's unable to compress even
- * aaaaaaaaaaaaaaaaaa so skip it */
- if (server.rdbcompression && len > 20) {
- int retval;
-
- retval = rdbSaveLzfStringObject(fp,s,len);
- if (retval == -1) return -1;
- if (retval > 0) return 0;
- /* retval == 0 means data can't be compressed, save the old way */
- }
-
- /* Store verbatim */
- if (rdbSaveLen(fp,len) == -1) return -1;
- if (len && fwrite(s,len,1,fp) == 0) return -1;
- return 0;
-}
-
-/* Save a long long value as either an encoded string or a string. */
-static int rdbSaveLongLongAsStringObject(FILE *fp, long long value) {
- unsigned char buf[32];
- int enclen = rdbEncodeInteger(value,buf);
- if (enclen > 0) {
- if (fwrite(buf,enclen,1,fp) == 0) return -1;
- } else {
- /* Encode as string */
- enclen = ll2string((char*)buf,32,value);
- redisAssert(enclen < 32);
- if (rdbSaveLen(fp,enclen) == -1) return -1;
- if (fwrite(buf,enclen,1,fp) == 0) return -1;
- }
- return 0;
-}
-
-/* Like rdbSaveStringObjectRaw() but handle encoded objects */
-static int rdbSaveStringObject(FILE *fp, robj *obj) {
- /* Avoid to decode the object, then encode it again, if the
- * object is alrady integer encoded. */
- if (obj->encoding == REDIS_ENCODING_INT) {
- return rdbSaveLongLongAsStringObject(fp,(long)obj->ptr);
- } else {
- redisAssert(obj->encoding == REDIS_ENCODING_RAW);
- return rdbSaveRawString(fp,obj->ptr,sdslen(obj->ptr));
- }
-}
-
-/* Save a double value. Doubles are saved as strings prefixed by an unsigned
- * 8 bit integer specifing the length of the representation.
- * This 8 bit integer has special values in order to specify the following
- * conditions:
- * 253: not a number
- * 254: + inf
- * 255: - inf
- */
-static int rdbSaveDoubleValue(FILE *fp, double val) {
- unsigned char buf[128];
- int len;
-
- if (isnan(val)) {
- buf[0] = 253;
- len = 1;
- } else if (!isfinite(val)) {
- len = 1;
- buf[0] = (val < 0) ? 255 : 254;
- } else {
-#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
- /* Check if the float is in a safe range to be casted into a
- * long long. We are assuming that long long is 64 bit here.
- * Also we are assuming that there are no implementations around where
- * double has precision < 52 bit.
- *
- * Under this assumptions we test if a double is inside an interval
- * where casting to long long is safe. Then using two castings we
- * make sure the decimal part is zero. If all this is true we use
- * integer printing function that is much faster. */
- double min = -4503599627370495; /* (2^52)-1 */
- double max = 4503599627370496; /* -(2^52) */
- if (val > min && val < max && val == ((double)((long long)val)))
- ll2string((char*)buf+1,sizeof(buf),(long long)val);
- else
-#endif
- snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
- buf[0] = strlen((char*)buf+1);
- len = buf[0]+1;
- }
- if (fwrite(buf,len,1,fp) == 0) return -1;
- return 0;
-}
-
-/* Save a Redis object. */
-static int rdbSaveObject(FILE *fp, robj *o) {
- if (o->type == REDIS_STRING) {
- /* Save a string value */
- if (rdbSaveStringObject(fp,o) == -1) return -1;
- } else if (o->type == REDIS_LIST) {
- /* Save a list value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
-
- if (rdbSaveLen(fp,ziplistLen(o->ptr)) == -1) return -1;
- p = ziplistIndex(o->ptr,0);
- while(ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- if (rdbSaveRawString(fp,vstr,vlen) == -1)
- return -1;
- } else {
- if (rdbSaveLongLongAsStringObject(fp,vlong) == -1)
- return -1;
- }
- p = ziplistNext(o->ptr,p);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listIter li;
- listNode *ln;
-
- if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
- if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
- }
- } else {
- redisPanic("Unknown list encoding");
- }
- } else if (o->type == REDIS_SET) {
- /* Save a set value */
- dict *set = o->ptr;
- dictIterator *di = dictGetIterator(set);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_ZSET) {
- /* Save a set value */
- zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
- double *score = dictGetEntryVal(de);
-
- if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
- if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_HASH) {
- /* Save a hash value */
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- unsigned char *p = zipmapRewind(o->ptr);
- unsigned int count = zipmapLen(o->ptr);
- unsigned char *key, *val;
- unsigned int klen, vlen;
-
- if (rdbSaveLen(fp,count) == -1) return -1;
- while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
- if (rdbSaveRawString(fp,key,klen) == -1) return -1;
- if (rdbSaveRawString(fp,val,vlen) == -1) return -1;
- }
- } else {
- dictIterator *di = dictGetIterator(o->ptr);
- dictEntry *de;
-
- if (rdbSaveLen(fp,dictSize((dict*)o->ptr)) == -1) return -1;
- while((de = dictNext(di)) != NULL) {
- robj *key = dictGetEntryKey(de);
- robj *val = dictGetEntryVal(de);
-
- if (rdbSaveStringObject(fp,key) == -1) return -1;
- if (rdbSaveStringObject(fp,val) == -1) return -1;
- }
- dictReleaseIterator(di);
- }
- } else {
- redisPanic("Unknown object type");
- }
- return 0;
-}
-
-/* Return the length the object will have on disk if saved with
- * the rdbSaveObject() function. Currently we use a trick to get
- * this length with very little changes to the code. In the future
- * we could switch to a faster solution. */
-static off_t rdbSavedObjectLen(robj *o, FILE *fp) {
- if (fp == NULL) fp = server.devnull;
- rewind(fp);
- assert(rdbSaveObject(fp,o) != 1);
- return ftello(fp);
-}
-
-/* Return the number of pages required to save this object in the swap file */
-static off_t rdbSavedObjectPages(robj *o, FILE *fp) {
- off_t bytes = rdbSavedObjectLen(o,fp);
-
- return (bytes+(server.vm_page_size-1))/server.vm_page_size;
-}
-
-/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
-static int rdbSave(char *filename) {
- dictIterator *di = NULL;
- dictEntry *de;
- FILE *fp;
- char tmpfile[256];
- int j;
- time_t now = time(NULL);
-
- /* Wait for I/O therads to terminate, just in case this is a
- * foreground-saving, to avoid seeking the swap file descriptor at the
- * same time. */
- if (server.vm_enabled)
- waitEmptyIOJobsQueue();
-
- snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
- fp = fopen(tmpfile,"w");
- if (!fp) {
- redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
- return REDIS_ERR;
- }
- if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
- for (j = 0; j < server.dbnum; j++) {
- redisDb *db = server.db+j;
- dict *d = db->dict;
- if (dictSize(d) == 0) continue;
- di = dictGetIterator(d);
- if (!di) {
- fclose(fp);
- return REDIS_ERR;
- }
-
- /* Write the SELECT DB opcode */
- if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
- if (rdbSaveLen(fp,j) == -1) goto werr;
-
- /* Iterate this DB writing every entry */
- while((de = dictNext(di)) != NULL) {
- sds keystr = dictGetEntryKey(de);
- robj key, *o = dictGetEntryVal(de);
- time_t expiretime;
-
- initStaticStringObject(key,keystr);
- expiretime = getExpire(db,&key);
-
- /* Save the expire time */
- if (expiretime != -1) {
- /* If this key is already expired skip it */
- if (expiretime < now) continue;
- if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
- if (rdbSaveTime(fp,expiretime) == -1) goto werr;
- }
- /* Save the key and associated value. This requires special
- * handling if the value is swapped out. */
- if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
- o->storage == REDIS_VM_SWAPPING) {
- /* Save type, key, value */
- if (rdbSaveType(fp,o->type) == -1) goto werr;
- if (rdbSaveStringObject(fp,&key) == -1) goto werr;
- if (rdbSaveObject(fp,o) == -1) goto werr;
- } else {
- /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
- robj *po;
- /* Get a preview of the object in memory */
- po = vmPreviewObject(o);
- /* Save type, key, value */
- if (rdbSaveType(fp,po->type) == -1) goto werr;
- if (rdbSaveStringObject(fp,&key) == -1) goto werr;
- if (rdbSaveObject(fp,po) == -1) goto werr;
- /* Remove the loaded object from memory */
- decrRefCount(po);
- }
- }
- dictReleaseIterator(di);
- }
- /* EOF opcode */
- if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
-
- /* Make sure data will not remain on the OS's output buffers */
- fflush(fp);
- fsync(fileno(fp));
- fclose(fp);
-
- /* Use RENAME to make sure the DB file is changed atomically only
- * if the generate DB file is ok. */
- if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
- unlink(tmpfile);
- return REDIS_ERR;
- }
- redisLog(REDIS_NOTICE,"DB saved on disk");
- server.dirty = 0;
- server.lastsave = time(NULL);
- return REDIS_OK;
-
-werr:
- fclose(fp);
- unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
- if (di) dictReleaseIterator(di);
- return REDIS_ERR;
-}
-
-static int rdbSaveBackground(char *filename) {
- pid_t childpid;
-
- if (server.bgsavechildpid != -1) return REDIS_ERR;
- if (server.vm_enabled) waitEmptyIOJobsQueue();
- if ((childpid = fork()) == 0) {
- /* Child */
- if (server.vm_enabled) vmReopenSwapFile();
- close(server.fd);
- if (rdbSave(filename) == REDIS_OK) {
- _exit(0);
- } else {
- _exit(1);
- }
- } else {
- /* Parent */
- if (childpid == -1) {
- redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
- server.bgsavechildpid = childpid;
- updateDictResizePolicy();
- return REDIS_OK;
- }
- return REDIS_OK; /* unreached */
-}
-
-static void rdbRemoveTempFile(pid_t childpid) {
- char tmpfile[256];
-
- snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
- unlink(tmpfile);
-}
-
-static int rdbLoadType(FILE *fp) {
- unsigned char type;
- if (fread(&type,1,1,fp) == 0) return -1;
- return type;
-}
-
-static time_t rdbLoadTime(FILE *fp) {
- int32_t t32;
- if (fread(&t32,4,1,fp) == 0) return -1;
- return (time_t) t32;
-}
-
-/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
- * of this file for a description of how this are stored on disk.
- *
- * isencoded is set to 1 if the readed length is not actually a length but
- * an "encoding type", check the above comments for more info */
-static uint32_t rdbLoadLen(FILE *fp, int *isencoded) {
- unsigned char buf[2];
- uint32_t len;
- int type;
-
- if (isencoded) *isencoded = 0;
- if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
- type = (buf[0]&0xC0)>>6;
- if (type == REDIS_RDB_6BITLEN) {
- /* Read a 6 bit len */
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_ENCVAL) {
- /* Read a 6 bit len encoding type */
- if (isencoded) *isencoded = 1;
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_14BITLEN) {
- /* Read a 14 bit len */
- if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
- return ((buf[0]&0x3F)<<8)|buf[1];
- } else {
- /* Read a 32 bit len */
- if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
- return ntohl(len);
- }
-}
-
-/* Load an integer-encoded object from file 'fp', with the specified
- * encoding type 'enctype'. If encode is true the function may return
- * an integer-encoded object as reply, otherwise the returned object
- * will always be encoded as a raw string. */
-static robj *rdbLoadIntegerObject(FILE *fp, int enctype, int encode) {
- unsigned char enc[4];
- long long val;
-
- if (enctype == REDIS_RDB_ENC_INT8) {
- if (fread(enc,1,1,fp) == 0) return NULL;
- val = (signed char)enc[0];
- } else if (enctype == REDIS_RDB_ENC_INT16) {
- uint16_t v;
- if (fread(enc,2,1,fp) == 0) return NULL;
- v = enc[0]|(enc[1]<<8);
- val = (int16_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT32) {
- uint32_t v;
- if (fread(enc,4,1,fp) == 0) return NULL;
- v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
- val = (int32_t)v;
- } else {
- val = 0; /* anti-warning */
- redisPanic("Unknown RDB integer encoding type");
- }
- if (encode)
- return createStringObjectFromLongLong(val);
- else
- return createObject(REDIS_STRING,sdsfromlonglong(val));
-}
-
-static robj *rdbLoadLzfStringObject(FILE*fp) {
- unsigned int len, clen;
- unsigned char *c = NULL;
- sds val = NULL;
-
- if ((clen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((c = zmalloc(clen)) == NULL) goto err;
- if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
- if (fread(c,clen,1,fp) == 0) goto err;
- if (lzf_decompress(c,clen,val,len) == 0) goto err;
- zfree(c);
- return createObject(REDIS_STRING,val);
-err:
- zfree(c);
- sdsfree(val);
- return NULL;
-}
-
-static robj *rdbGenericLoadStringObject(FILE*fp, int encode) {
- int isencoded;
- uint32_t len;
- sds val;
-
- len = rdbLoadLen(fp,&isencoded);
- if (isencoded) {
- switch(len) {
- case REDIS_RDB_ENC_INT8:
- case REDIS_RDB_ENC_INT16:
- case REDIS_RDB_ENC_INT32:
- return rdbLoadIntegerObject(fp,len,encode);
- case REDIS_RDB_ENC_LZF:
- return rdbLoadLzfStringObject(fp);
- default:
- redisPanic("Unknown RDB encoding type");
- }
- }
-
- if (len == REDIS_RDB_LENERR) return NULL;
- val = sdsnewlen(NULL,len);
- if (len && fread(val,len,1,fp) == 0) {
- sdsfree(val);
- return NULL;
- }
- return createObject(REDIS_STRING,val);
-}
-
-static robj *rdbLoadStringObject(FILE *fp) {
- return rdbGenericLoadStringObject(fp,0);
-}
-
-static robj *rdbLoadEncodedStringObject(FILE *fp) {
- return rdbGenericLoadStringObject(fp,1);
-}
-
-/* For information about double serialization check rdbSaveDoubleValue() */
-static int rdbLoadDoubleValue(FILE *fp, double *val) {
- char buf[128];
- unsigned char len;
-
- if (fread(&len,1,1,fp) == 0) return -1;
- switch(len) {
- case 255: *val = R_NegInf; return 0;
- case 254: *val = R_PosInf; return 0;
- case 253: *val = R_Nan; return 0;
- default:
- if (fread(buf,len,1,fp) == 0) return -1;
- buf[len] = '\0';
- sscanf(buf, "%lg", val);
- return 0;
- }
-}
-
-/* Load a Redis object of the specified type from the specified file.
- * On success a newly allocated object is returned, otherwise NULL. */
-static robj *rdbLoadObject(int type, FILE *fp) {
- robj *o, *ele, *dec;
- size_t len;
-
- redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",type,ftell(fp));
- if (type == REDIS_STRING) {
- /* Read string value */
- if ((o = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
- o = tryObjectEncoding(o);
- } else if (type == REDIS_LIST) {
- /* Read list value */
- if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
-
- /* Use a real list when there are too many entries */
- if (len > server.list_max_ziplist_entries) {
- o = createListObject();
- } else {
- o = createZiplistObject();
- }
-
- /* Load every single element of the list */
- while(len--) {
- if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
-
- /* If we are using a ziplist and the value is too big, convert
- * the object to a real list. */
- if (o->encoding == REDIS_ENCODING_ZIPLIST &&
- ele->encoding == REDIS_ENCODING_RAW &&
- sdslen(ele->ptr) > server.list_max_ziplist_value)
- listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
-
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- dec = getDecodedObject(ele);
- o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
- decrRefCount(dec);
- decrRefCount(ele);
- } else {
- ele = tryObjectEncoding(ele);
- listAddNodeTail(o->ptr,ele);
- }
- }
- } else if (type == REDIS_SET) {
- /* Read list/set value */
- if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
- o = createSetObject();
- /* It's faster to expand the dict to the right size asap in order
- * to avoid rehashing */
- if (len > DICT_HT_INITIAL_SIZE)
- dictExpand(o->ptr,len);
- /* Load every single element of the list/set */
- while(len--) {
- if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
- dictAdd((dict*)o->ptr,ele,NULL);
- }
- } else if (type == REDIS_ZSET) {
- /* Read list/set value */
- size_t zsetlen;
- zset *zs;
-
- if ((zsetlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
- o = createZsetObject();
- zs = o->ptr;
- /* Load every single element of the list/set */
- while(zsetlen--) {
- robj *ele;
- double *score = zmalloc(sizeof(double));
-
- if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
- if (rdbLoadDoubleValue(fp,score) == -1) return NULL;
- dictAdd(zs->dict,ele,score);
- zslInsert(zs->zsl,*score,ele);
- incrRefCount(ele); /* added to skiplist */
- }
- } else if (type == REDIS_HASH) {
- size_t hashlen;
-
- if ((hashlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
- o = createHashObject();
- /* Too many entries? Use an hash table. */
- if (hashlen > server.hash_max_zipmap_entries)
- convertToRealHash(o);
- /* Load every key/value, then set it into the zipmap or hash
- * table, as needed. */
- while(hashlen--) {
- robj *key, *val;
-
- if ((key = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
- if ((val = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
- /* If we are using a zipmap and there are too big values
- * the object is converted to real hash table encoding. */
- if (o->encoding != REDIS_ENCODING_HT &&
- ((key->encoding == REDIS_ENCODING_RAW &&
- sdslen(key->ptr) > server.hash_max_zipmap_value) ||
- (val->encoding == REDIS_ENCODING_RAW &&
- sdslen(val->ptr) > server.hash_max_zipmap_value)))
- {
- convertToRealHash(o);
- }
-
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- unsigned char *zm = o->ptr;
- robj *deckey, *decval;
-
- /* We need raw string objects to add them to the zipmap */
- deckey = getDecodedObject(key);
- decval = getDecodedObject(val);
- zm = zipmapSet(zm,deckey->ptr,sdslen(deckey->ptr),
- decval->ptr,sdslen(decval->ptr),NULL);
- o->ptr = zm;
- decrRefCount(deckey);
- decrRefCount(decval);
- decrRefCount(key);
- decrRefCount(val);
- } else {
- key = tryObjectEncoding(key);
- val = tryObjectEncoding(val);
- dictAdd((dict*)o->ptr,key,val);
- }
- }
- } else {
- redisPanic("Unknown object type");
- }
- return o;
-}
-
-static int rdbLoad(char *filename) {
- FILE *fp;
- uint32_t dbid;
- int type, retval, rdbver;
- int swap_all_values = 0;
- redisDb *db = server.db+0;
- char buf[1024];
- time_t expiretime, now = time(NULL);
-
- fp = fopen(filename,"r");
- if (!fp) return REDIS_ERR;
- if (fread(buf,9,1,fp) == 0) goto eoferr;
- buf[9] = '\0';
- if (memcmp(buf,"REDIS",5) != 0) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
- return REDIS_ERR;
- }
- rdbver = atoi(buf+5);
- if (rdbver != 1) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
- return REDIS_ERR;
- }
- while(1) {
- robj *key, *val;
- int force_swapout;
-
- expiretime = -1;
- /* Read type. */
- if ((type = rdbLoadType(fp)) == -1) goto eoferr;
- if (type == REDIS_EXPIRETIME) {
- if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
- /* We read the time so we need to read the object type again */
- if ((type = rdbLoadType(fp)) == -1) goto eoferr;
- }
- if (type == REDIS_EOF) break;
- /* Handle SELECT DB opcode as a special case */
- if (type == REDIS_SELECTDB) {
- if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
- goto eoferr;
- if (dbid >= (unsigned)server.dbnum) {
- redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
- exit(1);
- }
- db = server.db+dbid;
- continue;
- }
- /* Read key */
- if ((key = rdbLoadStringObject(fp)) == NULL) goto eoferr;
- /* Read value */
- if ((val = rdbLoadObject(type,fp)) == NULL) goto eoferr;
- /* Check if the key already expired */
- if (expiretime != -1 && expiretime < now) {
- decrRefCount(key);
- decrRefCount(val);
- continue;
- }
- /* Add the new object in the hash table */
- retval = dbAdd(db,key,val);
- if (retval == REDIS_ERR) {
- redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key->ptr);
- exit(1);
- }
- /* Set the expire time if needed */
- if (expiretime != -1) setExpire(db,key,expiretime);
-
- /* Handle swapping while loading big datasets when VM is on */
-
- /* If we detecter we are hopeless about fitting something in memory
- * we just swap every new key on disk. Directly...
- * Note that's important to check for this condition before resorting
- * to random sampling, otherwise we may try to swap already
- * swapped keys. */
- if (swap_all_values) {
- dictEntry *de = dictFind(db->dict,key->ptr);
-
- /* de may be NULL since the key already expired */
- if (de) {
- vmpointer *vp;
- val = dictGetEntryVal(de);
-
- if (val->refcount == 1 &&
- (vp = vmSwapObjectBlocking(val)) != NULL)
- dictGetEntryVal(de) = vp;
- }
- decrRefCount(key);
- continue;
- }
- decrRefCount(key);
-
- /* Flush data on disk once 32 MB of additional RAM are used... */
- force_swapout = 0;
- if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
- force_swapout = 1;
-
- /* If we have still some hope of having some value fitting memory
- * then we try random sampling. */
- if (!swap_all_values && server.vm_enabled && force_swapout) {
- while (zmalloc_used_memory() > server.vm_max_memory) {
- if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
- }
- if (zmalloc_used_memory() > server.vm_max_memory)
- swap_all_values = 1; /* We are already using too much mem */
- }
- }
- fclose(fp);
- return REDIS_OK;
-
-eoferr: /* unexpected end of file is handled here with a fatal exit */
- redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
- exit(1);
- return REDIS_ERR; /* Just to avoid warning */
-}
-
-/*================================== Shutdown =============================== */
-static int prepareForShutdown() {
- redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
- /* Kill the saving child if there is a background saving in progress.
- We want to avoid race conditions, for instance our saving child may
- overwrite the synchronous saving did by SHUTDOWN. */
- if (server.bgsavechildpid != -1) {
- redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
- kill(server.bgsavechildpid,SIGKILL);
- rdbRemoveTempFile(server.bgsavechildpid);
- }
- if (server.appendonly) {
- /* Append only file: fsync() the AOF and exit */
- aof_fsync(server.appendfd);
- if (server.vm_enabled) unlink(server.vm_swap_file);
- } else {
- /* Snapshotting. Perform a SYNC SAVE and exit */
- if (rdbSave(server.dbfilename) == REDIS_OK) {
- if (server.daemonize)
- unlink(server.pidfile);
- redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
- } else {
- /* Ooops.. error saving! The best we can do is to continue
- * operating. Note that if there was a background saving process,
- * in the next cron() Redis will be notified that the background
- * saving aborted, handling special stuff like slaves pending for
- * synchronization... */
- redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
- return REDIS_ERR;
- }
- }
- redisLog(REDIS_WARNING,"Server exit now, bye bye...");
- return REDIS_OK;
-}
-
-/*================================== Commands =============================== */
-
-static void authCommand(redisClient *c) {
- if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
- c->authenticated = 1;
- addReply(c,shared.ok);
- } else {
- c->authenticated = 0;
- addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
- }
-}
-
-static void pingCommand(redisClient *c) {
- addReply(c,shared.pong);
-}
-
-static void echoCommand(redisClient *c) {
- addReplyBulk(c,c->argv[1]);
-}
-
-/*=================================== Strings =============================== */
-
-static void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expire) {
- int retval;
- long seconds = 0; /* initialized to avoid an harmness warning */
-
- if (expire) {
- if (getLongFromObjectOrReply(c, expire, &seconds, NULL) != REDIS_OK)
- return;
- if (seconds <= 0) {
- addReplySds(c,sdsnew("-ERR invalid expire time in SETEX\r\n"));
- return;
- }
- }
-
- touchWatchedKey(c->db,key);
- if (nx) deleteIfVolatile(c->db,key);
- retval = dbAdd(c->db,key,val);
- if (retval == REDIS_ERR) {
- if (!nx) {
- dbReplace(c->db,key,val);
- incrRefCount(val);
- } else {
- addReply(c,shared.czero);
- return;
- }
- } else {
- incrRefCount(val);
- }
- server.dirty++;
- removeExpire(c->db,key);
- if (expire) setExpire(c->db,key,time(NULL)+seconds);
- addReply(c, nx ? shared.cone : shared.ok);
-}
-
-static void setCommand(redisClient *c) {
- setGenericCommand(c,0,c->argv[1],c->argv[2],NULL);
-}
-
-static void setnxCommand(redisClient *c) {
- setGenericCommand(c,1,c->argv[1],c->argv[2],NULL);
-}
-
-static void setexCommand(redisClient *c) {
- setGenericCommand(c,0,c->argv[1],c->argv[3],c->argv[2]);
-}
-
-static int getGenericCommand(redisClient *c) {
- robj *o;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
- return REDIS_OK;
-
- if (o->type != REDIS_STRING) {
- addReply(c,shared.wrongtypeerr);
- return REDIS_ERR;
- } else {
- addReplyBulk(c,o);
- return REDIS_OK;
- }
-}
-
-static void getCommand(redisClient *c) {
- getGenericCommand(c);
-}
-
-static void getsetCommand(redisClient *c) {
- if (getGenericCommand(c) == REDIS_ERR) return;
- dbReplace(c->db,c->argv[1],c->argv[2]);
- incrRefCount(c->argv[2]);
- server.dirty++;
- removeExpire(c->db,c->argv[1]);
-}
-
-static void mgetCommand(redisClient *c) {
- int j;
-
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
- for (j = 1; j < c->argc; j++) {
- robj *o = lookupKeyRead(c->db,c->argv[j]);
- if (o == NULL) {
- addReply(c,shared.nullbulk);
- } else {
- if (o->type != REDIS_STRING) {
- addReply(c,shared.nullbulk);
- } else {
- addReplyBulk(c,o);
- }
- }
- }
-}
-
-static void msetGenericCommand(redisClient *c, int nx) {
- int j, busykeys = 0;
-
- if ((c->argc % 2) == 0) {
- addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
- return;
- }
- /* Handle the NX flag. The MSETNX semantic is to return zero and don't
- * set nothing at all if at least one already key exists. */
- if (nx) {
- for (j = 1; j < c->argc; j += 2) {
- if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
- busykeys++;
- }
- }
- }
- if (busykeys) {
- addReply(c, shared.czero);
- return;
- }
-
- for (j = 1; j < c->argc; j += 2) {
- c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
- dbReplace(c->db,c->argv[j],c->argv[j+1]);
- incrRefCount(c->argv[j+1]);
- removeExpire(c->db,c->argv[j]);
- }
- server.dirty += (c->argc-1)/2;
- addReply(c, nx ? shared.cone : shared.ok);
-}
-
-static void msetCommand(redisClient *c) {
- msetGenericCommand(c,0);
-}
-
-static void msetnxCommand(redisClient *c) {
- msetGenericCommand(c,1);
-}
-
-static void incrDecrCommand(redisClient *c, long long incr) {
- long long value;
- robj *o;
-
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL && checkType(c,o,REDIS_STRING)) return;
- if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;
-
- value += incr;
- o = createStringObjectFromLongLong(value);
- dbReplace(c->db,c->argv[1],o);
- server.dirty++;
- addReply(c,shared.colon);
- addReply(c,o);
- addReply(c,shared.crlf);
-}
-
-static void incrCommand(redisClient *c) {
- incrDecrCommand(c,1);
-}
-
-static void decrCommand(redisClient *c) {
- incrDecrCommand(c,-1);
-}
-
-static void incrbyCommand(redisClient *c) {
- long long incr;
-
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
- incrDecrCommand(c,incr);
-}
-
-static void decrbyCommand(redisClient *c) {
- long long incr;
-
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
- incrDecrCommand(c,-incr);
-}
-
-static void appendCommand(redisClient *c) {
- int retval;
- size_t totlen;
- robj *o;
-
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (o == NULL) {
- /* Create the key */
- retval = dbAdd(c->db,c->argv[1],c->argv[2]);
- incrRefCount(c->argv[2]);
- totlen = stringObjectLen(c->argv[2]);
- } else {
- if (o->type != REDIS_STRING) {
- addReply(c,shared.wrongtypeerr);
- return;
- }
- /* If the object is specially encoded or shared we have to make
- * a copy */
- if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
- robj *decoded = getDecodedObject(o);
-
- o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
- decrRefCount(decoded);
- dbReplace(c->db,c->argv[1],o);
- }
- /* APPEND! */
- if (c->argv[2]->encoding == REDIS_ENCODING_RAW) {
- o->ptr = sdscatlen(o->ptr,
- c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
- } else {
- o->ptr = sdscatprintf(o->ptr, "%ld",
- (unsigned long) c->argv[2]->ptr);
- }
- totlen = sdslen(o->ptr);
- }
- server.dirty++;
- addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen));
-}
-
-static void substrCommand(redisClient *c) {
- robj *o;
- long start = atoi(c->argv[2]->ptr);
- long end = atoi(c->argv[3]->ptr);
- size_t rangelen, strlen;
- sds range;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
-
- o = getDecodedObject(o);
- strlen = sdslen(o->ptr);
-
- /* convert negative indexes */
- if (start < 0) start = strlen+start;
- if (end < 0) end = strlen+end;
- if (start < 0) start = 0;
- if (end < 0) end = 0;
-
- /* indexes sanity checks */
- if (start > end || (size_t)start >= strlen) {
- /* Out of range start or start > end result in null reply */
- addReply(c,shared.nullbulk);
- decrRefCount(o);
- return;
- }
- if ((size_t)end >= strlen) end = strlen-1;
- rangelen = (end-start)+1;
-
- /* Return the result */
- addReplySds(c,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen));
- range = sdsnewlen((char*)o->ptr+start,rangelen);
- addReplySds(c,range);
- addReply(c,shared.crlf);
- decrRefCount(o);
-}
-
-/* ========================= Type agnostic commands ========================= */
-
-static void delCommand(redisClient *c) {
- int deleted = 0, j;
-
- for (j = 1; j < c->argc; j++) {
- if (dbDelete(c->db,c->argv[j])) {
- touchWatchedKey(c->db,c->argv[j]);
- server.dirty++;
- deleted++;
- }
- }
- addReplyLongLong(c,deleted);
-}
-
-static void existsCommand(redisClient *c) {
- expireIfNeeded(c->db,c->argv[1]);
- if (dbExists(c->db,c->argv[1])) {
- addReply(c, shared.cone);
- } else {
- addReply(c, shared.czero);
- }
-}
-
-static void selectCommand(redisClient *c) {
- int id = atoi(c->argv[1]->ptr);
-
- if (selectDb(c,id) == REDIS_ERR) {
- addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
- } else {
- addReply(c,shared.ok);
- }
-}
-
-static void randomkeyCommand(redisClient *c) {
- robj *key;
-
- if ((key = dbRandomKey(c->db)) == NULL) {
- addReply(c,shared.nullbulk);
- return;
- }
-
- addReplyBulk(c,key);
- decrRefCount(key);
-}
-
-static void keysCommand(redisClient *c) {
- dictIterator *di;
- dictEntry *de;
- sds pattern = c->argv[1]->ptr;
- int plen = sdslen(pattern);
- unsigned long numkeys = 0;
- robj *lenobj = createObject(REDIS_STRING,NULL);
-
- di = dictGetIterator(c->db->dict);
- addReply(c,lenobj);
- decrRefCount(lenobj);
- while((de = dictNext(di)) != NULL) {
- sds key = dictGetEntryKey(de);
- robj *keyobj;
-
- if ((pattern[0] == '*' && pattern[1] == '\0') ||
- stringmatchlen(pattern,plen,key,sdslen(key),0)) {
- keyobj = createStringObject(key,sdslen(key));
- if (expireIfNeeded(c->db,keyobj) == 0) {
- addReplyBulk(c,keyobj);
- numkeys++;
- }
- decrRefCount(keyobj);
- }
- }
- dictReleaseIterator(di);
- lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",numkeys);
-}
-
-static void dbsizeCommand(redisClient *c) {
- addReplySds(c,
- sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
-}
-
-static void lastsaveCommand(redisClient *c) {
- addReplySds(c,
- sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
-}
-
-static void typeCommand(redisClient *c) {
- robj *o;
- char *type;
-
- o = lookupKeyRead(c->db,c->argv[1]);
- if (o == NULL) {
- type = "+none";
- } else {
- switch(o->type) {
- case REDIS_STRING: type = "+string"; break;
- case REDIS_LIST: type = "+list"; break;
- case REDIS_SET: type = "+set"; break;
- case REDIS_ZSET: type = "+zset"; break;
- case REDIS_HASH: type = "+hash"; break;
- default: type = "+unknown"; break;
- }
- }
- addReplySds(c,sdsnew(type));
- addReply(c,shared.crlf);
-}
-
-static void saveCommand(redisClient *c) {
- if (server.bgsavechildpid != -1) {
- addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
- return;
- }
- if (rdbSave(server.dbfilename) == REDIS_OK) {
- addReply(c,shared.ok);
- } else {
- addReply(c,shared.err);
- }
-}
-
-static void bgsaveCommand(redisClient *c) {
- if (server.bgsavechildpid != -1) {
- addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
- return;
- }
- if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
- char *status = "+Background saving started\r\n";
- addReplySds(c,sdsnew(status));
- } else {
- addReply(c,shared.err);
- }
-}
-
-static void shutdownCommand(redisClient *c) {
- if (prepareForShutdown() == REDIS_OK)
- exit(0);
- addReplySds(c, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
-}
-
-static void renameGenericCommand(redisClient *c, int nx) {
- robj *o;
-
- /* To use the same key as src and dst is probably an error */
- if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
- addReply(c,shared.sameobjecterr);
- return;
- }
-
- if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
- return;
-
- incrRefCount(o);
- deleteIfVolatile(c->db,c->argv[2]);
- if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
- if (nx) {
- decrRefCount(o);
- addReply(c,shared.czero);
- return;
- }
- dbReplace(c->db,c->argv[2],o);
- }
- dbDelete(c->db,c->argv[1]);
- touchWatchedKey(c->db,c->argv[2]);
- server.dirty++;
- addReply(c,nx ? shared.cone : shared.ok);
-}
-
-static void renameCommand(redisClient *c) {
- renameGenericCommand(c,0);
-}
-
-static void renamenxCommand(redisClient *c) {
- renameGenericCommand(c,1);
-}
-
-static void moveCommand(redisClient *c) {
- robj *o;
- redisDb *src, *dst;
- int srcid;
-
- /* Obtain source and target DB pointers */
- src = c->db;
- srcid = c->db->id;
- if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
- addReply(c,shared.outofrangeerr);
- return;
- }
- dst = c->db;
- selectDb(c,srcid); /* Back to the source DB */
-
- /* If the user is moving using as target the same
- * DB as the source DB it is probably an error. */
- if (src == dst) {
- addReply(c,shared.sameobjecterr);
- return;
- }
-
- /* Check if the element exists and get a reference */
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (!o) {
- addReply(c,shared.czero);
- return;
- }
-
- /* Try to add the element to the target DB */
- deleteIfVolatile(dst,c->argv[1]);
- if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
- addReply(c,shared.czero);
- return;
- }
- incrRefCount(o);
-
- /* OK! key moved, free the entry in the source DB */
- dbDelete(src,c->argv[1]);
- server.dirty++;
- addReply(c,shared.cone);
-}
-
-/* =================================== Lists ================================ */
-
-
-/* Check the argument length to see if it requires us to convert the ziplist
- * to a real list. Only check raw-encoded objects because integer encoded
- * objects are never too long. */
-static void listTypeTryConversion(robj *subject, robj *value) {
- if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
- if (value->encoding == REDIS_ENCODING_RAW &&
- sdslen(value->ptr) > server.list_max_ziplist_value)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-}
-
-static void listTypePush(robj *subject, robj *value, int where) {
- /* Check if we need to convert the ziplist */
- listTypeTryConversion(subject,value);
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
-
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
- value = getDecodedObject(value);
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
- decrRefCount(value);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_HEAD) {
- listAddNodeHead(subject->ptr,value);
- } else {
- listAddNodeTail(subject->ptr,value);
- }
- incrRefCount(value);
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-static robj *listTypePop(robj *subject, int where) {
- robj *value = NULL;
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- int pos = (where == REDIS_HEAD) ? 0 : -1;
- p = ziplistIndex(subject->ptr,pos);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
- /* We only need to delete an element when it exists */
- subject->ptr = ziplistDelete(subject->ptr,&p);
- }
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = subject->ptr;
- listNode *ln;
- if (where == REDIS_HEAD) {
- ln = listFirst(list);
- } else {
- ln = listLast(list);
- }
- if (ln != NULL) {
- value = listNodeValue(ln);
- incrRefCount(value);
- listDelNode(list,ln);
- }
- } else {
- redisPanic("Unknown list encoding");
- }
- return value;
-}
-
-static unsigned long listTypeLength(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
- return ziplistLen(subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
- return listLength((list*)subject->ptr);
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-/* Structure to hold set iteration abstraction. */
-typedef struct {
- robj *subject;
- unsigned char encoding;
- unsigned char direction; /* Iteration direction */
- unsigned char *zi;
- listNode *ln;
-} listTypeIterator;
-
-/* Structure for an entry while iterating over a list. */
-typedef struct {
- listTypeIterator *li;
- unsigned char *zi; /* Entry in ziplist */
- listNode *ln; /* Entry in linked list */
-} listTypeEntry;
-
-/* Initialize an iterator at the specified index. */
-static listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction) {
- listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
- li->subject = subject;
- li->encoding = subject->encoding;
- li->direction = direction;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- li->zi = ziplistIndex(subject->ptr,index);
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- li->ln = listIndex(subject->ptr,index);
- } else {
- redisPanic("Unknown list encoding");
- }
- return li;
-}
-
-/* Clean up the iterator. */
-static void listTypeReleaseIterator(listTypeIterator *li) {
- zfree(li);
-}
-
-/* Stores pointer to current the entry in the provided entry structure
- * and advances the position of the iterator. Returns 1 when the current
- * entry is in fact an entry, 0 otherwise. */
-static int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
- /* Protect from converting when iterating */
- redisAssert(li->subject->encoding == li->encoding);
-
- entry->li = li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- entry->zi = li->zi;
- if (entry->zi != NULL) {
- if (li->direction == REDIS_TAIL)
- li->zi = ziplistNext(li->subject->ptr,li->zi);
- else
- li->zi = ziplistPrev(li->subject->ptr,li->zi);
- return 1;
- }
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- entry->ln = li->ln;
- if (entry->ln != NULL) {
- if (li->direction == REDIS_TAIL)
- li->ln = li->ln->next;
- else
- li->ln = li->ln->prev;
- return 1;
- }
- } else {
- redisPanic("Unknown list encoding");
- }
- return 0;
-}
-
-/* Return entry or NULL at the current position of the iterator. */
-static robj *listTypeGet(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
- robj *value = NULL;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- redisAssert(entry->zi != NULL);
- if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
- }
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- redisAssert(entry->ln != NULL);
- value = listNodeValue(entry->ln);
- incrRefCount(value);
- } else {
- redisPanic("Unknown list encoding");
- }
- return value;
-}
-
-static void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
- robj *subject = entry->li->subject;
- if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
- value = getDecodedObject(value);
- if (where == REDIS_TAIL) {
- unsigned char *next = ziplistNext(subject->ptr,entry->zi);
-
- /* When we insert after the current element, but the current element
- * is the tail of the list, we need to do a push. */
- if (next == NULL) {
- subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
- } else {
- subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
- }
- } else {
- subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
- }
- decrRefCount(value);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- if (where == REDIS_TAIL) {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
- } else {
- listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
- }
- incrRefCount(value);
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-/* Compare the given object with the entry at the current position. */
-static int listTypeEqual(listTypeEntry *entry, robj *o) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- redisAssert(o->encoding == REDIS_ENCODING_RAW);
- return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
- } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
- return equalStringObjects(o,listNodeValue(entry->ln));
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-/* Delete the element pointed to. */
-static void listTypeDelete(listTypeEntry *entry) {
- listTypeIterator *li = entry->li;
- if (li->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p = entry->zi;
- li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
-
- /* Update position of the iterator depending on the direction */
- if (li->direction == REDIS_TAIL)
- li->zi = p;
- else
- li->zi = ziplistPrev(li->subject->ptr,p);
- } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *next;
- if (li->direction == REDIS_TAIL)
- next = entry->ln->next;
- else
- next = entry->ln->prev;
- listDelNode(li->subject->ptr,entry->ln);
- li->ln = next;
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-static void listTypeConvert(robj *subject, int enc) {
- listTypeIterator *li;
- listTypeEntry entry;
- redisAssert(subject->type == REDIS_LIST);
-
- if (enc == REDIS_ENCODING_LINKEDLIST) {
- list *l = listCreate();
- listSetFreeMethod(l,decrRefCount);
-
- /* listTypeGet returns a robj with incremented refcount */
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
- listTypeReleaseIterator(li);
-
- subject->encoding = REDIS_ENCODING_LINKEDLIST;
- zfree(subject->ptr);
- subject->ptr = l;
- } else {
- redisPanic("Unsupported list conversion");
- }
-}
-
-static void pushGenericCommand(redisClient *c, int where) {
- robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
- if (lobj == NULL) {
- if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
- addReply(c,shared.cone);
- return;
- }
- lobj = createZiplistObject();
- dbAdd(c->db,c->argv[1],lobj);
- } else {
- if (lobj->type != REDIS_LIST) {
- addReply(c,shared.wrongtypeerr);
- return;
- }
- if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
- addReply(c,shared.cone);
- return;
- }
- }
- listTypePush(lobj,c->argv[2],where);
- addReplyLongLong(c,listTypeLength(lobj));
- server.dirty++;
-}
-
-static void lpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_HEAD);
-}
-
-static void rpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_TAIL);
-}
-
-static void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
- robj *subject;
- listTypeIterator *iter;
- listTypeEntry entry;
- int inserted = 0;
-
- if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,subject,REDIS_LIST)) return;
-
- if (refval != NULL) {
- /* Note: we expect refval to be string-encoded because it is *not* the
- * last argument of the multi-bulk LINSERT. */
- redisAssert(refval->encoding == REDIS_ENCODING_RAW);
-
- /* We're not sure if this value can be inserted yet, but we cannot
- * convert the list inside the iterator. We don't want to loop over
- * the list twice (once to see if the value can be inserted and once
- * to do the actual insert), so we assume this value can be inserted
- * and convert the ziplist to a regular list if necessary. */
- listTypeTryConversion(subject,val);
-
- /* Seek refval from head to tail */
- iter = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(iter,&entry)) {
- if (listTypeEqual(&entry,refval)) {
- listTypeInsert(&entry,val,where);
- inserted = 1;
- break;
- }
- }
- listTypeReleaseIterator(iter);
-
- if (inserted) {
- /* Check if the length exceeds the ziplist length threshold. */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
- ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
- listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
- server.dirty++;
- } else {
- /* Notify client of a failed insert */
- addReply(c,shared.cnegone);
- return;
- }
- } else {
- listTypePush(subject,val,where);
- server.dirty++;
- }
-
- addReplyUlong(c,listTypeLength(subject));
-}
-
-static void lpushxCommand(redisClient *c) {
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
-}
-
-static void rpushxCommand(redisClient *c) {
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
-}
-
-static void linsertCommand(redisClient *c) {
- if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
- } else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
- } else {
- addReply(c,shared.syntaxerr);
- }
-}
-
-static void llenCommand(redisClient *c) {
- robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
- addReplyUlong(c,listTypeLength(o));
-}
-
-static void lindexCommand(redisClient *c) {
- robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
- int index = atoi(c->argv[2]->ptr);
- robj *value = NULL;
-
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p;
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
- p = ziplistIndex(o->ptr,index);
- if (ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (vstr) {
- value = createStringObject((char*)vstr,vlen);
- } else {
- value = createStringObjectFromLongLong(vlong);
- }
- addReplyBulk(c,value);
- decrRefCount(value);
- } else {
- addReply(c,shared.nullbulk);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln != NULL) {
- value = listNodeValue(ln);
- addReplyBulk(c,value);
- } else {
- addReply(c,shared.nullbulk);
- }
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-static void lsetCommand(redisClient *c) {
- robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
- int index = atoi(c->argv[2]->ptr);
- robj *value = c->argv[3];
-
- listTypeTryConversion(o,value);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *p, *zl = o->ptr;
- p = ziplistIndex(zl,index);
- if (p == NULL) {
- addReply(c,shared.outofrangeerr);
- } else {
- o->ptr = ziplistDelete(o->ptr,&p);
- value = getDecodedObject(value);
- o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
- decrRefCount(value);
- addReply(c,shared.ok);
- server.dirty++;
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- listNode *ln = listIndex(o->ptr,index);
- if (ln == NULL) {
- addReply(c,shared.outofrangeerr);
- } else {
- decrRefCount((robj*)listNodeValue(ln));
- listNodeValue(ln) = value;
- incrRefCount(value);
- addReply(c,shared.ok);
- server.dirty++;
- }
- } else {
- redisPanic("Unknown list encoding");
- }
-}
-
-static void popGenericCommand(redisClient *c, int where) {
- robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
-
- robj *value = listTypePop(o,where);
- if (value == NULL) {
- addReply(c,shared.nullbulk);
- } else {
- addReplyBulk(c,value);
- decrRefCount(value);
- if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty++;
- }
-}
-
-static void lpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_HEAD);
-}
-
-static void rpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_TAIL);
-}
-
-static void lrangeCommand(redisClient *c) {
- robj *o, *value;
- int start = atoi(c->argv[2]->ptr);
- int end = atoi(c->argv[3]->ptr);
- int llen;
- int rangelen, j;
- listTypeEntry entry;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_LIST)) return;
- llen = listTypeLength(o);
-
- /* convert negative indexes */
- if (start < 0) start = llen+start;
- if (end < 0) end = llen+end;
- if (start < 0) start = 0;
- if (end < 0) end = 0;
-
- /* indexes sanity checks */
- if (start > end || start >= llen) {
- /* Out of range start or start > end result in empty list */
- addReply(c,shared.emptymultibulk);
- return;
- }
- if (end >= llen) end = llen-1;
- rangelen = (end-start)+1;
-
- /* Return the result in form of a multi-bulk reply */
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
- listTypeIterator *li = listTypeInitIterator(o,start,REDIS_TAIL);
- for (j = 0; j < rangelen; j++) {
- redisAssert(listTypeNext(li,&entry));
- value = listTypeGet(&entry);
- addReplyBulk(c,value);
- decrRefCount(value);
- }
- listTypeReleaseIterator(li);
-}
-
-static void ltrimCommand(redisClient *c) {
- robj *o;
- int start = atoi(c->argv[2]->ptr);
- int end = atoi(c->argv[3]->ptr);
- int llen;
- int j, ltrim, rtrim;
- list *list;
- listNode *ln;
-
- if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
- checkType(c,o,REDIS_LIST)) return;
- llen = listTypeLength(o);
-
- /* convert negative indexes */
- if (start < 0) start = llen+start;
- if (end < 0) end = llen+end;
- if (start < 0) start = 0;
- if (end < 0) end = 0;
-
- /* indexes sanity checks */
- if (start > end || start >= llen) {
- /* Out of range start or start > end result in empty list */
- ltrim = llen;
- rtrim = 0;
- } else {
- if (end >= llen) end = llen-1;
- ltrim = start;
- rtrim = llen-end-1;
- }
-
- /* Remove list elements to perform the trim */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
- o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list = o->ptr;
- for (j = 0; j < ltrim; j++) {
- ln = listFirst(list);
- listDelNode(list,ln);
- }
- for (j = 0; j < rtrim; j++) {
- ln = listLast(list);
- listDelNode(list,ln);
- }
- } else {
- redisPanic("Unknown list encoding");
- }
- if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty++;
- addReply(c,shared.ok);
-}
-
-static void lremCommand(redisClient *c) {
- robj *subject, *obj = c->argv[3];
- int toremove = atoi(c->argv[2]->ptr);
- int removed = 0;
- listTypeEntry entry;
-
- subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
- if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
-
- /* Make sure obj is raw when we're dealing with a ziplist */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- obj = getDecodedObject(obj);
-
- listTypeIterator *li;
- if (toremove < 0) {
- toremove = -toremove;
- li = listTypeInitIterator(subject,-1,REDIS_HEAD);
- } else {
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
- }
-
- while (listTypeNext(li,&entry)) {
- if (listTypeEqual(&entry,obj)) {
- listTypeDelete(&entry);
- server.dirty++;
- removed++;
- if (toremove && removed == toremove) break;
- }
- }
- listTypeReleaseIterator(li);
-
- /* Clean up raw encoded object */
- if (subject->encoding == REDIS_ENCODING_ZIPLIST)
- decrRefCount(obj);
-
- if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
- addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
-}
-
-/* This is the semantic of this command:
- * RPOPLPUSH srclist dstlist:
- * IF LLEN(srclist) > 0
- * element = RPOP srclist
- * LPUSH dstlist element
- * RETURN element
- * ELSE
- * RETURN nil
- * END
- * END
- *
- * The idea is to be able to get an element from a list in a reliable way
- * since the element is not just returned but pushed against another list
- * as well. This command was originally proposed by Ezra Zygmuntowicz.
- */
-static void rpoplpushcommand(redisClient *c) {
- robj *sobj, *value;
- if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,sobj,REDIS_LIST)) return;
-
- if (listTypeLength(sobj) == 0) {
- addReply(c,shared.nullbulk);
- } else {
- robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
- if (dobj && checkType(c,dobj,REDIS_LIST)) return;
- value = listTypePop(sobj,REDIS_TAIL);
-
- /* Add the element to the target list (unless it's directly
- * passed to some BLPOP-ing client */
- if (!handleClientsWaitingListPush(c,c->argv[2],value)) {
- /* Create the list if the key does not exist */
- if (!dobj) {
- dobj = createZiplistObject();
- dbAdd(c->db,c->argv[2],dobj);
- }
- listTypePush(dobj,value,REDIS_HEAD);
- }
-
- /* Send the element to the client as reply as well */
- addReplyBulk(c,value);
-
- /* listTypePop returns an object with its refcount incremented */
- decrRefCount(value);
-
- /* Delete the source list when it is empty */
- if (listTypeLength(sobj) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty++;
- }
-}
-
-/* ==================================== Sets ================================ */
-
-static void saddCommand(redisClient *c) {
- robj *set;
-
- set = lookupKeyWrite(c->db,c->argv[1]);
- if (set == NULL) {
- set = createSetObject();
- dbAdd(c->db,c->argv[1],set);
- } else {
- if (set->type != REDIS_SET) {
- addReply(c,shared.wrongtypeerr);
- return;
- }
- }
- if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
- incrRefCount(c->argv[2]);
- server.dirty++;
- addReply(c,shared.cone);
- } else {
- addReply(c,shared.czero);
- }
-}
-
-static void sremCommand(redisClient *c) {
- robj *set;
-
- if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
-
- if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
- server.dirty++;
- if (htNeedsResize(set->ptr)) dictResize(set->ptr);
- if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
- addReply(c,shared.cone);
- } else {
- addReply(c,shared.czero);
- }
-}
-
-static void smoveCommand(redisClient *c) {
- robj *srcset, *dstset;
-
- srcset = lookupKeyWrite(c->db,c->argv[1]);
- dstset = lookupKeyWrite(c->db,c->argv[2]);
-
- /* If the source key does not exist return 0, if it's of the wrong type
- * raise an error */
- if (srcset == NULL || srcset->type != REDIS_SET) {
- addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
- return;
- }
- /* Error if the destination key is not a set as well */
- if (dstset && dstset->type != REDIS_SET) {
- addReply(c,shared.wrongtypeerr);
- return;
- }
- /* Remove the element from the source set */
- if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
- /* Key not found in the src set! return zero */
- addReply(c,shared.czero);
- return;
- }
- if (dictSize((dict*)srcset->ptr) == 0 && srcset != dstset)
- dbDelete(c->db,c->argv[1]);
- server.dirty++;
- /* Add the element to the destination set */
- if (!dstset) {
- dstset = createSetObject();
- dbAdd(c->db,c->argv[2],dstset);
- }
- if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
- incrRefCount(c->argv[3]);
- addReply(c,shared.cone);
-}
-
-static void sismemberCommand(redisClient *c) {
- robj *set;
-
- if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
-
- if (dictFind(set->ptr,c->argv[2]))
- addReply(c,shared.cone);
- else
- addReply(c,shared.czero);
-}
-
-static void scardCommand(redisClient *c) {
- robj *o;
- dict *s;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_SET)) return;
-
- s = o->ptr;
- addReplyUlong(c,dictSize(s));
-}
-
-static void spopCommand(redisClient *c) {
- robj *set;
- dictEntry *de;
-
- if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
-
- de = dictGetRandomKey(set->ptr);
- if (de == NULL) {
- addReply(c,shared.nullbulk);
- } else {
- robj *ele = dictGetEntryKey(de);
-
- addReplyBulk(c,ele);
- dictDelete(set->ptr,ele);
- if (htNeedsResize(set->ptr)) dictResize(set->ptr);
- if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty++;
- }
-}
-
-static void srandmemberCommand(redisClient *c) {
- robj *set;
- dictEntry *de;
-
- if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
-
- de = dictGetRandomKey(set->ptr);
- if (de == NULL) {
- addReply(c,shared.nullbulk);
- } else {
- robj *ele = dictGetEntryKey(de);
-
- addReplyBulk(c,ele);
- }
-}
-
-static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
- dict **d1 = (void*) s1, **d2 = (void*) s2;
-
- return dictSize(*d1)-dictSize(*d2);
-}
-
-static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
- dict **dv = zmalloc(sizeof(dict*)*setsnum);
- dictIterator *di;
- dictEntry *de;
- robj *lenobj = NULL, *dstset = NULL;
- unsigned long j, cardinality = 0;
-
- for (j = 0; j < setsnum; j++) {
- robj *setobj;
-
- setobj = dstkey ?
- lookupKeyWrite(c->db,setskeys[j]) :
- lookupKeyRead(c->db,setskeys[j]);
- if (!setobj) {
- zfree(dv);
- if (dstkey) {
- if (dbDelete(c->db,dstkey))
- server.dirty++;
- addReply(c,shared.czero);
- } else {
- addReply(c,shared.emptymultibulk);
- }
- return;
- }
- if (setobj->type != REDIS_SET) {
- zfree(dv);
- addReply(c,shared.wrongtypeerr);
- return;
- }
- dv[j] = setobj->ptr;
- }
- /* Sort sets from the smallest to largest, this will improve our
- * algorithm's performace */
- qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
-
- /* The first thing we should output is the total number of elements...
- * since this is a multi-bulk write, but at this stage we don't know
- * the intersection set size, so we use a trick, append an empty object
- * to the output list and save the pointer to later modify it with the
- * right length */
- if (!dstkey) {
- lenobj = createObject(REDIS_STRING,NULL);
- addReply(c,lenobj);
- decrRefCount(lenobj);
- } else {
- /* If we have a target key where to store the resulting set
- * create this key with an empty set inside */
- dstset = createSetObject();
- }
-
- /* Iterate all the elements of the first (smallest) set, and test
- * the element against all the other sets, if at least one set does
- * not include the element it is discarded */
- di = dictGetIterator(dv[0]);
-
- while((de = dictNext(di)) != NULL) {
- robj *ele;
-
- for (j = 1; j < setsnum; j++)
- if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
- if (j != setsnum)
- continue; /* at least one set does not contain the member */
- ele = dictGetEntryKey(de);
- if (!dstkey) {
- addReplyBulk(c,ele);
- cardinality++;
- } else {
- dictAdd(dstset->ptr,ele,NULL);
- incrRefCount(ele);
- }
- }
- dictReleaseIterator(di);
-
- if (dstkey) {
- /* Store the resulting set into the target, if the intersection
- * is not an empty set. */
- dbDelete(c->db,dstkey);
- if (dictSize((dict*)dstset->ptr) > 0) {
- dbAdd(c->db,dstkey,dstset);
- addReplyLongLong(c,dictSize((dict*)dstset->ptr));
- } else {
- decrRefCount(dstset);
- addReply(c,shared.czero);
- }
- server.dirty++;
- } else {
- lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
- }
- zfree(dv);
-}
-
-static void sinterCommand(redisClient *c) {
- sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
-}
-
-static void sinterstoreCommand(redisClient *c) {
- sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
-}
-
-#define REDIS_OP_UNION 0
-#define REDIS_OP_DIFF 1
-#define REDIS_OP_INTER 2
-
-static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
- dict **dv = zmalloc(sizeof(dict*)*setsnum);
- dictIterator *di;
- dictEntry *de;
- robj *dstset = NULL;
- int j, cardinality = 0;
-
- for (j = 0; j < setsnum; j++) {
- robj *setobj;
-
- setobj = dstkey ?
- lookupKeyWrite(c->db,setskeys[j]) :
- lookupKeyRead(c->db,setskeys[j]);
- if (!setobj) {
- dv[j] = NULL;
- continue;
- }
- if (setobj->type != REDIS_SET) {
- zfree(dv);
- addReply(c,shared.wrongtypeerr);
- return;
- }
- dv[j] = setobj->ptr;
- }
-
- /* We need a temp set object to store our union. If the dstkey
- * is not NULL (that is, we are inside an SUNIONSTORE operation) then
- * this set object will be the resulting object to set into the target key*/
- dstset = createSetObject();
-
- /* Iterate all the elements of all the sets, add every element a single
- * time to the result set */
- for (j = 0; j < setsnum; j++) {
- if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
- if (!dv[j]) continue; /* non existing keys are like empty sets */
-
- di = dictGetIterator(dv[j]);
-
- while((de = dictNext(di)) != NULL) {
- robj *ele;
-
- /* dictAdd will not add the same element multiple times */
- ele = dictGetEntryKey(de);
- if (op == REDIS_OP_UNION || j == 0) {
- if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
- incrRefCount(ele);
- cardinality++;
- }
- } else if (op == REDIS_OP_DIFF) {
- if (dictDelete(dstset->ptr,ele) == DICT_OK) {
- cardinality--;
- }
- }
- }
- dictReleaseIterator(di);
-
- /* result set is empty? Exit asap. */
- if (op == REDIS_OP_DIFF && cardinality == 0) break;
- }
-
- /* Output the content of the resulting set, if not in STORE mode */
- if (!dstkey) {
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
- di = dictGetIterator(dstset->ptr);
- while((de = dictNext(di)) != NULL) {
- robj *ele;
-
- ele = dictGetEntryKey(de);
- addReplyBulk(c,ele);
- }
- dictReleaseIterator(di);
- decrRefCount(dstset);
- } else {
- /* If we have a target key where to store the resulting set
- * create this key with the result set inside */
- dbDelete(c->db,dstkey);
- if (dictSize((dict*)dstset->ptr) > 0) {
- dbAdd(c->db,dstkey,dstset);
- addReplyLongLong(c,dictSize((dict*)dstset->ptr));
- } else {
- decrRefCount(dstset);
- addReply(c,shared.czero);
- }
- server.dirty++;
- }
- zfree(dv);
-}
-
-static void sunionCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
-}
-
-static void sunionstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
-}
-
-static void sdiffCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
-}
-
-static void sdiffstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
-}
-
-/* ==================================== ZSets =============================== */
-
-/* ZSETs are ordered sets using two data structures to hold the same elements
- * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
- * data structure.
- *
- * The elements are added to an hash table mapping Redis objects to scores.
- * At the same time the elements are added to a skip list mapping scores
- * to Redis objects (so objects are sorted by scores in this "view"). */
-
-/* This skiplist implementation is almost a C translation of the original
- * algorithm described by William Pugh in "Skip Lists: A Probabilistic
- * Alternative to Balanced Trees", modified in three ways:
- * a) this implementation allows for repeated values.
- * b) the comparison is not just by key (our 'score') but by satellite data.
- * c) there is a back pointer, so it's a doubly linked list with the back
- * pointers being only at "level 1". This allows to traverse the list
- * from tail to head, useful for ZREVRANGE. */
-
-static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
- zskiplistNode *zn = zmalloc(sizeof(*zn));
-
- zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
- if (level > 1)
- zn->span = zmalloc(sizeof(unsigned int) * (level - 1));
- else
- zn->span = NULL;
- zn->score = score;
- zn->obj = obj;
- return zn;
-}
-
-static zskiplist *zslCreate(void) {
- int j;
- zskiplist *zsl;
-
- zsl = zmalloc(sizeof(*zsl));
- zsl->level = 1;
- zsl->length = 0;
- zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
- for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
- zsl->header->forward[j] = NULL;
-
- /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
- if (j < ZSKIPLIST_MAXLEVEL-1)
- zsl->header->span[j] = 0;
- }
- zsl->header->backward = NULL;
- zsl->tail = NULL;
- return zsl;
-}
-
-static void zslFreeNode(zskiplistNode *node) {
- decrRefCount(node->obj);
- zfree(node->forward);
- zfree(node->span);
- zfree(node);
-}
-
-static void zslFree(zskiplist *zsl) {
- zskiplistNode *node = zsl->header->forward[0], *next;
-
- zfree(zsl->header->forward);
- zfree(zsl->header->span);
- zfree(zsl->header);
- while(node) {
- next = node->forward[0];
- zslFreeNode(node);
- node = next;
- }
- zfree(zsl);
-}
-
-static int zslRandomLevel(void) {
- int level = 1;
- while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
- level += 1;
- return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
-}
-
-static void zslInsert(zskiplist *zsl, double score, robj *obj) {
- zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
- unsigned int rank[ZSKIPLIST_MAXLEVEL];
- int i, level;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- /* store rank that is crossed to reach the insert position */
- rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
-
- while (x->forward[i] &&
- (x->forward[i]->score < score ||
- (x->forward[i]->score == score &&
- compareStringObjects(x->forward[i]->obj,obj) < 0))) {
- rank[i] += i > 0 ? x->span[i-1] : 1;
- x = x->forward[i];
- }
- update[i] = x;
- }
- /* we assume the key is not already inside, since we allow duplicated
- * scores, and the re-insertion of score and redis object should never
- * happpen since the caller of zslInsert() should test in the hash table
- * if the element is already inside or not. */
- level = zslRandomLevel();
- if (level > zsl->level) {
- for (i = zsl->level; i < level; i++) {
- rank[i] = 0;
- update[i] = zsl->header;
- update[i]->span[i-1] = zsl->length;
- }
- zsl->level = level;
- }
- x = zslCreateNode(level,score,obj);
- for (i = 0; i < level; i++) {
- x->forward[i] = update[i]->forward[i];
- update[i]->forward[i] = x;
-
- /* update span covered by update[i] as x is inserted here */
- if (i > 0) {
- x->span[i-1] = update[i]->span[i-1] - (rank[0] - rank[i]);
- update[i]->span[i-1] = (rank[0] - rank[i]) + 1;
- }
- }
-
- /* increment span for untouched levels */
- for (i = level; i < zsl->level; i++) {
- update[i]->span[i-1]++;
- }
-
- x->backward = (update[0] == zsl->header) ? NULL : update[0];
- if (x->forward[0])
- x->forward[0]->backward = x;
- else
- zsl->tail = x;
- zsl->length++;
-}
-
-/* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
-void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
- int i;
- for (i = 0; i < zsl->level; i++) {
- if (update[i]->forward[i] == x) {
- if (i > 0) {
- update[i]->span[i-1] += x->span[i-1] - 1;
- }
- update[i]->forward[i] = x->forward[i];
- } else {
- /* invariant: i > 0, because update[0]->forward[0]
- * is always equal to x */
- update[i]->span[i-1] -= 1;
- }
- }
- if (x->forward[0]) {
- x->forward[0]->backward = x->backward;
- } else {
- zsl->tail = x->backward;
- }
- while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
- zsl->level--;
- zsl->length--;
-}
-
-/* Delete an element with matching score/object from the skiplist. */
-static int zslDelete(zskiplist *zsl, double score, robj *obj) {
- zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] &&
- (x->forward[i]->score < score ||
- (x->forward[i]->score == score &&
- compareStringObjects(x->forward[i]->obj,obj) < 0)))
- x = x->forward[i];
- update[i] = x;
- }
- /* We may have multiple elements with the same score, what we need
- * is to find the element with both the right score and object. */
- x = x->forward[0];
- if (x && score == x->score && equalStringObjects(x->obj,obj)) {
- zslDeleteNode(zsl, x, update);
- zslFreeNode(x);
- return 1;
- } else {
- return 0; /* not found */
- }
- return 0; /* not found */
-}
-
-/* Delete all the elements with score between min and max from the skiplist.
- * Min and mx are inclusive, so a score >= min || score <= max is deleted.
- * Note that this function takes the reference to the hash table view of the
- * sorted set, in order to remove the elements from the hash table too. */
-static unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict *dict) {
- zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
- unsigned long removed = 0;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] && x->forward[i]->score < min)
- x = x->forward[i];
- update[i] = x;
- }
- /* We may have multiple elements with the same score, what we need
- * is to find the element with both the right score and object. */
- x = x->forward[0];
- while (x && x->score <= max) {
- zskiplistNode *next = x->forward[0];
- zslDeleteNode(zsl, x, update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
- removed++;
- x = next;
- }
- return removed; /* not found */
-}
-
-/* Delete all the elements with rank between start and end from the skiplist.
- * Start and end are inclusive. Note that start and end need to be 1-based */
-static unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
- zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
- unsigned long traversed = 0, removed = 0;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] && (traversed + (i > 0 ? x->span[i-1] : 1)) < start) {
- traversed += i > 0 ? x->span[i-1] : 1;
- x = x->forward[i];
- }
- update[i] = x;
- }
-
- traversed++;
- x = x->forward[0];
- while (x && traversed <= end) {
- zskiplistNode *next = x->forward[0];
- zslDeleteNode(zsl, x, update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
- removed++;
- traversed++;
- x = next;
- }
- return removed;
-}
-
-/* Find the first node having a score equal or greater than the specified one.
- * Returns NULL if there is no match. */
-static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
- zskiplistNode *x;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] && x->forward[i]->score < score)
- x = x->forward[i];
- }
- /* We may have multiple elements with the same score, what we need
- * is to find the element with both the right score and object. */
- return x->forward[0];
-}
-
-/* Find the rank for an element by both score and key.
- * Returns 0 when the element cannot be found, rank otherwise.
- * Note that the rank is 1-based due to the span of zsl->header to the
- * first element. */
-static unsigned long zslistTypeGetRank(zskiplist *zsl, double score, robj *o) {
- zskiplistNode *x;
- unsigned long rank = 0;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] &&
- (x->forward[i]->score < score ||
- (x->forward[i]->score == score &&
- compareStringObjects(x->forward[i]->obj,o) <= 0))) {
- rank += i > 0 ? x->span[i-1] : 1;
- x = x->forward[i];
- }
-
- /* x might be equal to zsl->header, so test if obj is non-NULL */
- if (x->obj && equalStringObjects(x->obj,o)) {
- return rank;
- }
- }
- return 0;
-}
-
-/* Finds an element by its rank. The rank argument needs to be 1-based. */
-zskiplistNode* zslistTypeGetElementByRank(zskiplist *zsl, unsigned long rank) {
- zskiplistNode *x;
- unsigned long traversed = 0;
- int i;
-
- x = zsl->header;
- for (i = zsl->level-1; i >= 0; i--) {
- while (x->forward[i] && (traversed + (i>0 ? x->span[i-1] : 1)) <= rank)
- {
- traversed += i > 0 ? x->span[i-1] : 1;
- x = x->forward[i];
- }
- if (traversed == rank) {
- return x;
- }
- }
- return NULL;
-}
-
-/* The actual Z-commands implementations */
-
-/* This generic command implements both ZADD and ZINCRBY.
- * scoreval is the score if the operation is a ZADD (doincrement == 0) or
- * the increment if the operation is a ZINCRBY (doincrement == 1). */
-static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
- robj *zsetobj;
- zset *zs;
- double *score;
-
- if (isnan(scoreval)) {
- addReplySds(c,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
- return;
- }
-
- zsetobj = lookupKeyWrite(c->db,key);
- if (zsetobj == NULL) {
- zsetobj = createZsetObject();
- dbAdd(c->db,key,zsetobj);
- } else {
- if (zsetobj->type != REDIS_ZSET) {
- addReply(c,shared.wrongtypeerr);
- return;
- }
- }
- zs = zsetobj->ptr;
-
- /* Ok now since we implement both ZADD and ZINCRBY here the code
- * needs to handle the two different conditions. It's all about setting
- * '*score', that is, the new score to set, to the right value. */
- score = zmalloc(sizeof(double));
- if (doincrement) {
- dictEntry *de;
-
- /* Read the old score. If the element was not present starts from 0 */
- de = dictFind(zs->dict,ele);
- if (de) {
- double *oldscore = dictGetEntryVal(de);
- *score = *oldscore + scoreval;
- } else {
- *score = scoreval;
- }
- if (isnan(*score)) {
- addReplySds(c,
- sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
- zfree(score);
- /* Note that we don't need to check if the zset may be empty and
- * should be removed here, as we can only obtain Nan as score if
- * there was already an element in the sorted set. */
- return;
- }
- } else {
- *score = scoreval;
- }
-
- /* What follows is a simple remove and re-insert operation that is common
- * to both ZADD and ZINCRBY... */
- if (dictAdd(zs->dict,ele,score) == DICT_OK) {
- /* case 1: New element */
- incrRefCount(ele); /* added to hash */
- zslInsert(zs->zsl,*score,ele);
- incrRefCount(ele); /* added to skiplist */
- server.dirty++;
- if (doincrement)
- addReplyDouble(c,*score);
- else
- addReply(c,shared.cone);
- } else {
- dictEntry *de;
- double *oldscore;
-
- /* case 2: Score update operation */
- de = dictFind(zs->dict,ele);
- redisAssert(de != NULL);
- oldscore = dictGetEntryVal(de);
- if (*score != *oldscore) {
- int deleted;
-
- /* Remove and insert the element in the skip list with new score */
- deleted = zslDelete(zs->zsl,*oldscore,ele);
- redisAssert(deleted != 0);
- zslInsert(zs->zsl,*score,ele);
- incrRefCount(ele);
- /* Update the score in the hash table */
- dictReplace(zs->dict,ele,score);
- server.dirty++;
- } else {
- zfree(score);
- }
- if (doincrement)
- addReplyDouble(c,*score);
- else
- addReply(c,shared.czero);
- }
-}
-
-static void zaddCommand(redisClient *c) {
- double scoreval;
-
- if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
- zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
-}
-
-static void zincrbyCommand(redisClient *c) {
- double scoreval;
-
- if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
- zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
-}
-
-static void zremCommand(redisClient *c) {
- robj *zsetobj;
- zset *zs;
- dictEntry *de;
- double *oldscore;
- int deleted;
-
- if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,zsetobj,REDIS_ZSET)) return;
-
- zs = zsetobj->ptr;
- de = dictFind(zs->dict,c->argv[2]);
- if (de == NULL) {
- addReply(c,shared.czero);
- return;
- }
- /* Delete from the skiplist */
- oldscore = dictGetEntryVal(de);
- deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
- redisAssert(deleted != 0);
-
- /* Delete from the hash table */
- dictDelete(zs->dict,c->argv[2]);
- if (htNeedsResize(zs->dict)) dictResize(zs->dict);
- if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty++;
- addReply(c,shared.cone);
-}
-
-static void zremrangebyscoreCommand(redisClient *c) {
- double min;
- double max;
- long deleted;
- robj *zsetobj;
- zset *zs;
-
- if ((getDoubleFromObjectOrReply(c, c->argv[2], &min, NULL) != REDIS_OK) ||
- (getDoubleFromObjectOrReply(c, c->argv[3], &max, NULL) != REDIS_OK)) return;
-
- if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,zsetobj,REDIS_ZSET)) return;
-
- zs = zsetobj->ptr;
- deleted = zslDeleteRangeByScore(zs->zsl,min,max,zs->dict);
- if (htNeedsResize(zs->dict)) dictResize(zs->dict);
- if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty += deleted;
- addReplyLongLong(c,deleted);
-}
-
-static void zremrangebyrankCommand(redisClient *c) {
- long start;
- long end;
- int llen;
- long deleted;
- robj *zsetobj;
- zset *zs;
-
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
-
- if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,zsetobj,REDIS_ZSET)) return;
- zs = zsetobj->ptr;
- llen = zs->zsl->length;
-
- /* convert negative indexes */
- if (start < 0) start = llen+start;
- if (end < 0) end = llen+end;
- if (start < 0) start = 0;
- if (end < 0) end = 0;
-
- /* indexes sanity checks */
- if (start > end || start >= llen) {
- addReply(c,shared.czero);
- return;
- }
- if (end >= llen) end = llen-1;
-
- /* increment start and end because zsl*Rank functions
- * use 1-based rank */
- deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
- if (htNeedsResize(zs->dict)) dictResize(zs->dict);
- if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
- server.dirty += deleted;
- addReplyLongLong(c, deleted);
-}
-
-typedef struct {
- dict *dict;
- double weight;
-} zsetopsrc;
-
-static int qsortCompareZsetopsrcByCardinality(const void *s1, const void *s2) {
- zsetopsrc *d1 = (void*) s1, *d2 = (void*) s2;
- unsigned long size1, size2;
- size1 = d1->dict ? dictSize(d1->dict) : 0;
- size2 = d2->dict ? dictSize(d2->dict) : 0;
- return size1 - size2;
-}
-
-#define REDIS_AGGR_SUM 1
-#define REDIS_AGGR_MIN 2
-#define REDIS_AGGR_MAX 3
-#define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
-
-inline static void zunionInterAggregate(double *target, double val, int aggregate) {
- if (aggregate == REDIS_AGGR_SUM) {
- *target = *target + val;
- } else if (aggregate == REDIS_AGGR_MIN) {
- *target = val < *target ? val : *target;
- } else if (aggregate == REDIS_AGGR_MAX) {
- *target = val > *target ? val : *target;
- } else {
- /* safety net */
- redisPanic("Unknown ZUNION/INTER aggregate type");
- }
-}
-
-static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
- int i, j, setnum;
- int aggregate = REDIS_AGGR_SUM;
- zsetopsrc *src;
- robj *dstobj;
- zset *dstzset;
- dictIterator *di;
- dictEntry *de;
-
- /* expect setnum input keys to be given */
- setnum = atoi(c->argv[2]->ptr);
- if (setnum < 1) {
- addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
- return;
- }
-
- /* test if the expected number of keys would overflow */
- if (3+setnum > c->argc) {
- addReply(c,shared.syntaxerr);
- return;
- }
-
- /* read keys to be used for input */
- src = zmalloc(sizeof(zsetopsrc) * setnum);
- for (i = 0, j = 3; i < setnum; i++, j++) {
- robj *obj = lookupKeyWrite(c->db,c->argv[j]);
- if (!obj) {
- src[i].dict = NULL;
- } else {
- if (obj->type == REDIS_ZSET) {
- src[i].dict = ((zset*)obj->ptr)->dict;
- } else if (obj->type == REDIS_SET) {
- src[i].dict = (obj->ptr);
- } else {
- zfree(src);
- addReply(c,shared.wrongtypeerr);
- return;
- }
- }
-
- /* default all weights to 1 */
- src[i].weight = 1.0;
- }
-
- /* parse optional extra arguments */
- if (j < c->argc) {
- int remaining = c->argc - j;
-
- while (remaining) {
- if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) {
- j++; remaining--;
- for (i = 0; i < setnum; i++, j++, remaining--) {
- if (getDoubleFromObjectOrReply(c, c->argv[j], &src[i].weight, NULL) != REDIS_OK)
- return;
- }
- } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) {
- j++; remaining--;
- if (!strcasecmp(c->argv[j]->ptr,"sum")) {
- aggregate = REDIS_AGGR_SUM;
- } else if (!strcasecmp(c->argv[j]->ptr,"min")) {
- aggregate = REDIS_AGGR_MIN;
- } else if (!strcasecmp(c->argv[j]->ptr,"max")) {
- aggregate = REDIS_AGGR_MAX;
- } else {
- zfree(src);
- addReply(c,shared.syntaxerr);
- return;
- }
- j++; remaining--;
- } else {
- zfree(src);
- addReply(c,shared.syntaxerr);
- return;
- }
- }
- }
-
- /* sort sets from the smallest to largest, this will improve our
- * algorithm's performance */
- qsort(src,setnum,sizeof(zsetopsrc),qsortCompareZsetopsrcByCardinality);
-
- dstobj = createZsetObject();
- dstzset = dstobj->ptr;
-
- if (op == REDIS_OP_INTER) {
- /* skip going over all entries if the smallest zset is NULL or empty */
- if (src[0].dict && dictSize(src[0].dict) > 0) {
- /* precondition: as src[0].dict is non-empty and the zsets are ordered
- * from small to large, all src[i > 0].dict are non-empty too */
- di = dictGetIterator(src[0].dict);
- while((de = dictNext(di)) != NULL) {
- double *score = zmalloc(sizeof(double)), value;
- *score = src[0].weight * zunionInterDictValue(de);
-
- for (j = 1; j < setnum; j++) {
- dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
- if (other) {
- value = src[j].weight * zunionInterDictValue(other);
- zunionInterAggregate(score, value, aggregate);
- } else {
- break;
- }
- }
-
- /* skip entry when not present in every source dict */
- if (j != setnum) {
- zfree(score);
- } else {
- robj *o = dictGetEntryKey(de);
- dictAdd(dstzset->dict,o,score);
- incrRefCount(o); /* added to dictionary */
- zslInsert(dstzset->zsl,*score,o);
- incrRefCount(o); /* added to skiplist */
- }
- }
- dictReleaseIterator(di);
- }
- } else if (op == REDIS_OP_UNION) {
- for (i = 0; i < setnum; i++) {
- if (!src[i].dict) continue;
-
- di = dictGetIterator(src[i].dict);
- while((de = dictNext(di)) != NULL) {
- /* skip key when already processed */
- if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue;
-
- double *score = zmalloc(sizeof(double)), value;
- *score = src[i].weight * zunionInterDictValue(de);
-
- /* because the zsets are sorted by size, its only possible
- * for sets at larger indices to hold this entry */
- for (j = (i+1); j < setnum; j++) {
- dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
- if (other) {
- value = src[j].weight * zunionInterDictValue(other);
- zunionInterAggregate(score, value, aggregate);
- }
- }
-
- robj *o = dictGetEntryKey(de);
- dictAdd(dstzset->dict,o,score);
- incrRefCount(o); /* added to dictionary */
- zslInsert(dstzset->zsl,*score,o);
- incrRefCount(o); /* added to skiplist */
- }
- dictReleaseIterator(di);
- }
- } else {
- /* unknown operator */
- redisAssert(op == REDIS_OP_INTER || op == REDIS_OP_UNION);
- }
-
- dbDelete(c->db,dstkey);
- if (dstzset->zsl->length) {
- dbAdd(c->db,dstkey,dstobj);
- addReplyLongLong(c, dstzset->zsl->length);
- server.dirty++;
- } else {
- decrRefCount(dstobj);
- addReply(c, shared.czero);
- }
- zfree(src);
-}
-
-static void zunionstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
-}
-
-static void zinterstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
-}
-
-static void zrangeGenericCommand(redisClient *c, int reverse) {
- robj *o;
- long start;
- long end;
- int withscores = 0;
- int llen;
- int rangelen, j;
- zset *zsetobj;
- zskiplist *zsl;
- zskiplistNode *ln;
- robj *ele;
-
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
-
- if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
- withscores = 1;
- } else if (c->argc >= 5) {
- addReply(c,shared.syntaxerr);
- return;
- }
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_ZSET)) return;
- zsetobj = o->ptr;
- zsl = zsetobj->zsl;
- llen = zsl->length;
-
- /* convert negative indexes */
- if (start < 0) start = llen+start;
- if (end < 0) end = llen+end;
- if (start < 0) start = 0;
- if (end < 0) end = 0;
-
- /* indexes sanity checks */
- if (start > end || start >= llen) {
- /* Out of range start or start > end result in empty list */
- addReply(c,shared.emptymultibulk);
- return;
- }
- if (end >= llen) end = llen-1;
- rangelen = (end-start)+1;
-
- /* check if starting point is trivial, before searching
- * the element in log(N) time */
- if (reverse) {
- ln = start == 0 ? zsl->tail : zslistTypeGetElementByRank(zsl, llen-start);
- } else {
- ln = start == 0 ?
- zsl->header->forward[0] : zslistTypeGetElementByRank(zsl, start+1);
- }
-
- /* Return the result in form of a multi-bulk reply */
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
- withscores ? (rangelen*2) : rangelen));
- for (j = 0; j < rangelen; j++) {
- ele = ln->obj;
- addReplyBulk(c,ele);
- if (withscores)
- addReplyDouble(c,ln->score);
- ln = reverse ? ln->backward : ln->forward[0];
- }
-}
-
-static void zrangeCommand(redisClient *c) {
- zrangeGenericCommand(c,0);
-}
-
-static void zrevrangeCommand(redisClient *c) {
- zrangeGenericCommand(c,1);
-}
-
-/* This command implements both ZRANGEBYSCORE and ZCOUNT.
- * If justcount is non-zero, just the count is returned. */
-static void genericZrangebyscoreCommand(redisClient *c, int justcount) {
- robj *o;
- double min, max;
- int minex = 0, maxex = 0; /* are min or max exclusive? */
- int offset = 0, limit = -1;
- int withscores = 0;
- int badsyntax = 0;
-
- /* Parse the min-max interval. If one of the values is prefixed
- * by the "(" character, it's considered "open". For instance
- * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
- * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
- if (((char*)c->argv[2]->ptr)[0] == '(') {
- min = strtod((char*)c->argv[2]->ptr+1,NULL);
- minex = 1;
- } else {
- min = strtod(c->argv[2]->ptr,NULL);
- }
- if (((char*)c->argv[3]->ptr)[0] == '(') {
- max = strtod((char*)c->argv[3]->ptr+1,NULL);
- maxex = 1;
- } else {
- max = strtod(c->argv[3]->ptr,NULL);
- }
-
- /* Parse "WITHSCORES": note that if the command was called with
- * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
- * enter the following paths to parse WITHSCORES and LIMIT. */
- if (c->argc == 5 || c->argc == 8) {
- if (strcasecmp(c->argv[c->argc-1]->ptr,"withscores") == 0)
- withscores = 1;
- else
- badsyntax = 1;
- }
- if (c->argc != (4 + withscores) && c->argc != (7 + withscores))
- badsyntax = 1;
- if (badsyntax) {
- addReplySds(c,
- sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
- return;
- }
-
- /* Parse "LIMIT" */
- if (c->argc == (7 + withscores) && strcasecmp(c->argv[4]->ptr,"limit")) {
- addReply(c,shared.syntaxerr);
- return;
- } else if (c->argc == (7 + withscores)) {
- offset = atoi(c->argv[5]->ptr);
- limit = atoi(c->argv[6]->ptr);
- if (offset < 0) offset = 0;
- }
-
- /* Ok, lookup the key and get the range */
- o = lookupKeyRead(c->db,c->argv[1]);
- if (o == NULL) {
- addReply(c,justcount ? shared.czero : shared.emptymultibulk);
- } else {
- if (o->type != REDIS_ZSET) {
- addReply(c,shared.wrongtypeerr);
- } else {
- zset *zsetobj = o->ptr;
- zskiplist *zsl = zsetobj->zsl;
- zskiplistNode *ln;
- robj *ele, *lenobj = NULL;
- unsigned long rangelen = 0;
-
- /* Get the first node with the score >= min, or with
- * score > min if 'minex' is true. */
- ln = zslFirstWithScore(zsl,min);
- while (minex && ln && ln->score == min) ln = ln->forward[0];
-
- if (ln == NULL) {
- /* No element matching the speciifed interval */
- addReply(c,justcount ? shared.czero : shared.emptymultibulk);
- return;
- }
-
- /* We don't know in advance how many matching elements there
- * are in the list, so we push this object that will represent
- * the multi-bulk length in the output buffer, and will "fix"
- * it later */
- if (!justcount) {
- lenobj = createObject(REDIS_STRING,NULL);
- addReply(c,lenobj);
- decrRefCount(lenobj);
- }
-
- while(ln && (maxex ? (ln->score < max) : (ln->score <= max))) {
- if (offset) {
- offset--;
- ln = ln->forward[0];
- continue;
- }
- if (limit == 0) break;
- if (!justcount) {
- ele = ln->obj;
- addReplyBulk(c,ele);
- if (withscores)
- addReplyDouble(c,ln->score);
- }
- ln = ln->forward[0];
- rangelen++;
- if (limit > 0) limit--;
- }
- if (justcount) {
- addReplyLongLong(c,(long)rangelen);
- } else {
- lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",
- withscores ? (rangelen*2) : rangelen);
- }
- }
- }
-}
-
-static void zrangebyscoreCommand(redisClient *c) {
- genericZrangebyscoreCommand(c,0);
-}
-
-static void zcountCommand(redisClient *c) {
- genericZrangebyscoreCommand(c,1);
-}
-
-static void zcardCommand(redisClient *c) {
- robj *o;
- zset *zs;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_ZSET)) return;
-
- zs = o->ptr;
- addReplyUlong(c,zs->zsl->length);
-}
-
-static void zscoreCommand(redisClient *c) {
- robj *o;
- zset *zs;
- dictEntry *de;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_ZSET)) return;
-
- zs = o->ptr;
- de = dictFind(zs->dict,c->argv[2]);
- if (!de) {
- addReply(c,shared.nullbulk);
- } else {
- double *score = dictGetEntryVal(de);
-
- addReplyDouble(c,*score);
- }
-}
-
-static void zrankGenericCommand(redisClient *c, int reverse) {
- robj *o;
- zset *zs;
- zskiplist *zsl;
- dictEntry *de;
- unsigned long rank;
- double *score;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_ZSET)) return;
-
- zs = o->ptr;
- zsl = zs->zsl;
- de = dictFind(zs->dict,c->argv[2]);
- if (!de) {
- addReply(c,shared.nullbulk);
- return;
- }
-
- score = dictGetEntryVal(de);
- rank = zslistTypeGetRank(zsl, *score, c->argv[2]);
- if (rank) {
- if (reverse) {
- addReplyLongLong(c, zsl->length - rank);
- } else {
- addReplyLongLong(c, rank-1);
- }
- } else {
- addReply(c,shared.nullbulk);
- }
-}
-
-static void zrankCommand(redisClient *c) {
- zrankGenericCommand(c, 0);
-}
-
-static void zrevrankCommand(redisClient *c) {
- zrankGenericCommand(c, 1);
-}
-
-/* ========================= Hashes utility functions ======================= */
-#define REDIS_HASH_KEY 1
-#define REDIS_HASH_VALUE 2
-
-/* Check the length of a number of objects to see if we need to convert a
- * zipmap to a real hash. Note that we only check string encoded objects
- * as their string length can be queried in constant time. */
-static void hashTypeTryConversion(robj *subject, robj **argv, int start, int end) {
- int i;
- if (subject->encoding != REDIS_ENCODING_ZIPMAP) return;
-
- for (i = start; i <= end; i++) {
- if (argv[i]->encoding == REDIS_ENCODING_RAW &&
- sdslen(argv[i]->ptr) > server.hash_max_zipmap_value)
- {
- convertToRealHash(subject);
- return;
- }
- }
-}
-
-/* Encode given objects in-place when the hash uses a dict. */
-static void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) {
- if (subject->encoding == REDIS_ENCODING_HT) {
- if (o1) *o1 = tryObjectEncoding(*o1);
- if (o2) *o2 = tryObjectEncoding(*o2);
- }
-}
-
-/* Get the value from a hash identified by key. Returns either a string
- * object or NULL if the value cannot be found. The refcount of the object
- * is always increased by 1 when the value was found. */
-static robj *hashTypeGet(robj *o, robj *key) {
- robj *value = NULL;
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- unsigned char *v;
- unsigned int vlen;
- key = getDecodedObject(key);
- if (zipmapGet(o->ptr,key->ptr,sdslen(key->ptr),&v,&vlen)) {
- value = createStringObject((char*)v,vlen);
- }
- decrRefCount(key);
- } else {
- dictEntry *de = dictFind(o->ptr,key);
- if (de != NULL) {
- value = dictGetEntryVal(de);
- incrRefCount(value);
- }
- }
- return value;
-}
-
-/* Test if the key exists in the given hash. Returns 1 if the key
- * exists and 0 when it doesn't. */
-static int hashTypeExists(robj *o, robj *key) {
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- key = getDecodedObject(key);
- if (zipmapExists(o->ptr,key->ptr,sdslen(key->ptr))) {
- decrRefCount(key);
- return 1;
- }
- decrRefCount(key);
- } else {
- if (dictFind(o->ptr,key) != NULL) {
- return 1;
- }
- }
- return 0;
-}
-
-/* Add an element, discard the old if the key already exists.
- * Return 0 on insert and 1 on update. */
-static int hashTypeSet(robj *o, robj *key, robj *value) {
- int update = 0;
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- key = getDecodedObject(key);
- value = getDecodedObject(value);
- o->ptr = zipmapSet(o->ptr,
- key->ptr,sdslen(key->ptr),
- value->ptr,sdslen(value->ptr), &update);
- decrRefCount(key);
- decrRefCount(value);
-
- /* Check if the zipmap needs to be upgraded to a real hash table */
- if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries)
- convertToRealHash(o);
- } else {
- if (dictReplace(o->ptr,key,value)) {
- /* Insert */
- incrRefCount(key);
- } else {
- /* Update */
- update = 1;
- }
- incrRefCount(value);
- }
- return update;
-}
-
-/* Delete an element from a hash.
- * Return 1 on deleted and 0 on not found. */
-static int hashTypeDelete(robj *o, robj *key) {
- int deleted = 0;
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- key = getDecodedObject(key);
- o->ptr = zipmapDel(o->ptr,key->ptr,sdslen(key->ptr), &deleted);
- decrRefCount(key);
- } else {
- deleted = dictDelete((dict*)o->ptr,key) == DICT_OK;
- /* Always check if the dictionary needs a resize after a delete. */
- if (deleted && htNeedsResize(o->ptr)) dictResize(o->ptr);
- }
- return deleted;
-}
-
-/* Return the number of elements in a hash. */
-static unsigned long hashTypeLength(robj *o) {
- return (o->encoding == REDIS_ENCODING_ZIPMAP) ?
- zipmapLen((unsigned char*)o->ptr) : dictSize((dict*)o->ptr);
-}
-
-/* Structure to hold hash iteration abstration. Note that iteration over
- * hashes involves both fields and values. Because it is possible that
- * not both are required, store pointers in the iterator to avoid
- * unnecessary memory allocation for fields/values. */
-typedef struct {
- int encoding;
- unsigned char *zi;
- unsigned char *zk, *zv;
- unsigned int zklen, zvlen;
-
- dictIterator *di;
- dictEntry *de;
-} hashTypeIterator;
-
-static hashTypeIterator *hashTypeInitIterator(robj *subject) {
- hashTypeIterator *hi = zmalloc(sizeof(hashTypeIterator));
- hi->encoding = subject->encoding;
- if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
- hi->zi = zipmapRewind(subject->ptr);
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- hi->di = dictGetIterator(subject->ptr);
- } else {
- redisAssert(NULL);
- }
- return hi;
-}
-
-static void hashTypeReleaseIterator(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_HT) {
- dictReleaseIterator(hi->di);
- }
- zfree(hi);
-}
-
-/* Move to the next entry in the hash. Return REDIS_OK when the next entry
- * could be found and REDIS_ERR when the iterator reaches the end. */
-static int hashTypeNext(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
- if ((hi->zi = zipmapNext(hi->zi, &hi->zk, &hi->zklen,
- &hi->zv, &hi->zvlen)) == NULL) return REDIS_ERR;
- } else {
- if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR;
- }
- return REDIS_OK;
-}
-
-/* Get key or value object at current iteration position.
- * This increases the refcount of the field object by 1. */
-static robj *hashTypeCurrent(hashTypeIterator *hi, int what) {
- robj *o;
- if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
- if (what & REDIS_HASH_KEY) {
- o = createStringObject((char*)hi->zk,hi->zklen);
- } else {
- o = createStringObject((char*)hi->zv,hi->zvlen);
- }
- } else {
- if (what & REDIS_HASH_KEY) {
- o = dictGetEntryKey(hi->de);
- } else {
- o = dictGetEntryVal(hi->de);
- }
- incrRefCount(o);
- }
- return o;
-}
-
-static robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
- robj *o = lookupKeyWrite(c->db,key);
- if (o == NULL) {
- o = createHashObject();
- dbAdd(c->db,key,o);
- } else {
- if (o->type != REDIS_HASH) {
- addReply(c,shared.wrongtypeerr);
- return NULL;
- }
- }
- return o;
-}
-
-/* ============================= Hash commands ============================== */
-static void hsetCommand(redisClient *c) {
- int update;
- robj *o;
-
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- hashTypeTryConversion(o,c->argv,2,3);
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- update = hashTypeSet(o,c->argv[2],c->argv[3]);
- addReply(c, update ? shared.czero : shared.cone);
- server.dirty++;
-}
-
-static void hsetnxCommand(redisClient *c) {
- robj *o;
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- hashTypeTryConversion(o,c->argv,2,3);
-
- if (hashTypeExists(o, c->argv[2])) {
- addReply(c, shared.czero);
- } else {
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- hashTypeSet(o,c->argv[2],c->argv[3]);
- addReply(c, shared.cone);
- server.dirty++;
- }
-}
-
-static void hmsetCommand(redisClient *c) {
- int i;
- robj *o;
-
- if ((c->argc % 2) == 1) {
- addReplySds(c,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
- return;
- }
-
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- hashTypeTryConversion(o,c->argv,2,c->argc-1);
- for (i = 2; i < c->argc; i += 2) {
- hashTypeTryObjectEncoding(o,&c->argv[i], &c->argv[i+1]);
- hashTypeSet(o,c->argv[i],c->argv[i+1]);
- }
- addReply(c, shared.ok);
- server.dirty++;
-}
-
-static void hincrbyCommand(redisClient *c) {
- long long value, incr;
- robj *o, *current, *new;
-
- if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- if ((current = hashTypeGet(o,c->argv[2])) != NULL) {
- if (getLongLongFromObjectOrReply(c,current,&value,
- "hash value is not an integer") != REDIS_OK) {
- decrRefCount(current);
- return;
- }
- decrRefCount(current);
- } else {
- value = 0;
- }
-
- value += incr;
- new = createStringObjectFromLongLong(value);
- hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
- hashTypeSet(o,c->argv[2],new);
- decrRefCount(new);
- addReplyLongLong(c,value);
- server.dirty++;
-}
-
-static void hgetCommand(redisClient *c) {
- robj *o, *value;
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
-
- if ((value = hashTypeGet(o,c->argv[2])) != NULL) {
- addReplyBulk(c,value);
- decrRefCount(value);
- } else {
- addReply(c,shared.nullbulk);
- }
-}
-
-static void hmgetCommand(redisClient *c) {
- int i;
- robj *o, *value;
- o = lookupKeyRead(c->db,c->argv[1]);
- if (o != NULL && o->type != REDIS_HASH) {
- addReply(c,shared.wrongtypeerr);
- }
-
- /* Note the check for o != NULL happens inside the loop. This is
- * done because objects that cannot be found are considered to be
- * an empty hash. The reply should then be a series of NULLs. */
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-2));
- for (i = 2; i < c->argc; i++) {
- if (o != NULL && (value = hashTypeGet(o,c->argv[i])) != NULL) {
- addReplyBulk(c,value);
- decrRefCount(value);
- } else {
- addReply(c,shared.nullbulk);
- }
- }
-}
-
-static void hdelCommand(redisClient *c) {
- robj *o;
- if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
-
- if (hashTypeDelete(o,c->argv[2])) {
- if (hashTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
- addReply(c,shared.cone);
- server.dirty++;
- } else {
- addReply(c,shared.czero);
- }
-}
-
-static void hlenCommand(redisClient *c) {
- robj *o;
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
-
- addReplyUlong(c,hashTypeLength(o));
-}
-
-static void genericHgetallCommand(redisClient *c, int flags) {
- robj *o, *lenobj, *obj;
- unsigned long count = 0;
- hashTypeIterator *hi;
-
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_HASH)) return;
-
- lenobj = createObject(REDIS_STRING,NULL);
- addReply(c,lenobj);
- decrRefCount(lenobj);
-
- hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
- if (flags & REDIS_HASH_KEY) {
- obj = hashTypeCurrent(hi,REDIS_HASH_KEY);
- addReplyBulk(c,obj);
- decrRefCount(obj);
- count++;
- }
- if (flags & REDIS_HASH_VALUE) {
- obj = hashTypeCurrent(hi,REDIS_HASH_VALUE);
- addReplyBulk(c,obj);
- decrRefCount(obj);
- count++;
- }
- }
- hashTypeReleaseIterator(hi);
-
- lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",count);
-}
-
-static void hkeysCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY);
-}
-
-static void hvalsCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_VALUE);
-}
-
-static void hgetallCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY|REDIS_HASH_VALUE);
-}
-
-static void hexistsCommand(redisClient *c) {
- robj *o;
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
-
- addReply(c, hashTypeExists(o,c->argv[2]) ? shared.cone : shared.czero);
-}
-
-static void convertToRealHash(robj *o) {
- unsigned char *key, *val, *p, *zm = o->ptr;
- unsigned int klen, vlen;
- dict *dict = dictCreate(&hashDictType,NULL);
-
- assert(o->type == REDIS_HASH && o->encoding != REDIS_ENCODING_HT);
- p = zipmapRewind(zm);
- while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
- robj *keyobj, *valobj;
-
- keyobj = createStringObject((char*)key,klen);
- valobj = createStringObject((char*)val,vlen);
- keyobj = tryObjectEncoding(keyobj);
- valobj = tryObjectEncoding(valobj);
- dictAdd(dict,keyobj,valobj);
- }
- o->encoding = REDIS_ENCODING_HT;
- o->ptr = dict;
- zfree(zm);
-}
-
-/* ========================= Non type-specific commands ==================== */
-
-static void flushdbCommand(redisClient *c) {
- server.dirty += dictSize(c->db->dict);
- touchWatchedKeysOnFlush(c->db->id);
- dictEmpty(c->db->dict);
- dictEmpty(c->db->expires);
- addReply(c,shared.ok);
-}
-
-static void flushallCommand(redisClient *c) {
- touchWatchedKeysOnFlush(-1);
- server.dirty += emptyDb();
- addReply(c,shared.ok);
- if (server.bgsavechildpid != -1) {
- kill(server.bgsavechildpid,SIGKILL);
- rdbRemoveTempFile(server.bgsavechildpid);
- }
- rdbSave(server.dbfilename);
- server.dirty++;
-}
-
-static redisSortOperation *createSortOperation(int type, robj *pattern) {
- redisSortOperation *so = zmalloc(sizeof(*so));
- so->type = type;
- so->pattern = pattern;
- return so;
-}
-
-/* Return the value associated to the key with a name obtained
- * substituting the first occurence of '*' in 'pattern' with 'subst'.
- * The returned object will always have its refcount increased by 1
- * when it is non-NULL. */
-static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
- char *p, *f;
- sds spat, ssub;
- robj keyobj, fieldobj, *o;
- int prefixlen, sublen, postfixlen, fieldlen;
- /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
- struct {
- long len;
- long free;
- char buf[REDIS_SORTKEY_MAX+1];
- } keyname, fieldname;
-
- /* If the pattern is "#" return the substitution object itself in order
- * to implement the "SORT ... GET #" feature. */
- spat = pattern->ptr;
- if (spat[0] == '#' && spat[1] == '\0') {
- incrRefCount(subst);
- return subst;
- }
-
- /* The substitution object may be specially encoded. If so we create
- * a decoded object on the fly. Otherwise getDecodedObject will just
- * increment the ref count, that we'll decrement later. */
- subst = getDecodedObject(subst);
-
- ssub = subst->ptr;
- if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
- p = strchr(spat,'*');
- if (!p) {
- decrRefCount(subst);
- return NULL;
- }
-
- /* Find out if we're dealing with a hash dereference. */
- if ((f = strstr(p+1, "->")) != NULL) {
- fieldlen = sdslen(spat)-(f-spat);
- /* this also copies \0 character */
- memcpy(fieldname.buf,f+2,fieldlen-1);
- fieldname.len = fieldlen-2;
- } else {
- fieldlen = 0;
- }
-
- prefixlen = p-spat;
- sublen = sdslen(ssub);
- postfixlen = sdslen(spat)-(prefixlen+1)-fieldlen;
- memcpy(keyname.buf,spat,prefixlen);
- memcpy(keyname.buf+prefixlen,ssub,sublen);
- memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
- keyname.buf[prefixlen+sublen+postfixlen] = '\0';
- keyname.len = prefixlen+sublen+postfixlen;
- decrRefCount(subst);
-
- /* Lookup substituted key */
- initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(long)*2));
- o = lookupKeyRead(db,&keyobj);
- if (o == NULL) return NULL;
-
- if (fieldlen > 0) {
- if (o->type != REDIS_HASH || fieldname.len < 1) return NULL;
-
- /* Retrieve value from hash by the field name. This operation
- * already increases the refcount of the returned object. */
- initStaticStringObject(fieldobj,((char*)&fieldname)+(sizeof(long)*2));
- o = hashTypeGet(o, &fieldobj);
- } else {
- if (o->type != REDIS_STRING) return NULL;
-
- /* Every object that this function returns needs to have its refcount
- * increased. sortCommand decreases it again. */
- incrRefCount(o);
- }
-
- return o;
-}
-
-/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
- * the additional parameter is not standard but a BSD-specific we have to
- * pass sorting parameters via the global 'server' structure */
-static int sortCompare(const void *s1, const void *s2) {
- const redisSortObject *so1 = s1, *so2 = s2;
- int cmp;
-
- if (!server.sort_alpha) {
- /* Numeric sorting. Here it's trivial as we precomputed scores */
- if (so1->u.score > so2->u.score) {
- cmp = 1;
- } else if (so1->u.score < so2->u.score) {
- cmp = -1;
- } else {
- cmp = 0;
- }
- } else {
- /* Alphanumeric sorting */
- if (server.sort_bypattern) {
- if (!so1->u.cmpobj || !so2->u.cmpobj) {
- /* At least one compare object is NULL */
- if (so1->u.cmpobj == so2->u.cmpobj)
- cmp = 0;
- else if (so1->u.cmpobj == NULL)
- cmp = -1;
- else
- cmp = 1;
- } else {
- /* We have both the objects, use strcoll */
- cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
- }
- } else {
- /* Compare elements directly. */
- cmp = compareStringObjects(so1->obj,so2->obj);
- }
- }
- return server.sort_desc ? -cmp : cmp;
-}
-
-/* The SORT command is the most complex command in Redis. Warning: this code
- * is optimized for speed and a bit less for readability */
-static void sortCommand(redisClient *c) {
- list *operations;
- unsigned int outputlen = 0;
- int desc = 0, alpha = 0;
- int limit_start = 0, limit_count = -1, start, end;
- int j, dontsort = 0, vectorlen;
- int getop = 0; /* GET operation counter */
- robj *sortval, *sortby = NULL, *storekey = NULL;
- redisSortObject *vector; /* Resulting vector to sort */
-
- /* Lookup the key to sort. It must be of the right types */
- sortval = lookupKeyRead(c->db,c->argv[1]);
- if (sortval == NULL) {
- addReply(c,shared.emptymultibulk);
- return;
- }
- if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
- sortval->type != REDIS_ZSET)
- {
- addReply(c,shared.wrongtypeerr);
- return;
- }
-
- /* Create a list of operations to perform for every sorted element.
- * Operations can be GET/DEL/INCR/DECR */
- operations = listCreate();
- listSetFreeMethod(operations,zfree);
- j = 2;
-
- /* Now we need to protect sortval incrementing its count, in the future
- * SORT may have options able to overwrite/delete keys during the sorting
- * and the sorted key itself may get destroied */
- incrRefCount(sortval);
-
- /* The SORT command has an SQL-alike syntax, parse it */
- while(j < c->argc) {
- int leftargs = c->argc-j-1;
- if (!strcasecmp(c->argv[j]->ptr,"asc")) {
- desc = 0;
- } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
- desc = 1;
- } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
- alpha = 1;
- } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
- limit_start = atoi(c->argv[j+1]->ptr);
- limit_count = atoi(c->argv[j+2]->ptr);
- j+=2;
- } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
- storekey = c->argv[j+1];
- j++;
- } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
- sortby = c->argv[j+1];
- /* If the BY pattern does not contain '*', i.e. it is constant,
- * we don't need to sort nor to lookup the weight keys. */
- if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
- j++;
- } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
- listAddNodeTail(operations,createSortOperation(
- REDIS_SORT_GET,c->argv[j+1]));
- getop++;
- j++;
- } else {
- decrRefCount(sortval);
- listRelease(operations);
- addReply(c,shared.syntaxerr);
- return;
- }
- j++;
- }
-
- /* Load the sorting vector with all the objects to sort */
- switch(sortval->type) {
- case REDIS_LIST: vectorlen = listTypeLength(sortval); break;
- case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
- case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
- default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
- }
- vector = zmalloc(sizeof(redisSortObject)*vectorlen);
- j = 0;
-
- if (sortval->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL);
- listTypeEntry entry;
- while(listTypeNext(li,&entry)) {
- vector[j].obj = listTypeGet(&entry);
- vector[j].u.score = 0;
- vector[j].u.cmpobj = NULL;
- j++;
- }
- listTypeReleaseIterator(li);
- } else {
- dict *set;
- dictIterator *di;
- dictEntry *setele;
-
- if (sortval->type == REDIS_SET) {
- set = sortval->ptr;
- } else {
- zset *zs = sortval->ptr;
- set = zs->dict;
- }
-
- di = dictGetIterator(set);
- while((setele = dictNext(di)) != NULL) {
- vector[j].obj = dictGetEntryKey(setele);
- vector[j].u.score = 0;
- vector[j].u.cmpobj = NULL;
- j++;
- }
- dictReleaseIterator(di);
- }
- redisAssert(j == vectorlen);
-
- /* Now it's time to load the right scores in the sorting vector */
- if (dontsort == 0) {
- for (j = 0; j < vectorlen; j++) {
- robj *byval;
- if (sortby) {
- /* lookup value to sort by */
- byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
- if (!byval) continue;
- } else {
- /* use object itself to sort by */
- byval = vector[j].obj;
- }
-
- if (alpha) {
- if (sortby) vector[j].u.cmpobj = getDecodedObject(byval);
- } else {
- if (byval->encoding == REDIS_ENCODING_RAW) {
- vector[j].u.score = strtod(byval->ptr,NULL);
- } else if (byval->encoding == REDIS_ENCODING_INT) {
- /* Don't need to decode the object if it's
- * integer-encoded (the only encoding supported) so
- * far. We can just cast it */
- vector[j].u.score = (long)byval->ptr;
- } else {
- redisAssert(1 != 1);
- }
- }
-
- /* when the object was retrieved using lookupKeyByPattern,
- * its refcount needs to be decreased. */
- if (sortby) {
- decrRefCount(byval);
- }
- }
- }
-
- /* We are ready to sort the vector... perform a bit of sanity check
- * on the LIMIT option too. We'll use a partial version of quicksort. */
- start = (limit_start < 0) ? 0 : limit_start;
- end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
- if (start >= vectorlen) {
- start = vectorlen-1;
- end = vectorlen-2;
- }
- if (end >= vectorlen) end = vectorlen-1;
-
- if (dontsort == 0) {
- server.sort_desc = desc;
- server.sort_alpha = alpha;
- server.sort_bypattern = sortby ? 1 : 0;
- if (sortby && (start != 0 || end != vectorlen-1))
- pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
- else
- qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
- }
-
- /* Send command output to the output buffer, performing the specified
- * GET/DEL/INCR/DECR operations if any. */
- outputlen = getop ? getop*(end-start+1) : end-start+1;
- if (storekey == NULL) {
- /* STORE option not specified, sent the sorting result to client */
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
- for (j = start; j <= end; j++) {
- listNode *ln;
- listIter li;
-
- if (!getop) addReplyBulk(c,vector[j].obj);
- listRewind(operations,&li);
- while((ln = listNext(&li))) {
- redisSortOperation *sop = ln->value;
- robj *val = lookupKeyByPattern(c->db,sop->pattern,
- vector[j].obj);
-
- if (sop->type == REDIS_SORT_GET) {
- if (!val) {
- addReply(c,shared.nullbulk);
- } else {
- addReplyBulk(c,val);
- decrRefCount(val);
- }
- } else {
- redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
- }
- }
- }
- } else {
- robj *sobj = createZiplistObject();
-
- /* STORE option specified, set the sorting result as a List object */
- for (j = start; j <= end; j++) {
- listNode *ln;
- listIter li;
-
- if (!getop) {
- listTypePush(sobj,vector[j].obj,REDIS_TAIL);
- } else {
- listRewind(operations,&li);
- while((ln = listNext(&li))) {
- redisSortOperation *sop = ln->value;
- robj *val = lookupKeyByPattern(c->db,sop->pattern,
- vector[j].obj);
-
- if (sop->type == REDIS_SORT_GET) {
- if (!val) val = createStringObject("",0);
-
- /* listTypePush does an incrRefCount, so we should take care
- * care of the incremented refcount caused by either
- * lookupKeyByPattern or createStringObject("",0) */
- listTypePush(sobj,val,REDIS_TAIL);
- decrRefCount(val);
- } else {
- /* always fails */
- redisAssert(sop->type == REDIS_SORT_GET);
- }
- }
- }
- }
- dbReplace(c->db,storekey,sobj);
- /* Note: we add 1 because the DB is dirty anyway since even if the
- * SORT result is empty a new key is set and maybe the old content
- * replaced. */
- server.dirty += 1+outputlen;
- addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
- }
-
- /* Cleanup */
- if (sortval->type == REDIS_LIST)
- for (j = 0; j < vectorlen; j++)
- decrRefCount(vector[j].obj);
- decrRefCount(sortval);
- listRelease(operations);
- for (j = 0; j < vectorlen; j++) {
- if (alpha && vector[j].u.cmpobj)
- decrRefCount(vector[j].u.cmpobj);
- }
- zfree(vector);
-}
-
-/* Convert an amount of bytes into a human readable string in the form
- * of 100B, 2G, 100M, 4K, and so forth. */
-static void bytesToHuman(char *s, unsigned long long n) {
- double d;
-
- if (n < 1024) {
- /* Bytes */
- sprintf(s,"%lluB",n);
- return;
- } else if (n < (1024*1024)) {
- d = (double)n/(1024);
- sprintf(s,"%.2fK",d);
- } else if (n < (1024LL*1024*1024)) {
- d = (double)n/(1024*1024);
- sprintf(s,"%.2fM",d);
- } else if (n < (1024LL*1024*1024*1024)) {
- d = (double)n/(1024LL*1024*1024);
- sprintf(s,"%.2fG",d);
- }
-}
-
-/* Create the string returned by the INFO command. This is decoupled
- * by the INFO command itself as we need to report the same information
- * on memory corruption problems. */
-static sds genRedisInfoString(void) {
- sds info;
- time_t uptime = time(NULL)-server.stat_starttime;
- int j;
- char hmem[64];
-
- bytesToHuman(hmem,zmalloc_used_memory());
- info = sdscatprintf(sdsempty(),
- "redis_version:%s\r\n"
- "redis_git_sha1:%s\r\n"
- "redis_git_dirty:%d\r\n"
- "arch_bits:%s\r\n"
- "multiplexing_api:%s\r\n"
- "process_id:%ld\r\n"
- "uptime_in_seconds:%ld\r\n"
- "uptime_in_days:%ld\r\n"
- "connected_clients:%d\r\n"
- "connected_slaves:%d\r\n"
- "blocked_clients:%d\r\n"
- "used_memory:%zu\r\n"
- "used_memory_human:%s\r\n"
- "changes_since_last_save:%lld\r\n"
- "bgsave_in_progress:%d\r\n"
- "last_save_time:%ld\r\n"
- "bgrewriteaof_in_progress:%d\r\n"
- "total_connections_received:%lld\r\n"
- "total_commands_processed:%lld\r\n"
- "expired_keys:%lld\r\n"
- "hash_max_zipmap_entries:%zu\r\n"
- "hash_max_zipmap_value:%zu\r\n"
- "pubsub_channels:%ld\r\n"
- "pubsub_patterns:%u\r\n"
- "vm_enabled:%d\r\n"
- "role:%s\r\n"
- ,REDIS_VERSION,
- redisGitSHA1(),
- strtol(redisGitDirty(),NULL,10) > 0,
- (sizeof(long) == 8) ? "64" : "32",
- aeGetApiName(),
- (long) getpid(),
- uptime,
- uptime/(3600*24),
- listLength(server.clients)-listLength(server.slaves),
- listLength(server.slaves),
- server.blpop_blocked_clients,
- zmalloc_used_memory(),
- hmem,
- server.dirty,
- server.bgsavechildpid != -1,
- server.lastsave,
- server.bgrewritechildpid != -1,
- server.stat_numconnections,
- server.stat_numcommands,
- server.stat_expiredkeys,
- server.hash_max_zipmap_entries,
- server.hash_max_zipmap_value,
- dictSize(server.pubsub_channels),
- listLength(server.pubsub_patterns),
- server.vm_enabled != 0,
- server.masterhost == NULL ? "master" : "slave"
- );
- if (server.masterhost) {
- info = sdscatprintf(info,
- "master_host:%s\r\n"
- "master_port:%d\r\n"
- "master_link_status:%s\r\n"
- "master_last_io_seconds_ago:%d\r\n"
- ,server.masterhost,
- server.masterport,
- (server.replstate == REDIS_REPL_CONNECTED) ?
- "up" : "down",
- server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
- );
- }
- if (server.vm_enabled) {
- lockThreadedIO();
- info = sdscatprintf(info,
- "vm_conf_max_memory:%llu\r\n"
- "vm_conf_page_size:%llu\r\n"
- "vm_conf_pages:%llu\r\n"
- "vm_stats_used_pages:%llu\r\n"
- "vm_stats_swapped_objects:%llu\r\n"
- "vm_stats_swappin_count:%llu\r\n"
- "vm_stats_swappout_count:%llu\r\n"
- "vm_stats_io_newjobs_len:%lu\r\n"
- "vm_stats_io_processing_len:%lu\r\n"
- "vm_stats_io_processed_len:%lu\r\n"
- "vm_stats_io_active_threads:%lu\r\n"
- "vm_stats_blocked_clients:%lu\r\n"
- ,(unsigned long long) server.vm_max_memory,
- (unsigned long long) server.vm_page_size,
- (unsigned long long) server.vm_pages,
- (unsigned long long) server.vm_stats_used_pages,
- (unsigned long long) server.vm_stats_swapped_objects,
- (unsigned long long) server.vm_stats_swapins,
- (unsigned long long) server.vm_stats_swapouts,
- (unsigned long) listLength(server.io_newjobs),
- (unsigned long) listLength(server.io_processing),
- (unsigned long) listLength(server.io_processed),
- (unsigned long) server.io_active_threads,
- (unsigned long) server.vm_blocked_clients
- );
- unlockThreadedIO();
- }
- for (j = 0; j < server.dbnum; j++) {
- long long keys, vkeys;
-
- keys = dictSize(server.db[j].dict);
- vkeys = dictSize(server.db[j].expires);
- if (keys || vkeys) {
- info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
- j, keys, vkeys);
- }
- }
- return info;
-}
-
-static void infoCommand(redisClient *c) {
- sds info = genRedisInfoString();
- addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
- (unsigned long)sdslen(info)));
- addReplySds(c,info);
- addReply(c,shared.crlf);
-}
-
-static void monitorCommand(redisClient *c) {
- /* ignore MONITOR if aleady slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
-
- c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
- c->slaveseldb = 0;
- listAddNodeTail(server.monitors,c);
- addReply(c,shared.ok);
-}
-
-/* ================================= Expire ================================= */
-static int removeExpire(redisDb *db, robj *key) {
- /* An expire may only be removed if there is a corresponding entry in the
- * main dict. Otherwise, the key will never be freed. */
- redisAssert(dictFind(db->dict,key->ptr) != NULL);
- if (dictDelete(db->expires,key->ptr) == DICT_OK) {
- return 1;
- } else {
- return 0;
- }
-}
-
-static int setExpire(redisDb *db, robj *key, time_t when) {
- dictEntry *de;
-
- /* Reuse the sds from the main dict in the expire dict */
- redisAssert((de = dictFind(db->dict,key->ptr)) != NULL);
- if (dictAdd(db->expires,dictGetEntryKey(de),(void*)when) == DICT_ERR) {
- return 0;
- } else {
- return 1;
- }
-}
-
-/* Return the expire time of the specified key, or -1 if no expire
- * is associated with this key (i.e. the key is non volatile) */
-static time_t getExpire(redisDb *db, robj *key) {
- dictEntry *de;
-
- /* No expire? return ASAP */
- if (dictSize(db->expires) == 0 ||
- (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
-
- /* The entry was found in the expire dict, this means it should also
- * be present in the main dict (safety check). */
- redisAssert(dictFind(db->dict,key->ptr) != NULL);
- return (time_t) dictGetEntryVal(de);
-}
-
-static int expireIfNeeded(redisDb *db, robj *key) {
- time_t when = getExpire(db,key);
- if (when < 0) return 0;
-
- /* Return when this key has not expired */
- if (time(NULL) <= when) return 0;
-
- /* Delete the key */
- server.stat_expiredkeys++;
- server.dirty++;
- return dbDelete(db,key);
-}
-
-static int deleteIfVolatile(redisDb *db, robj *key) {
- if (getExpire(db,key) < 0) return 0;
-
- /* Delete the key */
- server.stat_expiredkeys++;
- server.dirty++;
- return dbDelete(db,key);
-}
-
-static void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
- dictEntry *de;
- time_t seconds;
-
- if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
-
- seconds -= offset;
-
- de = dictFind(c->db->dict,key->ptr);
- if (de == NULL) {
- addReply(c,shared.czero);
- return;
- }
- if (seconds <= 0) {
- if (dbDelete(c->db,key)) server.dirty++;
- addReply(c, shared.cone);
- return;
- } else {
- time_t when = time(NULL)+seconds;
- if (setExpire(c->db,key,when)) {
- addReply(c,shared.cone);
- server.dirty++;
- } else {
- addReply(c,shared.czero);
- }
- return;
- }
-}
-
-static void expireCommand(redisClient *c) {
- expireGenericCommand(c,c->argv[1],c->argv[2],0);
-}
-
-static void expireatCommand(redisClient *c) {
- expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
-}
-
-static void ttlCommand(redisClient *c) {
- time_t expire;
- int ttl = -1;
-
- expire = getExpire(c->db,c->argv[1]);
- if (expire != -1) {
- ttl = (int) (expire-time(NULL));
- if (ttl < 0) ttl = -1;
- }
- addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
-}
-
-/* ================================ MULTI/EXEC ============================== */
-
-/* Client state initialization for MULTI/EXEC */
-static void initClientMultiState(redisClient *c) {
- c->mstate.commands = NULL;
- c->mstate.count = 0;
-}
-
-/* Release all the resources associated with MULTI/EXEC state */
-static void freeClientMultiState(redisClient *c) {
- int j;
-
- for (j = 0; j < c->mstate.count; j++) {
- int i;
- multiCmd *mc = c->mstate.commands+j;
-
- for (i = 0; i < mc->argc; i++)
- decrRefCount(mc->argv[i]);
- zfree(mc->argv);
- }
- zfree(c->mstate.commands);
-}
-
-/* Add a new command into the MULTI commands queue */
-static void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
- multiCmd *mc;
- int j;
-
- c->mstate.commands = zrealloc(c->mstate.commands,
- sizeof(multiCmd)*(c->mstate.count+1));
- mc = c->mstate.commands+c->mstate.count;
- mc->cmd = cmd;
- mc->argc = c->argc;
- mc->argv = zmalloc(sizeof(robj*)*c->argc);
- memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
- for (j = 0; j < c->argc; j++)
- incrRefCount(mc->argv[j]);
- c->mstate.count++;
-}
-
-static void multiCommand(redisClient *c) {
- if (c->flags & REDIS_MULTI) {
- addReplySds(c,sdsnew("-ERR MULTI calls can not be nested\r\n"));
- return;
- }
- c->flags |= REDIS_MULTI;
- addReply(c,shared.ok);
-}
-
-static void discardCommand(redisClient *c) {
- if (!(c->flags & REDIS_MULTI)) {
- addReplySds(c,sdsnew("-ERR DISCARD without MULTI\r\n"));
- return;
- }
-
- freeClientMultiState(c);
- initClientMultiState(c);
- c->flags &= (~REDIS_MULTI);
- unwatchAllKeys(c);
- addReply(c,shared.ok);
-}
-
-/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
- * implememntation for more information. */
-static void execCommandReplicateMulti(redisClient *c) {
- struct redisCommand *cmd;
- robj *multistring = createStringObject("MULTI",5);
-
- cmd = lookupCommand("multi");
- if (server.appendonly)
- feedAppendOnlyFile(cmd,c->db->id,&multistring,1);
- if (listLength(server.slaves))
- replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
- decrRefCount(multistring);
-}
-
-static void execCommand(redisClient *c) {
- int j;
- robj **orig_argv;
- int orig_argc;
-
- if (!(c->flags & REDIS_MULTI)) {
- addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
- return;
- }
-
- /* Check if we need to abort the EXEC if some WATCHed key was touched.
- * A failed EXEC will return a multi bulk nil object. */
- if (c->flags & REDIS_DIRTY_CAS) {
- freeClientMultiState(c);
- initClientMultiState(c);
- c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
- unwatchAllKeys(c);
- addReply(c,shared.nullmultibulk);
- return;
- }
-
- /* Replicate a MULTI request now that we are sure the block is executed.
- * This way we'll deliver the MULTI/..../EXEC block as a whole and
- * both the AOF and the replication link will have the same consistency
- * and atomicity guarantees. */
- execCommandReplicateMulti(c);
-
- /* Exec all the queued commands */
- unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
- orig_argv = c->argv;
- orig_argc = c->argc;
- addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
- for (j = 0; j < c->mstate.count; j++) {
- c->argc = c->mstate.commands[j].argc;
- c->argv = c->mstate.commands[j].argv;
- call(c,c->mstate.commands[j].cmd);
- }
- c->argv = orig_argv;
- c->argc = orig_argc;
- freeClientMultiState(c);
- initClientMultiState(c);
- c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
- /* Make sure the EXEC command is always replicated / AOF, since we
- * always send the MULTI command (we can't know beforehand if the
- * next operations will contain at least a modification to the DB). */
- server.dirty++;
-}
-
-/* =========================== Blocking Operations ========================= */
-
-/* Currently Redis blocking operations support is limited to list POP ops,
- * so the current implementation is not fully generic, but it is also not
- * completely specific so it will not require a rewrite to support new
- * kind of blocking operations in the future.
- *
- * Still it's important to note that list blocking operations can be already
- * used as a notification mechanism in order to implement other blocking
- * operations at application level, so there must be a very strong evidence
- * of usefulness and generality before new blocking operations are implemented.
- *
- * This is how the current blocking POP works, we use BLPOP as example:
- * - If the user calls BLPOP and the key exists and contains a non empty list
- * then LPOP is called instead. So BLPOP is semantically the same as LPOP
- * if there is not to block.
- * - If instead BLPOP is called and the key does not exists or the list is
- * empty we need to block. In order to do so we remove the notification for
- * new data to read in the client socket (so that we'll not serve new
- * requests if the blocking request is not served). Also we put the client
- * in a dictionary (db->blocking_keys) mapping keys to a list of clients
- * blocking for this keys.
- * - If a PUSH operation against a key with blocked clients waiting is
- * performed, we serve the first in the list: basically instead to push
- * the new element inside the list we return it to the (first / oldest)
- * blocking client, unblock the client, and remove it form the list.
- *
- * The above comment and the source code should be enough in order to understand
- * the implementation and modify / fix it later.
- */
-
-/* Set a client in blocking mode for the specified key, with the specified
- * timeout */
-static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
- dictEntry *de;
- list *l;
- int j;
-
- c->blocking_keys = zmalloc(sizeof(robj*)*numkeys);
- c->blocking_keys_num = numkeys;
- c->blockingto = timeout;
- for (j = 0; j < numkeys; j++) {
- /* Add the key in the client structure, to map clients -> keys */
- c->blocking_keys[j] = keys[j];
- incrRefCount(keys[j]);
-
- /* And in the other "side", to map keys -> clients */
- de = dictFind(c->db->blocking_keys,keys[j]);
- if (de == NULL) {
- int retval;
-
- /* For every key we take a list of clients blocked for it */
- l = listCreate();
- retval = dictAdd(c->db->blocking_keys,keys[j],l);
- incrRefCount(keys[j]);
- assert(retval == DICT_OK);
- } else {
- l = dictGetEntryVal(de);
- }
- listAddNodeTail(l,c);
- }
- /* Mark the client as a blocked client */
- c->flags |= REDIS_BLOCKED;
- server.blpop_blocked_clients++;
-}
-
-/* Unblock a client that's waiting in a blocking operation such as BLPOP */
-static void unblockClientWaitingData(redisClient *c) {
- dictEntry *de;
- list *l;
- int j;
-
- assert(c->blocking_keys != NULL);
- /* The client may wait for multiple keys, so unblock it for every key. */
- for (j = 0; j < c->blocking_keys_num; j++) {
- /* Remove this client from the list of clients waiting for this key. */
- de = dictFind(c->db->blocking_keys,c->blocking_keys[j]);
- assert(de != NULL);
- l = dictGetEntryVal(de);
- listDelNode(l,listSearchKey(l,c));
- /* If the list is empty we need to remove it to avoid wasting memory */
- if (listLength(l) == 0)
- dictDelete(c->db->blocking_keys,c->blocking_keys[j]);
- decrRefCount(c->blocking_keys[j]);
- }
- /* Cleanup the client structure */
- zfree(c->blocking_keys);
- c->blocking_keys = NULL;
- c->flags &= (~REDIS_BLOCKED);
- server.blpop_blocked_clients--;
- /* We want to process data if there is some command waiting
- * in the input buffer. Note that this is safe even if
- * unblockClientWaitingData() gets called from freeClient() because
- * freeClient() will be smart enough to call this function
- * *after* c->querybuf was set to NULL. */
- if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
-}
-
-/* This should be called from any function PUSHing into lists.
- * 'c' is the "pushing client", 'key' is the key it is pushing data against,
- * 'ele' is the element pushed.
- *
- * If the function returns 0 there was no client waiting for a list push
- * against this key.
- *
- * If the function returns 1 there was a client waiting for a list push
- * against this key, the element was passed to this client thus it's not
- * needed to actually add it to the list and the caller should return asap. */
-static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
- struct dictEntry *de;
- redisClient *receiver;
- list *l;
- listNode *ln;
-
- de = dictFind(c->db->blocking_keys,key);
- if (de == NULL) return 0;
- l = dictGetEntryVal(de);
- ln = listFirst(l);
- assert(ln != NULL);
- receiver = ln->value;
-
- addReplySds(receiver,sdsnew("*2\r\n"));
- addReplyBulk(receiver,key);
- addReplyBulk(receiver,ele);
- unblockClientWaitingData(receiver);
- return 1;
-}
-
-/* Blocking RPOP/LPOP */
-static void blockingPopGenericCommand(redisClient *c, int where) {
- robj *o;
- time_t timeout;
- int j;
-
- for (j = 1; j < c->argc-1; j++) {
- o = lookupKeyWrite(c->db,c->argv[j]);
- if (o != NULL) {
- if (o->type != REDIS_LIST) {
- addReply(c,shared.wrongtypeerr);
- return;
- } else {
- if (listTypeLength(o) != 0) {
- /* If the list contains elements fall back to the usual
- * non-blocking POP operation */
- robj *argv[2], **orig_argv;
- int orig_argc;
-
- /* We need to alter the command arguments before to call
- * popGenericCommand() as the command takes a single key. */
- orig_argv = c->argv;
- orig_argc = c->argc;
- argv[1] = c->argv[j];
- c->argv = argv;
- c->argc = 2;
-
- /* Also the return value is different, we need to output
- * the multi bulk reply header and the key name. The
- * "real" command will add the last element (the value)
- * for us. If this souds like an hack to you it's just
- * because it is... */
- addReplySds(c,sdsnew("*2\r\n"));
- addReplyBulk(c,argv[1]);
- popGenericCommand(c,where);
-
- /* Fix the client structure with the original stuff */
- c->argv = orig_argv;
- c->argc = orig_argc;
- return;
- }
- }
- }
- }
- /* If the list is empty or the key does not exists we must block */
- timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
- if (timeout > 0) timeout += time(NULL);
- blockForKeys(c,c->argv+1,c->argc-2,timeout);
-}
-
-static void blpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_HEAD);
-}
-
-static void brpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_TAIL);
-}
-
-/* =============================== Replication ============================= */
-
-static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
- ssize_t nwritten, ret = size;
- time_t start = time(NULL);
-
- timeout++;
- while(size) {
- if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
- nwritten = write(fd,ptr,size);
- if (nwritten == -1) return -1;
- ptr += nwritten;
- size -= nwritten;
- }
- if ((time(NULL)-start) > timeout) {
- errno = ETIMEDOUT;
- return -1;
- }
- }
- return ret;
-}
-
-static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
- ssize_t nread, totread = 0;
- time_t start = time(NULL);
-
- timeout++;
- while(size) {
- if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
- nread = read(fd,ptr,size);
- if (nread == -1) return -1;
- ptr += nread;
- size -= nread;
- totread += nread;
- }
- if ((time(NULL)-start) > timeout) {
- errno = ETIMEDOUT;
- return -1;
- }
- }
- return totread;
-}
-
-static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
- ssize_t nread = 0;
-
- size--;
- while(size) {
- char c;
-
- if (syncRead(fd,&c,1,timeout) == -1) return -1;
- if (c == '\n') {
- *ptr = '\0';
- if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
- return nread;
- } else {
- *ptr++ = c;
- *ptr = '\0';
- nread++;
- }
- }
- return nread;
-}
-
-static void syncCommand(redisClient *c) {
- /* ignore SYNC if aleady slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
-
- /* SYNC can't be issued when the server has pending data to send to
- * the client about already issued commands. We need a fresh reply
- * buffer registering the differences between the BGSAVE and the current
- * dataset, so that we can copy to other slaves if needed. */
- if (listLength(c->reply) != 0) {
- addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
- return;
- }
-
- redisLog(REDIS_NOTICE,"Slave ask for synchronization");
- /* Here we need to check if there is a background saving operation
- * in progress, or if it is required to start one */
- if (server.bgsavechildpid != -1) {
- /* Ok a background save is in progress. Let's check if it is a good
- * one for replication, i.e. if there is another slave that is
- * registering differences since the server forked to save */
- redisClient *slave;
- listNode *ln;
- listIter li;
-
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
- }
- if (ln) {
- /* Perfect, the server is already registering differences for
- * another slave. Set the right state, and copy the buffer. */
- listRelease(c->reply);
- c->reply = listDup(slave->reply);
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
- } else {
- /* No way, we need to wait for the next BGSAVE in order to
- * register differences */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
- redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
- }
- } else {
- /* Ok we don't have a BGSAVE in progress, let's start one */
- redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
- if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
- redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
- addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
- return;
- }
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- }
- c->repldbfd = -1;
- c->flags |= REDIS_SLAVE;
- c->slaveseldb = 0;
- listAddNodeTail(server.slaves,c);
- return;
-}
-
-static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *slave = privdata;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- char buf[REDIS_IOBUF_LEN];
- ssize_t nwritten, buflen;
-
- if (slave->repldboff == 0) {
- /* Write the bulk write count before to transfer the DB. In theory here
- * we don't know how much room there is in the output buffer of the
- * socket, but in pratice SO_SNDLOWAT (the minimum count for output
- * operations) will never be smaller than the few bytes we need. */
- sds bulkcount;
-
- bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
- slave->repldbsize);
- if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
- {
- sdsfree(bulkcount);
- freeClient(slave);
- return;
- }
- sdsfree(bulkcount);
- }
- lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
- buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
- if (buflen <= 0) {
- redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
- (buflen == 0) ? "premature EOF" : strerror(errno));
- freeClient(slave);
- return;
- }
- if ((nwritten = write(fd,buf,buflen)) == -1) {
- redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
- strerror(errno));
- freeClient(slave);
- return;
- }
- slave->repldboff += nwritten;
- if (slave->repldboff == slave->repldbsize) {
- close(slave->repldbfd);
- slave->repldbfd = -1;
- aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- slave->replstate = REDIS_REPL_ONLINE;
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
- sendReplyToClient, slave) == AE_ERR) {
- freeClient(slave);
- return;
- }
- addReplySds(slave,sdsempty());
- redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
- }
-}
-
-/* This function is called at the end of every backgrond saving.
- * The argument bgsaveerr is REDIS_OK if the background saving succeeded
- * otherwise REDIS_ERR is passed to the function.
- *
- * The goal of this function is to handle slaves waiting for a successful
- * background saving in order to perform non-blocking synchronization. */
-static void updateSlavesWaitingBgsave(int bgsaveerr) {
- listNode *ln;
- int startbgsave = 0;
- listIter li;
-
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
-
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
- startbgsave = 1;
- slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
- struct redis_stat buf;
-
- if (bgsaveerr != REDIS_OK) {
- freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
- continue;
- }
- if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
- redis_fstat(slave->repldbfd,&buf) == -1) {
- freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
- continue;
- }
- slave->repldboff = 0;
- slave->repldbsize = buf.st_size;
- slave->replstate = REDIS_REPL_SEND_BULK;
- aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
- freeClient(slave);
- continue;
- }
- }
- }
- if (startbgsave) {
- if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
- listIter li;
-
- listRewind(server.slaves,&li);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
-
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
- freeClient(slave);
- }
- }
- }
-}
-
-static int syncWithMaster(void) {
- char buf[1024], tmpfile[256], authcmd[1024];
- long dumpsize;
- int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
- int dfd, maxtries = 5;
-
- if (fd == -1) {
- redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
- strerror(errno));
- return REDIS_ERR;
- }
-
- /* AUTH with the master if required. */
- if(server.masterauth) {
- snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
- if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
- close(fd);
- redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- /* Read the AUTH result. */
- if (syncReadLine(fd,buf,1024,3600) == -1) {
- close(fd);
- redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- if (buf[0] != '+') {
- close(fd);
- redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
- return REDIS_ERR;
- }
- }
-
- /* Issue the SYNC command */
- if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
- close(fd);
- redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- /* Read the bulk write count */
- if (syncReadLine(fd,buf,1024,3600) == -1) {
- close(fd);
- redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- if (buf[0] != '$') {
- close(fd);
- redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
- return REDIS_ERR;
- }
- dumpsize = strtol(buf+1,NULL,10);
- redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
- /* Read the bulk write data on a temp file */
- while(maxtries--) {
- snprintf(tmpfile,256,
- "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
- dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
- if (dfd != -1) break;
- sleep(1);
- }
- if (dfd == -1) {
- close(fd);
- redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
- return REDIS_ERR;
- }
- while(dumpsize) {
- int nread, nwritten;
-
- nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
- if (nread == -1) {
- redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
- strerror(errno));
- close(fd);
- close(dfd);
- return REDIS_ERR;
- }
- nwritten = write(dfd,buf,nread);
- if (nwritten == -1) {
- redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
- close(fd);
- close(dfd);
- return REDIS_ERR;
- }
- dumpsize -= nread;
- }
- close(dfd);
- if (rename(tmpfile,server.dbfilename) == -1) {
- redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
- unlink(tmpfile);
- close(fd);
- return REDIS_ERR;
- }
- emptyDb();
- if (rdbLoad(server.dbfilename) != REDIS_OK) {
- redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
- close(fd);
- return REDIS_ERR;
- }
- server.master = createClient(fd);
- server.master->flags |= REDIS_MASTER;
- server.master->authenticated = 1;
- server.replstate = REDIS_REPL_CONNECTED;
- return REDIS_OK;
-}
-
-static void slaveofCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"no") &&
- !strcasecmp(c->argv[2]->ptr,"one")) {
- if (server.masterhost) {
- sdsfree(server.masterhost);
- server.masterhost = NULL;
- if (server.master) freeClient(server.master);
- server.replstate = REDIS_REPL_NONE;
- redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
- }
- } else {
- sdsfree(server.masterhost);
- server.masterhost = sdsdup(c->argv[1]->ptr);
- server.masterport = atoi(c->argv[2]->ptr);
- if (server.master) freeClient(server.master);
- server.replstate = REDIS_REPL_CONNECT;
- redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
- server.masterhost, server.masterport);
- }
- addReply(c,shared.ok);
-}
-
-/* ============================ Maxmemory directive ======================== */
-
-/* Try to free one object form the pre-allocated objects free list.
- * This is useful under low mem conditions as by default we take 1 million
- * free objects allocated. On success REDIS_OK is returned, otherwise
- * REDIS_ERR. */
-static int tryFreeOneObjectFromFreelist(void) {
- robj *o;
-
- if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
- if (listLength(server.objfreelist)) {
- listNode *head = listFirst(server.objfreelist);
- o = listNodeValue(head);
- listDelNode(server.objfreelist,head);
- if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
- zfree(o);
- return REDIS_OK;
- } else {
- if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
- return REDIS_ERR;
- }
-}
-
-/* This function gets called when 'maxmemory' is set on the config file to limit
- * the max memory used by the server, and we are out of memory.
- * This function will try to, in order:
- *
- * - Free objects from the free list
- * - Try to remove keys with an EXPIRE set
- *
- * It is not possible to free enough memory to reach used-memory < maxmemory
- * the server will start refusing commands that will enlarge even more the
- * memory usage.
- */
-static void freeMemoryIfNeeded(void) {
- while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
- int j, k, freed = 0;
-
- if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
- for (j = 0; j < server.dbnum; j++) {
- int minttl = -1;
- robj *minkey = NULL;
- struct dictEntry *de;
-
- if (dictSize(server.db[j].expires)) {
- freed = 1;
- /* From a sample of three keys drop the one nearest to
- * the natural expire */
- for (k = 0; k < 3; k++) {
- time_t t;
-
- de = dictGetRandomKey(server.db[j].expires);
- t = (time_t) dictGetEntryVal(de);
- if (minttl == -1 || t < minttl) {
- minkey = dictGetEntryKey(de);
- minttl = t;
- }
- }
- dbDelete(server.db+j,minkey);
- }
- }
- if (!freed) return; /* nothing to free... */
- }
-}
-
-/* ============================== Append Only file ========================== */
-
-/* Called when the user switches from "appendonly yes" to "appendonly no"
- * at runtime using the CONFIG command. */
-static void stopAppendOnly(void) {
- flushAppendOnlyFile();
- aof_fsync(server.appendfd);
- close(server.appendfd);
-
- server.appendfd = -1;
- server.appendseldb = -1;
- server.appendonly = 0;
- /* rewrite operation in progress? kill it, wait child exit */
- if (server.bgsavechildpid != -1) {
- int statloc;
-
- if (kill(server.bgsavechildpid,SIGKILL) != -1)
- wait3(&statloc,0,NULL);
- /* reset the buffer accumulating changes while the child saves */
- sdsfree(server.bgrewritebuf);
- server.bgrewritebuf = sdsempty();
- server.bgsavechildpid = -1;
- }
-}
-
-/* Called when the user switches from "appendonly no" to "appendonly yes"
- * at runtime using the CONFIG command. */
-static int startAppendOnly(void) {
- server.appendonly = 1;
- server.lastfsync = time(NULL);
- server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
- if (server.appendfd == -1) {
- redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno));
- return REDIS_ERR;
- }
- if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
- server.appendonly = 0;
- close(server.appendfd);
- redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno));
- return REDIS_ERR;
- }
- return REDIS_OK;
-}
-
-/* Write the append only file buffer on disk.
- *
- * Since we are required to write the AOF before replying to the client,
- * and the only way the client socket can get a write is entering when the
- * the event loop, we accumulate all the AOF writes in a memory
- * buffer and write it on disk using this function just before entering
- * the event loop again. */
-static void flushAppendOnlyFile(void) {
- time_t now;
- ssize_t nwritten;
-
- if (sdslen(server.aofbuf) == 0) return;
-
- /* We want to perform a single write. This should be guaranteed atomic
- * at least if the filesystem we are writing is a real physical one.
- * While this will save us against the server being killed I don't think
- * there is much to do about the whole server stopping for power problems
- * or alike */
- nwritten = write(server.appendfd,server.aofbuf,sdslen(server.aofbuf));
- if (nwritten != (signed)sdslen(server.aofbuf)) {
- /* Ooops, we are in troubles. The best thing to do for now is
- * aborting instead of giving the illusion that everything is
- * working as expected. */
- if (nwritten == -1) {
- redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
- } else {
- redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
- }
- exit(1);
- }
- sdsfree(server.aofbuf);
- server.aofbuf = sdsempty();
-
- /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
- * childs performing heavy I/O on disk. */
- if (server.no_appendfsync_on_rewrite &&
- (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1))
- return;
- /* Fsync if needed */
- now = time(NULL);
- if (server.appendfsync == APPENDFSYNC_ALWAYS ||
- (server.appendfsync == APPENDFSYNC_EVERYSEC &&
- now-server.lastfsync > 1))
- {
- /* aof_fsync is defined as fdatasync() for Linux in order to avoid
- * flushing metadata. */
- aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
- server.lastfsync = now;
- }
-}
-
-static sds catAppendOnlyGenericCommand(sds buf, int argc, robj **argv) {
- int j;
- buf = sdscatprintf(buf,"*%d\r\n",argc);
- for (j = 0; j < argc; j++) {
- robj *o = getDecodedObject(argv[j]);
- buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
- buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
- buf = sdscatlen(buf,"\r\n",2);
- decrRefCount(o);
- }
- return buf;
-}
-
-static sds catAppendOnlyExpireAtCommand(sds buf, robj *key, robj *seconds) {
- int argc = 3;
- long when;
- robj *argv[3];
-
- /* Make sure we can use strtol */
- seconds = getDecodedObject(seconds);
- when = time(NULL)+strtol(seconds->ptr,NULL,10);
- decrRefCount(seconds);
-
- argv[0] = createStringObject("EXPIREAT",8);
- argv[1] = key;
- argv[2] = createObject(REDIS_STRING,
- sdscatprintf(sdsempty(),"%ld",when));
- buf = catAppendOnlyGenericCommand(buf, argc, argv);
- decrRefCount(argv[0]);
- decrRefCount(argv[2]);
- return buf;
-}
-
-static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
- sds buf = sdsempty();
- robj *tmpargv[3];
-
- /* The DB this command was targetting is not the same as the last command
- * we appendend. To issue a SELECT command is needed. */
- if (dictid != server.appendseldb) {
- char seldb[64];
-
- snprintf(seldb,sizeof(seldb),"%d",dictid);
- buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
- (unsigned long)strlen(seldb),seldb);
- server.appendseldb = dictid;
- }
-
- if (cmd->proc == expireCommand) {
- /* Translate EXPIRE into EXPIREAT */
- buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
- } else if (cmd->proc == setexCommand) {
- /* Translate SETEX to SET and EXPIREAT */
- tmpargv[0] = createStringObject("SET",3);
- tmpargv[1] = argv[1];
- tmpargv[2] = argv[3];
- buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
- decrRefCount(tmpargv[0]);
- buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
- } else {
- buf = catAppendOnlyGenericCommand(buf,argc,argv);
- }
-
- /* Append to the AOF buffer. This will be flushed on disk just before
- * of re-entering the event loop, so before the client will get a
- * positive reply about the operation performed. */
- server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf));
-
- /* If a background append only file rewriting is in progress we want to
- * accumulate the differences between the child DB and the current one
- * in a buffer, so that when the child process will do its work we
- * can append the differences to the new append only file. */
- if (server.bgrewritechildpid != -1)
- server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
-
- sdsfree(buf);
-}
-
-/* In Redis commands are always executed in the context of a client, so in
- * order to load the append only file we need to create a fake client. */
-static struct redisClient *createFakeClient(void) {
- struct redisClient *c = zmalloc(sizeof(*c));
-
- selectDb(c,0);
- c->fd = -1;
- c->querybuf = sdsempty();
- c->argc = 0;
- c->argv = NULL;
- c->flags = 0;
- /* We set the fake client as a slave waiting for the synchronization
- * so that Redis will not try to send replies to this client. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
- c->reply = listCreate();
- listSetFreeMethod(c->reply,decrRefCount);
- listSetDupMethod(c->reply,dupClientReplyValue);
- initClientMultiState(c);
- return c;
-}
-
-static void freeFakeClient(struct redisClient *c) {
- sdsfree(c->querybuf);
- listRelease(c->reply);
- freeClientMultiState(c);
- zfree(c);
-}
-
-/* Replay the append log file. On error REDIS_OK is returned. On non fatal
- * error (the append only file is zero-length) REDIS_ERR is returned. On
- * fatal error an error message is logged and the program exists. */
-int loadAppendOnlyFile(char *filename) {
- struct redisClient *fakeClient;
- FILE *fp = fopen(filename,"r");
- struct redis_stat sb;
- int appendonly = server.appendonly;
-
- if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
- return REDIS_ERR;
-
- if (fp == NULL) {
- redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
- exit(1);
- }
-
- /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
- * to the same file we're about to read. */
- server.appendonly = 0;
-
- fakeClient = createFakeClient();
- while(1) {
- int argc, j;
- unsigned long len;
- robj **argv;
- char buf[128];
- sds argsds;
- struct redisCommand *cmd;
- int force_swapout;
-
- if (fgets(buf,sizeof(buf),fp) == NULL) {
- if (feof(fp))
- break;
- else
- goto readerr;
- }
- if (buf[0] != '*') goto fmterr;
- argc = atoi(buf+1);
- argv = zmalloc(sizeof(robj*)*argc);
- for (j = 0; j < argc; j++) {
- if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
- if (buf[0] != '$') goto fmterr;
- len = strtol(buf+1,NULL,10);
- argsds = sdsnewlen(NULL,len);
- if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
- argv[j] = createObject(REDIS_STRING,argsds);
- if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
- }
-
- /* Command lookup */
- cmd = lookupCommand(argv[0]->ptr);
- if (!cmd) {
- redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
- exit(1);
- }
- /* Try object encoding */
- if (cmd->flags & REDIS_CMD_BULK)
- argv[argc-1] = tryObjectEncoding(argv[argc-1]);
- /* Run the command in the context of a fake client */
- fakeClient->argc = argc;
- fakeClient->argv = argv;
- cmd->proc(fakeClient);
- /* Discard the reply objects list from the fake client */
- while(listLength(fakeClient->reply))
- listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
- /* Clean up, ready for the next command */
- for (j = 0; j < argc; j++) decrRefCount(argv[j]);
- zfree(argv);
- /* Handle swapping while loading big datasets when VM is on */
- force_swapout = 0;
- if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
- force_swapout = 1;
-
- if (server.vm_enabled && force_swapout) {
- while (zmalloc_used_memory() > server.vm_max_memory) {
- if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
- }
- }
- }
-
- /* This point can only be reached when EOF is reached without errors.
- * If the client is in the middle of a MULTI/EXEC, log error and quit. */
- if (fakeClient->flags & REDIS_MULTI) goto readerr;
-
- fclose(fp);
- freeFakeClient(fakeClient);
- server.appendonly = appendonly;
- return REDIS_OK;
-
-readerr:
- if (feof(fp)) {
- redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
- } else {
- redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
- }
- exit(1);
-fmterr:
- redisLog(REDIS_WARNING,"Bad file format reading the append only file");
- exit(1);
-}
-
-/* Write binary-safe string into a file in the bulkformat
- * $<count>\r\n<payload>\r\n */
-static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
- char cbuf[128];
- int clen;
- cbuf[0] = '$';
- clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len);
- cbuf[clen++] = '\r';
- cbuf[clen++] = '\n';
- if (fwrite(cbuf,clen,1,fp) == 0) return 0;
- if (len > 0 && fwrite(s,len,1,fp) == 0) return 0;
- if (fwrite("\r\n",2,1,fp) == 0) return 0;
- return 1;
-}
-
-/* Write a double value in bulk format $<count>\r\n<payload>\r\n */
-static int fwriteBulkDouble(FILE *fp, double d) {
- char buf[128], dbuf[128];
-
- snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
- snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
- if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
- if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
- return 1;
-}
-
-/* Write a long value in bulk format $<count>\r\n<payload>\r\n */
-static int fwriteBulkLongLong(FILE *fp, long long l) {
- char bbuf[128], lbuf[128];
- unsigned int blen, llen;
- llen = ll2string(lbuf,32,l);
- blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf);
- if (fwrite(bbuf,blen,1,fp) == 0) return 0;
- return 1;
-}
-
-/* Delegate writing an object to writing a bulk string or bulk long long. */
-static int fwriteBulkObject(FILE *fp, robj *obj) {
- /* Avoid using getDecodedObject to help copy-on-write (we are often
- * in a child process when this function is called). */
- if (obj->encoding == REDIS_ENCODING_INT) {
- return fwriteBulkLongLong(fp,(long)obj->ptr);
- } else if (obj->encoding == REDIS_ENCODING_RAW) {
- return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr));
- } else {
- redisPanic("Unknown string encoding");
- }
-}
-
-/* Write a sequence of commands able to fully rebuild the dataset into
- * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
-static int rewriteAppendOnlyFile(char *filename) {
- dictIterator *di = NULL;
- dictEntry *de;
- FILE *fp;
- char tmpfile[256];
- int j;
- time_t now = time(NULL);
-
- /* Note that we have to use a different temp name here compared to the
- * one used by rewriteAppendOnlyFileBackground() function. */
- snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
- fp = fopen(tmpfile,"w");
- if (!fp) {
- redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
- return REDIS_ERR;
- }
- for (j = 0; j < server.dbnum; j++) {
- char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
- redisDb *db = server.db+j;
- dict *d = db->dict;
- if (dictSize(d) == 0) continue;
- di = dictGetIterator(d);
- if (!di) {
- fclose(fp);
- return REDIS_ERR;
- }
-
- /* SELECT the new DB */
- if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkLongLong(fp,j) == 0) goto werr;
-
- /* Iterate this DB writing every entry */
- while((de = dictNext(di)) != NULL) {
- sds keystr = dictGetEntryKey(de);
- robj key, *o;
- time_t expiretime;
- int swapped;
-
- keystr = dictGetEntryKey(de);
- o = dictGetEntryVal(de);
- initStaticStringObject(key,keystr);
- /* If the value for this key is swapped, load a preview in memory.
- * We use a "swapped" flag to remember if we need to free the
- * value object instead to just increment the ref count anyway
- * in order to avoid copy-on-write of pages if we are forked() */
- if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
- o->storage == REDIS_VM_SWAPPING) {
- swapped = 0;
- } else {
- o = vmPreviewObject(o);
- swapped = 1;
- }
- expiretime = getExpire(db,&key);
-
- /* Save the key and associated value */
- if (o->type == REDIS_STRING) {
- /* Emit a SET command */
- char cmd[]="*3\r\n$3\r\nSET\r\n";
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- /* Key and value */
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkObject(fp,o) == 0) goto werr;
- } else if (o->type == REDIS_LIST) {
- /* Emit the RPUSHes needed to rebuild the list */
- char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = o->ptr;
- unsigned char *p = ziplistIndex(zl,0);
- unsigned char *vstr;
- unsigned int vlen;
- long long vlong;
-
- while(ziplistGet(p,&vstr,&vlen,&vlong)) {
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (vstr) {
- if (fwriteBulkString(fp,(char*)vstr,vlen) == 0)
- goto werr;
- } else {
- if (fwriteBulkLongLong(fp,vlong) == 0)
- goto werr;
- }
- p = ziplistNext(zl,p);
- }
- } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
- list *list = o->ptr;
- listNode *ln;
- listIter li;
-
- listRewind(list,&li);
- while((ln = listNext(&li))) {
- robj *eleobj = listNodeValue(ln);
-
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
- }
- } else {
- redisPanic("Unknown list encoding");
- }
- } else if (o->type == REDIS_SET) {
- /* Emit the SADDs needed to rebuild the set */
- dict *set = o->ptr;
- dictIterator *di = dictGetIterator(set);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- char cmd[]="*3\r\n$4\r\nSADD\r\n";
- robj *eleobj = dictGetEntryKey(de);
-
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_ZSET) {
- /* Emit the ZADDs needed to rebuild the sorted set */
- zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- char cmd[]="*4\r\n$4\r\nZADD\r\n";
- robj *eleobj = dictGetEntryKey(de);
- double *score = dictGetEntryVal(de);
-
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkDouble(fp,*score) == 0) goto werr;
- if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_HASH) {
- char cmd[]="*4\r\n$4\r\nHSET\r\n";
-
- /* Emit the HSETs needed to rebuild the hash */
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- unsigned char *p = zipmapRewind(o->ptr);
- unsigned char *field, *val;
- unsigned int flen, vlen;
-
- while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) {
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkString(fp,(char*)field,flen) == -1)
- return -1;
- if (fwriteBulkString(fp,(char*)val,vlen) == -1)
- return -1;
- }
- } else {
- dictIterator *di = dictGetIterator(o->ptr);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- robj *field = dictGetEntryKey(de);
- robj *val = dictGetEntryVal(de);
-
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkObject(fp,field) == -1) return -1;
- if (fwriteBulkObject(fp,val) == -1) return -1;
- }
- dictReleaseIterator(di);
- }
- } else {
- redisPanic("Unknown object type");
- }
- /* Save the expire time */
- if (expiretime != -1) {
- char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
- /* If this key is already expired skip it */
- if (expiretime < now) continue;
- if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
- if (fwriteBulkObject(fp,&key) == 0) goto werr;
- if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr;
- }
- if (swapped) decrRefCount(o);
- }
- dictReleaseIterator(di);
- }
-
- /* Make sure data will not remain on the OS's output buffers */
- fflush(fp);
- aof_fsync(fileno(fp));
- fclose(fp);
-
- /* Use RENAME to make sure the DB file is changed atomically only
- * if the generate DB file is ok. */
- if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
- unlink(tmpfile);
- return REDIS_ERR;
- }
- redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
- return REDIS_OK;
-
-werr:
- fclose(fp);
- unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
- if (di) dictReleaseIterator(di);
- return REDIS_ERR;
-}
-
-/* This is how rewriting of the append only file in background works:
- *
- * 1) The user calls BGREWRITEAOF
- * 2) Redis calls this function, that forks():
- * 2a) the child rewrite the append only file in a temp file.
- * 2b) the parent accumulates differences in server.bgrewritebuf.
- * 3) When the child finished '2a' exists.
- * 4) The parent will trap the exit code, if it's OK, will append the
- * data accumulated into server.bgrewritebuf into the temp file, and
- * finally will rename(2) the temp file in the actual file name.
- * The the new file is reopened as the new append only file. Profit!
- */
-static int rewriteAppendOnlyFileBackground(void) {
- pid_t childpid;
-
- if (server.bgrewritechildpid != -1) return REDIS_ERR;
- if (server.vm_enabled) waitEmptyIOJobsQueue();
- if ((childpid = fork()) == 0) {
- /* Child */
- char tmpfile[256];
-
- if (server.vm_enabled) vmReopenSwapFile();
- close(server.fd);
- snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
- if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
- _exit(0);
- } else {
- _exit(1);
- }
- } else {
- /* Parent */
- if (childpid == -1) {
- redisLog(REDIS_WARNING,
- "Can't rewrite append only file in background: fork: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- redisLog(REDIS_NOTICE,
- "Background append only file rewriting started by pid %d",childpid);
- server.bgrewritechildpid = childpid;
- updateDictResizePolicy();
- /* We set appendseldb to -1 in order to force the next call to the
- * feedAppendOnlyFile() to issue a SELECT command, so the differences
- * accumulated by the parent into server.bgrewritebuf will start
- * with a SELECT statement and it will be safe to merge. */
- server.appendseldb = -1;
- return REDIS_OK;
- }
- return REDIS_OK; /* unreached */
-}
-
-static void bgrewriteaofCommand(redisClient *c) {
- if (server.bgrewritechildpid != -1) {
- addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
- return;
- }
- if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
- char *status = "+Background append only file rewriting started\r\n";
- addReplySds(c,sdsnew(status));
- } else {
- addReply(c,shared.err);
- }
-}
-
-static void aofRemoveTempFile(pid_t childpid) {
- char tmpfile[256];
-
- snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
- unlink(tmpfile);
-}
-
-/* Virtual Memory is composed mainly of two subsystems:
- * - Blocking Virutal Memory
- * - Threaded Virtual Memory I/O
- * The two parts are not fully decoupled, but functions are split among two
- * different sections of the source code (delimited by comments) in order to
- * make more clear what functionality is about the blocking VM and what about
- * the threaded (not blocking) VM.
- *
- * Redis VM design:
- *
- * Redis VM is a blocking VM (one that blocks reading swapped values from
- * disk into memory when a value swapped out is needed in memory) that is made
- * unblocking by trying to examine the command argument vector in order to
- * load in background values that will likely be needed in order to exec
- * the command. The command is executed only once all the relevant keys
- * are loaded into memory.
- *
- * This basically is almost as simple of a blocking VM, but almost as parallel
- * as a fully non-blocking VM.
- */
-
-/* =================== Virtual Memory - Blocking Side ====================== */
-
-/* Create a VM pointer object. This kind of objects are used in place of
- * values in the key -> value hash table, for swapped out objects. */
-static vmpointer *createVmPointer(int vtype) {
- vmpointer *vp = zmalloc(sizeof(vmpointer));
-
- vp->type = REDIS_VMPOINTER;
- vp->storage = REDIS_VM_SWAPPED;
- vp->vtype = vtype;
- return vp;
-}
-
-static void vmInit(void) {
- off_t totsize;
- int pipefds[2];
- size_t stacksize;
- struct flock fl;
-
- if (server.vm_max_threads != 0)
- zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
-
- redisLog(REDIS_NOTICE,"Using '%s' as swap file",server.vm_swap_file);
- /* Try to open the old swap file, otherwise create it */
- if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) {
- server.vm_fp = fopen(server.vm_swap_file,"w+b");
- }
- if (server.vm_fp == NULL) {
- redisLog(REDIS_WARNING,
- "Can't open the swap file: %s. Exiting.",
- strerror(errno));
- exit(1);
- }
- server.vm_fd = fileno(server.vm_fp);
- /* Lock the swap file for writing, this is useful in order to avoid
- * another instance to use the same swap file for a config error. */
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = fl.l_len = 0;
- if (fcntl(server.vm_fd,F_SETLK,&fl) == -1) {
- redisLog(REDIS_WARNING,
- "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server.vm_swap_file, strerror(errno));
- exit(1);
- }
- /* Initialize */
- server.vm_next_page = 0;
- server.vm_near_pages = 0;
- server.vm_stats_used_pages = 0;
- server.vm_stats_swapped_objects = 0;
- server.vm_stats_swapouts = 0;
- server.vm_stats_swapins = 0;
- totsize = server.vm_pages*server.vm_page_size;
- redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
- if (ftruncate(server.vm_fd,totsize) == -1) {
- redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
- strerror(errno));
- exit(1);
- } else {
- redisLog(REDIS_NOTICE,"Swap file allocated with success");
- }
- server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
- redisLog(REDIS_VERBOSE,"Allocated %lld bytes page table for %lld pages",
- (long long) (server.vm_pages+7)/8, server.vm_pages);
- memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
-
- /* Initialize threaded I/O (used by Virtual Memory) */
- server.io_newjobs = listCreate();
- server.io_processing = listCreate();
- server.io_processed = listCreate();
- server.io_ready_clients = listCreate();
- pthread_mutex_init(&server.io_mutex,NULL);
- pthread_mutex_init(&server.obj_freelist_mutex,NULL);
- pthread_mutex_init(&server.io_swapfile_mutex,NULL);
- server.io_active_threads = 0;
- if (pipe(pipefds) == -1) {
- redisLog(REDIS_WARNING,"Unable to intialized VM: pipe(2): %s. Exiting."
- ,strerror(errno));
- exit(1);
- }
- server.io_ready_pipe_read = pipefds[0];
- server.io_ready_pipe_write = pipefds[1];
- redisAssert(anetNonBlock(NULL,server.io_ready_pipe_read) != ANET_ERR);
- /* LZF requires a lot of stack */
- pthread_attr_init(&server.io_threads_attr);
- pthread_attr_getstacksize(&server.io_threads_attr, &stacksize);
- while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
- pthread_attr_setstacksize(&server.io_threads_attr, stacksize);
- /* Listen for events in the threaded I/O pipe */
- if (aeCreateFileEvent(server.el, server.io_ready_pipe_read, AE_READABLE,
- vmThreadedIOCompletedJob, NULL) == AE_ERR)
- oom("creating file event");
-}
-
-/* Mark the page as used */
-static void vmMarkPageUsed(off_t page) {
- off_t byte = page/8;
- int bit = page&7;
- redisAssert(vmFreePage(page) == 1);
- server.vm_bitmap[byte] |= 1<<bit;
-}
-
-/* Mark N contiguous pages as used, with 'page' being the first. */
-static void vmMarkPagesUsed(off_t page, off_t count) {
- off_t j;
-
- for (j = 0; j < count; j++)
- vmMarkPageUsed(page+j);
- server.vm_stats_used_pages += count;
- redisLog(REDIS_DEBUG,"Mark USED pages: %lld pages at %lld\n",
- (long long)count, (long long)page);
-}
-
-/* Mark the page as free */
-static void vmMarkPageFree(off_t page) {
- off_t byte = page/8;
- int bit = page&7;
- redisAssert(vmFreePage(page) == 0);
- server.vm_bitmap[byte] &= ~(1<<bit);
-}
-
-/* Mark N contiguous pages as free, with 'page' being the first. */
-static void vmMarkPagesFree(off_t page, off_t count) {
- off_t j;
-
- for (j = 0; j < count; j++)
- vmMarkPageFree(page+j);
- server.vm_stats_used_pages -= count;
- redisLog(REDIS_DEBUG,"Mark FREE pages: %lld pages at %lld\n",
- (long long)count, (long long)page);
-}
-
-/* Test if the page is free */
-static int vmFreePage(off_t page) {
- off_t byte = page/8;
- int bit = page&7;
- return (server.vm_bitmap[byte] & (1<<bit)) == 0;
-}
-
-/* Find N contiguous free pages storing the first page of the cluster in *first.
- * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
- * REDIS_ERR is returned.
- *
- * This function uses a simple algorithm: we try to allocate
- * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
- * again from the start of the swap file searching for free spaces.
- *
- * If it looks pretty clear that there are no free pages near our offset
- * we try to find less populated places doing a forward jump of
- * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
- * without hurry, and then we jump again and so forth...
- *
- * This function can be improved using a free list to avoid to guess
- * too much, since we could collect data about freed pages.
- *
- * note: I implemented this function just after watching an episode of
- * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
- */
-static int vmFindContiguousPages(off_t *first, off_t n) {
- off_t base, offset = 0, since_jump = 0, numfree = 0;
-
- if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
- server.vm_near_pages = 0;
- server.vm_next_page = 0;
- }
- server.vm_near_pages++; /* Yet another try for pages near to the old ones */
- base = server.vm_next_page;
-
- while(offset < server.vm_pages) {
- off_t this = base+offset;
-
- /* If we overflow, restart from page zero */
- if (this >= server.vm_pages) {
- this -= server.vm_pages;
- if (this == 0) {
- /* Just overflowed, what we found on tail is no longer
- * interesting, as it's no longer contiguous. */
- numfree = 0;
- }
- }
- if (vmFreePage(this)) {
- /* This is a free page */
- numfree++;
- /* Already got N free pages? Return to the caller, with success */
- if (numfree == n) {
- *first = this-(n-1);
- server.vm_next_page = this+1;
- redisLog(REDIS_DEBUG, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n, (long long) *first);
- return REDIS_OK;
- }
- } else {
- /* The current one is not a free page */
- numfree = 0;
- }
-
- /* Fast-forward if the current page is not free and we already
- * searched enough near this place. */
- since_jump++;
- if (!numfree && since_jump >= REDIS_VM_MAX_RANDOM_JUMP/4) {
- offset += random() % REDIS_VM_MAX_RANDOM_JUMP;
- since_jump = 0;
- /* Note that even if we rewind after the jump, we are don't need
- * to make sure numfree is set to zero as we only jump *if* it
- * is set to zero. */
- } else {
- /* Otherwise just check the next page */
- offset++;
- }
- }
- return REDIS_ERR;
-}
-
-/* Write the specified object at the specified page of the swap file */
-static int vmWriteObjectOnSwap(robj *o, off_t page) {
- if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
- if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
- if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
- redisLog(REDIS_WARNING,
- "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
- strerror(errno));
- return REDIS_ERR;
- }
- rdbSaveObject(server.vm_fp,o);
- fflush(server.vm_fp);
- if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
- return REDIS_OK;
-}
-
-/* Transfers the 'val' object to disk. Store all the information
- * a 'vmpointer' object containing all the information needed to load the
- * object back later is returned.
- *
- * If we can't find enough contiguous empty pages to swap the object on disk
- * NULL is returned. */
-static vmpointer *vmSwapObjectBlocking(robj *val) {
- off_t pages = rdbSavedObjectPages(val,NULL);
- off_t page;
- vmpointer *vp;
-
- assert(val->storage == REDIS_VM_MEMORY);
- assert(val->refcount == 1);
- if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return NULL;
- if (vmWriteObjectOnSwap(val,page) == REDIS_ERR) return NULL;
-
- vp = createVmPointer(val->type);
- vp->page = page;
- vp->usedpages = pages;
- decrRefCount(val); /* Deallocate the object from memory. */
- vmMarkPagesUsed(page,pages);
- redisLog(REDIS_DEBUG,"VM: object %p swapped out at %lld (%lld pages)",
- (void*) val,
- (unsigned long long) page, (unsigned long long) pages);
- server.vm_stats_swapped_objects++;
- server.vm_stats_swapouts++;
- return vp;
-}
-
-static robj *vmReadObjectFromSwap(off_t page, int type) {
- robj *o;
-
- if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
- if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
- redisLog(REDIS_WARNING,
- "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
- strerror(errno));
- _exit(1);
- }
- o = rdbLoadObject(type,server.vm_fp);
- if (o == NULL) {
- redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno));
- _exit(1);
- }
- if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
- return o;
-}
-
-/* Load the specified object from swap to memory.
- * The newly allocated object is returned.
- *
- * If preview is true the unserialized object is returned to the caller but
- * the pages are not marked as freed, nor the vp object is freed. */
-static robj *vmGenericLoadObject(vmpointer *vp, int preview) {
- robj *val;
-
- redisAssert(vp->type == REDIS_VMPOINTER &&
- (vp->storage == REDIS_VM_SWAPPED || vp->storage == REDIS_VM_LOADING));
- val = vmReadObjectFromSwap(vp->page,vp->vtype);
- if (!preview) {
- redisLog(REDIS_DEBUG, "VM: object %p loaded from disk", (void*)vp);
- vmMarkPagesFree(vp->page,vp->usedpages);
- zfree(vp);
- server.vm_stats_swapped_objects--;
- } else {
- redisLog(REDIS_DEBUG, "VM: object %p previewed from disk", (void*)vp);
- }
- server.vm_stats_swapins++;
- return val;
-}
-
-/* Plain object loading, from swap to memory.
- *
- * 'o' is actually a redisVmPointer structure that will be freed by the call.
- * The return value is the loaded object. */
-static robj *vmLoadObject(robj *o) {
- /* If we are loading the object in background, stop it, we
- * need to load this object synchronously ASAP. */
- if (o->storage == REDIS_VM_LOADING)
- vmCancelThreadedIOJob(o);
- return vmGenericLoadObject((vmpointer*)o,0);
-}
-
-/* Just load the value on disk, without to modify the key.
- * This is useful when we want to perform some operation on the value
- * without to really bring it from swap to memory, like while saving the
- * dataset or rewriting the append only log. */
-static robj *vmPreviewObject(robj *o) {
- return vmGenericLoadObject((vmpointer*)o,1);
-}
-
-/* How a good candidate is this object for swapping?
- * The better candidate it is, the greater the returned value.
- *
- * Currently we try to perform a fast estimation of the object size in
- * memory, and combine it with aging informations.
- *
- * Basically swappability = idle-time * log(estimated size)
- *
- * Bigger objects are preferred over smaller objects, but not
- * proportionally, this is why we use the logarithm. This algorithm is
- * just a first try and will probably be tuned later. */
-static double computeObjectSwappability(robj *o) {
- /* actual age can be >= minage, but not < minage. As we use wrapping
- * 21 bit clocks with minutes resolution for the LRU. */
- time_t minage = abs(server.lruclock - o->lru);
- long asize = 0, elesize;
- robj *ele;
- list *l;
- listNode *ln;
- dict *d;
- struct dictEntry *de;
- int z;
-
- if (minage <= 0) return 0;
- switch(o->type) {
- case REDIS_STRING:
- if (o->encoding != REDIS_ENCODING_RAW) {
- asize = sizeof(*o);
- } else {
- asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
- }
- break;
- case REDIS_LIST:
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
- asize = sizeof(*o)+ziplistSize(o->ptr);
- } else {
- l = o->ptr;
- ln = listFirst(l);
- asize = sizeof(list);
- if (ln) {
- ele = ln->value;
- elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
- (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
- asize += (sizeof(listNode)+elesize)*listLength(l);
- }
- }
- break;
- case REDIS_SET:
- case REDIS_ZSET:
- z = (o->type == REDIS_ZSET);
- d = z ? ((zset*)o->ptr)->dict : o->ptr;
-
- asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
- if (z) asize += sizeof(zset)-sizeof(dict);
- if (dictSize(d)) {
- de = dictGetRandomKey(d);
- ele = dictGetEntryKey(de);
- elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
- (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
- asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
- if (z) asize += sizeof(zskiplistNode)*dictSize(d);
- }
- break;
- case REDIS_HASH:
- if (o->encoding == REDIS_ENCODING_ZIPMAP) {
- unsigned char *p = zipmapRewind((unsigned char*)o->ptr);
- unsigned int len = zipmapLen((unsigned char*)o->ptr);
- unsigned int klen, vlen;
- unsigned char *key, *val;
-
- if ((p = zipmapNext(p,&key,&klen,&val,&vlen)) == NULL) {
- klen = 0;
- vlen = 0;
- }
- asize = len*(klen+vlen+3);
- } else if (o->encoding == REDIS_ENCODING_HT) {
- d = o->ptr;
- asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
- if (dictSize(d)) {
- de = dictGetRandomKey(d);
- ele = dictGetEntryKey(de);
- elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
- (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
- ele = dictGetEntryVal(de);
- elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
- (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
- asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
- }
- }
- break;
- }
- return (double)minage*log(1+asize);
-}
-
-/* Try to swap an object that's a good candidate for swapping.
- * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
- * to swap any object at all.
- *
- * If 'usethreaded' is true, Redis will try to swap the object in background
- * using I/O threads. */
-static int vmSwapOneObject(int usethreads) {
- int j, i;
- struct dictEntry *best = NULL;
- double best_swappability = 0;
- redisDb *best_db = NULL;
- robj *val;
- sds key;
-
- for (j = 0; j < server.dbnum; j++) {
- redisDb *db = server.db+j;
- /* Why maxtries is set to 100?
- * Because this way (usually) we'll find 1 object even if just 1% - 2%
- * are swappable objects */
- int maxtries = 100;
-
- if (dictSize(db->dict) == 0) continue;
- for (i = 0; i < 5; i++) {
- dictEntry *de;
- double swappability;
-
- if (maxtries) maxtries--;
- de = dictGetRandomKey(db->dict);
- val = dictGetEntryVal(de);
- /* Only swap objects that are currently in memory.
- *
- * Also don't swap shared objects: not a good idea in general and
- * we need to ensure that the main thread does not touch the
- * object while the I/O thread is using it, but we can't
- * control other keys without adding additional mutex. */
- if (val->storage != REDIS_VM_MEMORY || val->refcount != 1) {
- if (maxtries) i--; /* don't count this try */
- continue;
- }
- swappability = computeObjectSwappability(val);
- if (!best || swappability > best_swappability) {
- best = de;
- best_swappability = swappability;
- best_db = db;
- }
- }
- }
- if (best == NULL) return REDIS_ERR;
- key = dictGetEntryKey(best);
- val = dictGetEntryVal(best);
-
- redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f",
- key, best_swappability);
-
- /* Swap it */
- if (usethreads) {
- robj *keyobj = createStringObject(key,sdslen(key));
- vmSwapObjectThreaded(keyobj,val,best_db);
- decrRefCount(keyobj);
- return REDIS_OK;
- } else {
- vmpointer *vp;
-
- if ((vp = vmSwapObjectBlocking(val)) != NULL) {
- dictGetEntryVal(best) = vp;
- return REDIS_OK;
- } else {
- return REDIS_ERR;
- }
- }
-}
-
-static int vmSwapOneObjectBlocking() {
- return vmSwapOneObject(0);
-}
-
-static int vmSwapOneObjectThreaded() {
- return vmSwapOneObject(1);
-}
-
-/* Return true if it's safe to swap out objects in a given moment.
- * Basically we don't want to swap objects out while there is a BGSAVE
- * or a BGAEOREWRITE running in backgroud. */
-static int vmCanSwapOut(void) {
- return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1);
-}
-
-/* =================== Virtual Memory - Threaded I/O ======================= */
-
-static void freeIOJob(iojob *j) {
- if ((j->type == REDIS_IOJOB_PREPARE_SWAP ||
- j->type == REDIS_IOJOB_DO_SWAP ||
- j->type == REDIS_IOJOB_LOAD) && j->val != NULL)
- {
- /* we fix the storage type, otherwise decrRefCount() will try to
- * kill the I/O thread Job (that does no longer exists). */
- if (j->val->storage == REDIS_VM_SWAPPING)
- j->val->storage = REDIS_VM_MEMORY;
- decrRefCount(j->val);
- }
- decrRefCount(j->key);
- zfree(j);
-}
-
-/* Every time a thread finished a Job, it writes a byte into the write side
- * of an unix pipe in order to "awake" the main thread, and this function
- * is called. */
-static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
- int mask)
-{
- char buf[1];
- int retval, processed = 0, toprocess = -1, trytoswap = 1;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
-
- /* For every byte we read in the read side of the pipe, there is one
- * I/O job completed to process. */
- while((retval = read(fd,buf,1)) == 1) {
- iojob *j;
- listNode *ln;
- struct dictEntry *de;
-
- redisLog(REDIS_DEBUG,"Processing I/O completed job");
-
- /* Get the processed element (the oldest one) */
- lockThreadedIO();
- assert(listLength(server.io_processed) != 0);
- if (toprocess == -1) {
- toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100;
- if (toprocess <= 0) toprocess = 1;
- }
- ln = listFirst(server.io_processed);
- j = ln->value;
- listDelNode(server.io_processed,ln);
- unlockThreadedIO();
- /* If this job is marked as canceled, just ignore it */
- if (j->canceled) {
- freeIOJob(j);
- continue;
- }
- /* Post process it in the main thread, as there are things we
- * can do just here to avoid race conditions and/or invasive locks */
- redisLog(REDIS_DEBUG,"COMPLETED Job type: %d, ID %p, key: %s", j->type, (void*)j->id, (unsigned char*)j->key->ptr);
- de = dictFind(j->db->dict,j->key->ptr);
- redisAssert(de != NULL);
- if (j->type == REDIS_IOJOB_LOAD) {
- redisDb *db;
- vmpointer *vp = dictGetEntryVal(de);
-
- /* Key loaded, bring it at home */
- vmMarkPagesFree(vp->page,vp->usedpages);
- redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)",
- (unsigned char*) j->key->ptr);
- server.vm_stats_swapped_objects--;
- server.vm_stats_swapins++;
- dictGetEntryVal(de) = j->val;
- incrRefCount(j->val);
- db = j->db;
- /* Handle clients waiting for this key to be loaded. */
- handleClientsBlockedOnSwappedKey(db,j->key);
- freeIOJob(j);
- zfree(vp);
- } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
- /* Now we know the amount of pages required to swap this object.
- * Let's find some space for it, and queue this task again
- * rebranded as REDIS_IOJOB_DO_SWAP. */
- if (!vmCanSwapOut() ||
- vmFindContiguousPages(&j->page,j->pages) == REDIS_ERR)
- {
- /* Ooops... no space or we can't swap as there is
- * a fork()ed Redis trying to save stuff on disk. */
- j->val->storage = REDIS_VM_MEMORY; /* undo operation */
- freeIOJob(j);
- } else {
- /* Note that we need to mark this pages as used now,
- * if the job will be canceled, we'll mark them as freed
- * again. */
- vmMarkPagesUsed(j->page,j->pages);
- j->type = REDIS_IOJOB_DO_SWAP;
- lockThreadedIO();
- queueIOJob(j);
- unlockThreadedIO();
- }
- } else if (j->type == REDIS_IOJOB_DO_SWAP) {
- vmpointer *vp;
-
- /* Key swapped. We can finally free some memory. */
- if (j->val->storage != REDIS_VM_SWAPPING) {
- vmpointer *vp = (vmpointer*) j->id;
- printf("storage: %d\n",vp->storage);
- printf("key->name: %s\n",(char*)j->key->ptr);
- printf("val: %p\n",(void*)j->val);
- printf("val->type: %d\n",j->val->type);
- printf("val->ptr: %s\n",(char*)j->val->ptr);
- }
- redisAssert(j->val->storage == REDIS_VM_SWAPPING);
- vp = createVmPointer(j->val->type);
- vp->page = j->page;
- vp->usedpages = j->pages;
- dictGetEntryVal(de) = vp;
- /* Fix the storage otherwise decrRefCount will attempt to
- * remove the associated I/O job */
- j->val->storage = REDIS_VM_MEMORY;
- decrRefCount(j->val);
- redisLog(REDIS_DEBUG,
- "VM: object %s swapped out at %lld (%lld pages) (threaded)",
- (unsigned char*) j->key->ptr,
- (unsigned long long) j->page, (unsigned long long) j->pages);
- server.vm_stats_swapped_objects++;
- server.vm_stats_swapouts++;
- freeIOJob(j);
- /* Put a few more swap requests in queue if we are still
- * out of memory */
- if (trytoswap && vmCanSwapOut() &&
- zmalloc_used_memory() > server.vm_max_memory)
- {
- int more = 1;
- while(more) {
- lockThreadedIO();
- more = listLength(server.io_newjobs) <
- (unsigned) server.vm_max_threads;
- unlockThreadedIO();
- /* Don't waste CPU time if swappable objects are rare. */
- if (vmSwapOneObjectThreaded() == REDIS_ERR) {
- trytoswap = 0;
- break;
- }
- }
- }
- }
- processed++;
- if (processed == toprocess) return;
- }
- if (retval < 0 && errno != EAGAIN) {
- redisLog(REDIS_WARNING,
- "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
- strerror(errno));
- }
-}
-
-static void lockThreadedIO(void) {
- pthread_mutex_lock(&server.io_mutex);
-}
-
-static void unlockThreadedIO(void) {
- pthread_mutex_unlock(&server.io_mutex);
-}
-
-/* Remove the specified object from the threaded I/O queue if still not
- * processed, otherwise make sure to flag it as canceled. */
-static void vmCancelThreadedIOJob(robj *o) {
- list *lists[3] = {
- server.io_newjobs, /* 0 */
- server.io_processing, /* 1 */
- server.io_processed /* 2 */
- };
- int i;
-
- assert(o->storage == REDIS_VM_LOADING || o->storage == REDIS_VM_SWAPPING);
-again:
- lockThreadedIO();
- /* Search for a matching object in one of the queues */
- for (i = 0; i < 3; i++) {
- listNode *ln;
- listIter li;
-
- listRewind(lists[i],&li);
- while ((ln = listNext(&li)) != NULL) {
- iojob *job = ln->value;
-
- if (job->canceled) continue; /* Skip this, already canceled. */
- if (job->id == o) {
- redisLog(REDIS_DEBUG,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
- (void*)job, (char*)job->key->ptr, job->type, i);
- /* Mark the pages as free since the swap didn't happened
- * or happened but is now discarded. */
- if (i != 1 && job->type == REDIS_IOJOB_DO_SWAP)
- vmMarkPagesFree(job->page,job->pages);
- /* Cancel the job. It depends on the list the job is
- * living in. */
- switch(i) {
- case 0: /* io_newjobs */
- /* If the job was yet not processed the best thing to do
- * is to remove it from the queue at all */
- freeIOJob(job);
- listDelNode(lists[i],ln);
- break;
- case 1: /* io_processing */
- /* Oh Shi- the thread is messing with the Job:
- *
- * Probably it's accessing the object if this is a
- * PREPARE_SWAP or DO_SWAP job.
- * If it's a LOAD job it may be reading from disk and
- * if we don't wait for the job to terminate before to
- * cancel it, maybe in a few microseconds data can be
- * corrupted in this pages. So the short story is:
- *
- * Better to wait for the job to move into the
- * next queue (processed)... */
-
- /* We try again and again until the job is completed. */
- unlockThreadedIO();
- /* But let's wait some time for the I/O thread
- * to finish with this job. After all this condition
- * should be very rare. */
- usleep(1);
- goto again;
- case 2: /* io_processed */
- /* The job was already processed, that's easy...
- * just mark it as canceled so that we'll ignore it
- * when processing completed jobs. */
- job->canceled = 1;
- break;
- }
- /* Finally we have to adjust the storage type of the object
- * in order to "UNDO" the operaiton. */
- if (o->storage == REDIS_VM_LOADING)
- o->storage = REDIS_VM_SWAPPED;
- else if (o->storage == REDIS_VM_SWAPPING)
- o->storage = REDIS_VM_MEMORY;
- unlockThreadedIO();
- redisLog(REDIS_DEBUG,"*** DONE");
- return;
- }
- }
- }
- unlockThreadedIO();
- printf("Not found: %p\n", (void*)o);
- redisAssert(1 != 1); /* We should never reach this */
-}
-
-static void *IOThreadEntryPoint(void *arg) {
- iojob *j;
- listNode *ln;
- REDIS_NOTUSED(arg);
-
- pthread_detach(pthread_self());
- while(1) {
- /* Get a new job to process */
- lockThreadedIO();
- if (listLength(server.io_newjobs) == 0) {
- /* No new jobs in queue, exit. */
- redisLog(REDIS_DEBUG,"Thread %ld exiting, nothing to do",
- (long) pthread_self());
- server.io_active_threads--;
- unlockThreadedIO();
- return NULL;
- }
- ln = listFirst(server.io_newjobs);
- j = ln->value;
- listDelNode(server.io_newjobs,ln);
- /* Add the job in the processing queue */
- j->thread = pthread_self();
- listAddNodeTail(server.io_processing,j);
- ln = listLast(server.io_processing); /* We use ln later to remove it */
- unlockThreadedIO();
- redisLog(REDIS_DEBUG,"Thread %ld got a new job (type %d): %p about key '%s'",
- (long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr);
-
- /* Process the Job */
- if (j->type == REDIS_IOJOB_LOAD) {
- vmpointer *vp = (vmpointer*)j->id;
- j->val = vmReadObjectFromSwap(j->page,vp->vtype);
- } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
- FILE *fp = fopen("/dev/null","w+");
- j->pages = rdbSavedObjectPages(j->val,fp);
- fclose(fp);
- } else if (j->type == REDIS_IOJOB_DO_SWAP) {
- if (vmWriteObjectOnSwap(j->val,j->page) == REDIS_ERR)
- j->canceled = 1;
- }
-
- /* Done: insert the job into the processed queue */
- redisLog(REDIS_DEBUG,"Thread %ld completed the job: %p (key %s)",
- (long) pthread_self(), (void*)j, (char*)j->key->ptr);
- lockThreadedIO();
- listDelNode(server.io_processing,ln);
- listAddNodeTail(server.io_processed,j);
- unlockThreadedIO();
-
- /* Signal the main thread there is new stuff to process */
- assert(write(server.io_ready_pipe_write,"x",1) == 1);
- }
- return NULL; /* never reached */
-}
-
-static void spawnIOThread(void) {
- pthread_t thread;
- sigset_t mask, omask;
- int err;
-
- sigemptyset(&mask);
- sigaddset(&mask,SIGCHLD);
- sigaddset(&mask,SIGHUP);
- sigaddset(&mask,SIGPIPE);
- pthread_sigmask(SIG_SETMASK, &mask, &omask);
- while ((err = pthread_create(&thread,&server.io_threads_attr,IOThreadEntryPoint,NULL)) != 0) {
- redisLog(REDIS_WARNING,"Unable to spawn an I/O thread: %s",
- strerror(err));
- usleep(1000000);
- }
- pthread_sigmask(SIG_SETMASK, &omask, NULL);
- server.io_active_threads++;
-}
-
-/* We need to wait for the last thread to exit before we are able to
- * fork() in order to BGSAVE or BGREWRITEAOF. */
-static void waitEmptyIOJobsQueue(void) {
- while(1) {
- int io_processed_len;
-
- lockThreadedIO();
- if (listLength(server.io_newjobs) == 0 &&
- listLength(server.io_processing) == 0 &&
- server.io_active_threads == 0)
- {
- unlockThreadedIO();
- return;
- }
- /* While waiting for empty jobs queue condition we post-process some
- * finshed job, as I/O threads may be hanging trying to write against
- * the io_ready_pipe_write FD but there are so much pending jobs that
- * it's blocking. */
- io_processed_len = listLength(server.io_processed);
- unlockThreadedIO();
- if (io_processed_len) {
- vmThreadedIOCompletedJob(NULL,server.io_ready_pipe_read,NULL,0);
- usleep(1000); /* 1 millisecond */
- } else {
- usleep(10000); /* 10 milliseconds */
- }
- }
-}
-
-static void vmReopenSwapFile(void) {
- /* Note: we don't close the old one as we are in the child process
- * and don't want to mess at all with the original file object. */
- server.vm_fp = fopen(server.vm_swap_file,"r+b");
- if (server.vm_fp == NULL) {
- redisLog(REDIS_WARNING,"Can't re-open the VM swap file: %s. Exiting.",
- server.vm_swap_file);
- _exit(1);
- }
- server.vm_fd = fileno(server.vm_fp);
-}
-
-/* This function must be called while with threaded IO locked */
-static void queueIOJob(iojob *j) {
- redisLog(REDIS_DEBUG,"Queued IO Job %p type %d about key '%s'\n",
- (void*)j, j->type, (char*)j->key->ptr);
- listAddNodeTail(server.io_newjobs,j);
- if (server.io_active_threads < server.vm_max_threads)
- spawnIOThread();
-}
-
-static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db) {
- iojob *j;
-
- j = zmalloc(sizeof(*j));
- j->type = REDIS_IOJOB_PREPARE_SWAP;
- j->db = db;
- j->key = key;
- incrRefCount(key);
- j->id = j->val = val;
- incrRefCount(val);
- j->canceled = 0;
- j->thread = (pthread_t) -1;
- val->storage = REDIS_VM_SWAPPING;
-
- lockThreadedIO();
- queueIOJob(j);
- unlockThreadedIO();
- return REDIS_OK;
-}
-
-/* ============ Virtual Memory - Blocking clients on missing keys =========== */
-
-/* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
- * If there is not already a job loading the key, it is craeted.
- * The key is added to the io_keys list in the client structure, and also
- * in the hash table mapping swapped keys to waiting clients, that is,
- * server.io_waited_keys. */
-static int waitForSwappedKey(redisClient *c, robj *key) {
- struct dictEntry *de;
- robj *o;
- list *l;
-
- /* If the key does not exist or is already in RAM we don't need to
- * block the client at all. */
- de = dictFind(c->db->dict,key->ptr);
- if (de == NULL) return 0;
- o = dictGetEntryVal(de);
- if (o->storage == REDIS_VM_MEMORY) {
- return 0;
- } else if (o->storage == REDIS_VM_SWAPPING) {
- /* We were swapping the key, undo it! */
- vmCancelThreadedIOJob(o);
- return 0;
- }
-
- /* OK: the key is either swapped, or being loaded just now. */
-
- /* Add the key to the list of keys this client is waiting for.
- * This maps clients to keys they are waiting for. */
- listAddNodeTail(c->io_keys,key);
- incrRefCount(key);
-
- /* Add the client to the swapped keys => clients waiting map. */
- de = dictFind(c->db->io_keys,key);
- if (de == NULL) {
- int retval;
-
- /* For every key we take a list of clients blocked for it */
- l = listCreate();
- retval = dictAdd(c->db->io_keys,key,l);
- incrRefCount(key);
- assert(retval == DICT_OK);
- } else {
- l = dictGetEntryVal(de);
- }
- listAddNodeTail(l,c);
-
- /* Are we already loading the key from disk? If not create a job */
- if (o->storage == REDIS_VM_SWAPPED) {
- iojob *j;
- vmpointer *vp = (vmpointer*)o;
-
- o->storage = REDIS_VM_LOADING;
- j = zmalloc(sizeof(*j));
- j->type = REDIS_IOJOB_LOAD;
- j->db = c->db;
- j->id = (robj*)vp;
- j->key = key;
- incrRefCount(key);
- j->page = vp->page;
- j->val = NULL;
- j->canceled = 0;
- j->thread = (pthread_t) -1;
- lockThreadedIO();
- queueIOJob(j);
- unlockThreadedIO();
- }
- return 1;
-}
-
-/* Preload keys for any command with first, last and step values for
- * the command keys prototype, as defined in the command table. */
-static void waitForMultipleSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
- int j, last;
- if (cmd->vm_firstkey == 0) return;
- last = cmd->vm_lastkey;
- if (last < 0) last = argc+last;
- for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep) {
- redisAssert(j < argc);
- waitForSwappedKey(c,argv[j]);
- }
-}
-
-/* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
- * Note that the number of keys to preload is user-defined, so we need to
- * apply a sanity check against argc. */
-static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
- int i, num;
- REDIS_NOTUSED(cmd);
-
- num = atoi(argv[2]->ptr);
- if (num > (argc-3)) return;
- for (i = 0; i < num; i++) {
- waitForSwappedKey(c,argv[3+i]);
- }
-}
-
-/* Preload keys needed to execute the entire MULTI/EXEC block.
- *
- * This function is called by blockClientOnSwappedKeys when EXEC is issued,
- * and will block the client when any command requires a swapped out value. */
-static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
- int i, margc;
- struct redisCommand *mcmd;
- robj **margv;
- REDIS_NOTUSED(cmd);
- REDIS_NOTUSED(argc);
- REDIS_NOTUSED(argv);
-
- if (!(c->flags & REDIS_MULTI)) return;
- for (i = 0; i < c->mstate.count; i++) {
- mcmd = c->mstate.commands[i].cmd;
- margc = c->mstate.commands[i].argc;
- margv = c->mstate.commands[i].argv;
-
- if (mcmd->vm_preload_proc != NULL) {
- mcmd->vm_preload_proc(c,mcmd,margc,margv);
- } else {
- waitForMultipleSwappedKeys(c,mcmd,margc,margv);
- }
- }
-}
-
-/* Is this client attempting to run a command against swapped keys?
- * If so, block it ASAP, load the keys in background, then resume it.
- *
- * The important idea about this function is that it can fail! If keys will
- * still be swapped when the client is resumed, this key lookups will
- * just block loading keys from disk. In practical terms this should only
- * happen with SORT BY command or if there is a bug in this function.
- *
- * Return 1 if the client is marked as blocked, 0 if the client can
- * continue as the keys it is going to access appear to be in memory. */
-static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd) {
- if (cmd->vm_preload_proc != NULL) {
- cmd->vm_preload_proc(c,cmd,c->argc,c->argv);
- } else {
- waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv);
- }
-
- /* If the client was blocked for at least one key, mark it as blocked. */
- if (listLength(c->io_keys)) {
- c->flags |= REDIS_IO_WAIT;
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- server.vm_blocked_clients++;
- return 1;
- } else {
- return 0;
- }
-}
-
-/* Remove the 'key' from the list of blocked keys for a given client.
- *
- * The function returns 1 when there are no longer blocking keys after
- * the current one was removed (and the client can be unblocked). */
-static int dontWaitForSwappedKey(redisClient *c, robj *key) {
- list *l;
- listNode *ln;
- listIter li;
- struct dictEntry *de;
-
- /* Remove the key from the list of keys this client is waiting for. */
- listRewind(c->io_keys,&li);
- while ((ln = listNext(&li)) != NULL) {
- if (equalStringObjects(ln->value,key)) {
- listDelNode(c->io_keys,ln);
- break;
- }
- }
- assert(ln != NULL);
-
- /* Remove the client form the key => waiting clients map. */
- de = dictFind(c->db->io_keys,key);
- assert(de != NULL);
- l = dictGetEntryVal(de);
- ln = listSearchKey(l,c);
- assert(ln != NULL);
- listDelNode(l,ln);
- if (listLength(l) == 0)
- dictDelete(c->db->io_keys,key);
-
- return listLength(c->io_keys) == 0;
-}
-
-/* Every time we now a key was loaded back in memory, we handle clients
- * waiting for this key if any. */
-static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) {
- struct dictEntry *de;
- list *l;
- listNode *ln;
- int len;
-
- de = dictFind(db->io_keys,key);
- if (!de) return;
-
- l = dictGetEntryVal(de);
- len = listLength(l);
- /* Note: we can't use something like while(listLength(l)) as the list
- * can be freed by the calling function when we remove the last element. */
- while (len--) {
- ln = listFirst(l);
- redisClient *c = ln->value;
-
- if (dontWaitForSwappedKey(c,key)) {
- /* Put the client in the list of clients ready to go as we
- * loaded all the keys about it. */
- listAddNodeTail(server.io_ready_clients,c);
- }
- }
-}
-
-/* =========================== Remote Configuration ========================= */
-
-static void configSetCommand(redisClient *c) {
- robj *o = getDecodedObject(c->argv[3]);
- long long ll;
-
- if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
- zfree(server.dbfilename);
- server.dbfilename = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
- zfree(server.requirepass);
- server.requirepass = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
- zfree(server.masterauth);
- server.masterauth = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.maxmemory = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > LONG_MAX) goto badfmt;
- server.maxidletime = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
- if (!strcasecmp(o->ptr,"no")) {
- server.appendfsync = APPENDFSYNC_NO;
- } else if (!strcasecmp(o->ptr,"everysec")) {
- server.appendfsync = APPENDFSYNC_EVERYSEC;
- } else if (!strcasecmp(o->ptr,"always")) {
- server.appendfsync = APPENDFSYNC_ALWAYS;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.no_appendfsync_on_rewrite = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
- int old = server.appendonly;
- int new = yesnotoi(o->ptr);
-
- if (new == -1) goto badfmt;
- if (old != new) {
- if (new == 0) {
- stopAppendOnly();
- } else {
- if (startAppendOnly() == REDIS_ERR) {
- addReplySds(c,sdscatprintf(sdsempty(),
- "-ERR Unable to turn on AOF. Check server logs.\r\n"));
- decrRefCount(o);
- return;
- }
- }
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
- int vlen, j;
- sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
-
- /* Perform sanity check before setting the new config:
- * - Even number of args
- * - Seconds >= 1, changes >= 0 */
- if (vlen & 1) {
- sdsfreesplitres(v,vlen);
- goto badfmt;
- }
- for (j = 0; j < vlen; j++) {
- char *eptr;
- long val;
-
- val = strtoll(v[j], &eptr, 10);
- if (eptr[0] != '\0' ||
- ((j & 1) == 0 && val < 1) ||
- ((j & 1) == 1 && val < 0)) {
- sdsfreesplitres(v,vlen);
- goto badfmt;
- }
- }
- /* Finally set the new config */
- resetServerSaveParams();
- for (j = 0; j < vlen; j += 2) {
- time_t seconds;
- int changes;
-
- seconds = strtoll(v[j],NULL,10);
- changes = strtoll(v[j+1],NULL,10);
- appendServerSaveParams(seconds, changes);
- }
- sdsfreesplitres(v,vlen);
- } else {
- addReplySds(c,sdscatprintf(sdsempty(),
- "-ERR not supported CONFIG parameter %s\r\n",
- (char*)c->argv[2]->ptr));
- decrRefCount(o);
- return;
- }
- decrRefCount(o);
- addReply(c,shared.ok);
- return;
-
-badfmt: /* Bad format errors */
- addReplySds(c,sdscatprintf(sdsempty(),
- "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
- (char*)o->ptr,
- (char*)c->argv[2]->ptr));
- decrRefCount(o);
-}
-
-static void configGetCommand(redisClient *c) {
- robj *o = getDecodedObject(c->argv[2]);
- robj *lenobj = createObject(REDIS_STRING,NULL);
- char *pattern = o->ptr;
- int matches = 0;
-
- addReply(c,lenobj);
- decrRefCount(lenobj);
-
- if (stringmatch(pattern,"dbfilename",0)) {
- addReplyBulkCString(c,"dbfilename");
- addReplyBulkCString(c,server.dbfilename);
- matches++;
- }
- if (stringmatch(pattern,"requirepass",0)) {
- addReplyBulkCString(c,"requirepass");
- addReplyBulkCString(c,server.requirepass);
- matches++;
- }
- if (stringmatch(pattern,"masterauth",0)) {
- addReplyBulkCString(c,"masterauth");
- addReplyBulkCString(c,server.masterauth);
- matches++;
- }
- if (stringmatch(pattern,"maxmemory",0)) {
- char buf[128];
-
- ll2string(buf,128,server.maxmemory);
- addReplyBulkCString(c,"maxmemory");
- addReplyBulkCString(c,buf);
- matches++;
- }
- if (stringmatch(pattern,"timeout",0)) {
- char buf[128];
-
- ll2string(buf,128,server.maxidletime);
- addReplyBulkCString(c,"timeout");
- addReplyBulkCString(c,buf);
- matches++;
- }
- if (stringmatch(pattern,"appendonly",0)) {
- addReplyBulkCString(c,"appendonly");
- addReplyBulkCString(c,server.appendonly ? "yes" : "no");
- matches++;
- }
- if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) {
- addReplyBulkCString(c,"no-appendfsync-on-rewrite");
- addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? "yes" : "no");
- matches++;
- }
- if (stringmatch(pattern,"appendfsync",0)) {
- char *policy;
-
- switch(server.appendfsync) {
- case APPENDFSYNC_NO: policy = "no"; break;
- case APPENDFSYNC_EVERYSEC: policy = "everysec"; break;
- case APPENDFSYNC_ALWAYS: policy = "always"; break;
- default: policy = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"appendfsync");
- addReplyBulkCString(c,policy);
- matches++;
- }
- if (stringmatch(pattern,"save",0)) {
- sds buf = sdsempty();
- int j;
-
- for (j = 0; j < server.saveparamslen; j++) {
- buf = sdscatprintf(buf,"%ld %d",
- server.saveparams[j].seconds,
- server.saveparams[j].changes);
- if (j != server.saveparamslen-1)
- buf = sdscatlen(buf," ",1);
- }
- addReplyBulkCString(c,"save");
- addReplyBulkCString(c,buf);
- sdsfree(buf);
- matches++;
- }
- decrRefCount(o);
- lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2);
-}
-
-static void configCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"set")) {
- if (c->argc != 4) goto badarity;
- configSetCommand(c);
- } else if (!strcasecmp(c->argv[1]->ptr,"get")) {
- if (c->argc != 3) goto badarity;
- configGetCommand(c);
- } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) {
- if (c->argc != 2) goto badarity;
- server.stat_numcommands = 0;
- server.stat_numconnections = 0;
- server.stat_expiredkeys = 0;
- server.stat_starttime = time(NULL);
- addReply(c,shared.ok);
- } else {
- addReplySds(c,sdscatprintf(sdsempty(),
- "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
- }
- return;
-
-badarity:
- addReplySds(c,sdscatprintf(sdsempty(),
- "-ERR Wrong number of arguments for CONFIG %s\r\n",
- (char*) c->argv[1]->ptr));
-}
-
-/* =========================== Pubsub implementation ======================== */
-
-static void freePubsubPattern(void *p) {
- pubsubPattern *pat = p;
-
- decrRefCount(pat->pattern);
- zfree(pat);
-}
-
-static int listMatchPubsubPattern(void *a, void *b) {
- pubsubPattern *pa = a, *pb = b;
-
- return (pa->client == pb->client) &&
- (equalStringObjects(pa->pattern,pb->pattern));
-}
-
-/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
- * 0 if the client was already subscribed to that channel. */
-static int pubsubSubscribeChannel(redisClient *c, robj *channel) {
- struct dictEntry *de;
- list *clients = NULL;
- int retval = 0;
-
- /* Add the channel to the client -> channels hash table */
- if (dictAdd(c->pubsub_channels,channel,NULL) == DICT_OK) {
- retval = 1;
- incrRefCount(channel);
- /* Add the client to the channel -> list of clients hash table */
- de = dictFind(server.pubsub_channels,channel);
- if (de == NULL) {
- clients = listCreate();
- dictAdd(server.pubsub_channels,channel,clients);
- incrRefCount(channel);
- } else {
- clients = dictGetEntryVal(de);
- }
- listAddNodeTail(clients,c);
- }
- /* Notify the client */
- addReply(c,shared.mbulk3);
- addReply(c,shared.subscribebulk);
- addReplyBulk(c,channel);
- addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
- return retval;
-}
-
-/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
- * 0 if the client was not subscribed to the specified channel. */
-static int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
- struct dictEntry *de;
- list *clients;
- listNode *ln;
- int retval = 0;
-
- /* Remove the channel from the client -> channels hash table */
- incrRefCount(channel); /* channel may be just a pointer to the same object
- we have in the hash tables. Protect it... */
- if (dictDelete(c->pubsub_channels,channel) == DICT_OK) {
- retval = 1;
- /* Remove the client from the channel -> clients list hash table */
- de = dictFind(server.pubsub_channels,channel);
- assert(de != NULL);
- clients = dictGetEntryVal(de);
- ln = listSearchKey(clients,c);
- assert(ln != NULL);
- listDelNode(clients,ln);
- if (listLength(clients) == 0) {
- /* Free the list and associated hash entry at all if this was
- * the latest client, so that it will be possible to abuse
- * Redis PUBSUB creating millions of channels. */
- dictDelete(server.pubsub_channels,channel);
- }
- }
- /* Notify the client */
- if (notify) {
- addReply(c,shared.mbulk3);
- addReply(c,shared.unsubscribebulk);
- addReplyBulk(c,channel);
- addReplyLongLong(c,dictSize(c->pubsub_channels)+
- listLength(c->pubsub_patterns));
-
- }
- decrRefCount(channel); /* it is finally safe to release it */
- return retval;
-}
-
-/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
-static int pubsubSubscribePattern(redisClient *c, robj *pattern) {
- int retval = 0;
-
- if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
- retval = 1;
- pubsubPattern *pat;
- listAddNodeTail(c->pubsub_patterns,pattern);
- incrRefCount(pattern);
- pat = zmalloc(sizeof(*pat));
- pat->pattern = getDecodedObject(pattern);
- pat->client = c;
- listAddNodeTail(server.pubsub_patterns,pat);
- }
- /* Notify the client */
- addReply(c,shared.mbulk3);
- addReply(c,shared.psubscribebulk);
- addReplyBulk(c,pattern);
- addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
- return retval;
-}
-
-/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
- * 0 if the client was not subscribed to the specified channel. */
-static int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
- listNode *ln;
- pubsubPattern pat;
- int retval = 0;
-
- incrRefCount(pattern); /* Protect the object. May be the same we remove */
- if ((ln = listSearchKey(c->pubsub_patterns,pattern)) != NULL) {
- retval = 1;
- listDelNode(c->pubsub_patterns,ln);
- pat.client = c;
- pat.pattern = pattern;
- ln = listSearchKey(server.pubsub_patterns,&pat);
- listDelNode(server.pubsub_patterns,ln);
- }
- /* Notify the client */
- if (notify) {
- addReply(c,shared.mbulk3);
- addReply(c,shared.punsubscribebulk);
- addReplyBulk(c,pattern);
- addReplyLongLong(c,dictSize(c->pubsub_channels)+
- listLength(c->pubsub_patterns));
- }
- decrRefCount(pattern);
- return retval;
-}
-
-/* Unsubscribe from all the channels. Return the number of channels the
- * client was subscribed from. */
-static int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
- dictIterator *di = dictGetIterator(c->pubsub_channels);
- dictEntry *de;
- int count = 0;
-
- while((de = dictNext(di)) != NULL) {
- robj *channel = dictGetEntryKey(de);
-
- count += pubsubUnsubscribeChannel(c,channel,notify);
- }
- dictReleaseIterator(di);
- return count;
-}
-
-/* Unsubscribe from all the patterns. Return the number of patterns the
- * client was subscribed from. */
-static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
- listNode *ln;
- listIter li;
- int count = 0;
-
- listRewind(c->pubsub_patterns,&li);
- while ((ln = listNext(&li)) != NULL) {
- robj *pattern = ln->value;
-
- count += pubsubUnsubscribePattern(c,pattern,notify);
- }
- return count;
-}
-
-/* Publish a message */
-static int pubsubPublishMessage(robj *channel, robj *message) {
- int receivers = 0;
- struct dictEntry *de;
- listNode *ln;
- listIter li;
-
- /* Send to clients listening for that channel */
- de = dictFind(server.pubsub_channels,channel);
- if (de) {
- list *list = dictGetEntryVal(de);
- listNode *ln;
- listIter li;
-
- listRewind(list,&li);
- while ((ln = listNext(&li)) != NULL) {
- redisClient *c = ln->value;
-
- addReply(c,shared.mbulk3);
- addReply(c,shared.messagebulk);
- addReplyBulk(c,channel);
- addReplyBulk(c,message);
- receivers++;
- }
- }
- /* Send to clients listening to matching channels */
- if (listLength(server.pubsub_patterns)) {
- listRewind(server.pubsub_patterns,&li);
- channel = getDecodedObject(channel);
- while ((ln = listNext(&li)) != NULL) {
- pubsubPattern *pat = ln->value;
-
- if (stringmatchlen((char*)pat->pattern->ptr,
- sdslen(pat->pattern->ptr),
- (char*)channel->ptr,
- sdslen(channel->ptr),0)) {
- addReply(pat->client,shared.mbulk4);
- addReply(pat->client,shared.pmessagebulk);
- addReplyBulk(pat->client,pat->pattern);
- addReplyBulk(pat->client,channel);
- addReplyBulk(pat->client,message);
- receivers++;
- }
- }
- decrRefCount(channel);
- }
- return receivers;
-}
-
-static void subscribeCommand(redisClient *c) {
- int j;
-
- for (j = 1; j < c->argc; j++)
- pubsubSubscribeChannel(c,c->argv[j]);
-}
-
-static void unsubscribeCommand(redisClient *c) {
- if (c->argc == 1) {
- pubsubUnsubscribeAllChannels(c,1);
- return;
- } else {
- int j;
-
- for (j = 1; j < c->argc; j++)
- pubsubUnsubscribeChannel(c,c->argv[j],1);
- }
-}
-
-static void psubscribeCommand(redisClient *c) {
- int j;
-
- for (j = 1; j < c->argc; j++)
- pubsubSubscribePattern(c,c->argv[j]);
-}
-
-static void punsubscribeCommand(redisClient *c) {
- if (c->argc == 1) {
- pubsubUnsubscribeAllPatterns(c,1);
- return;
- } else {
- int j;
-
- for (j = 1; j < c->argc; j++)
- pubsubUnsubscribePattern(c,c->argv[j],1);
- }
-}
-
-static void publishCommand(redisClient *c) {
- int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
- addReplyLongLong(c,receivers);
-}
-
-/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
- *
- * The implementation uses a per-DB hash table mapping keys to list of clients
- * WATCHing those keys, so that given a key that is going to be modified
- * we can mark all the associated clients as dirty.
- *
- * Also every client contains a list of WATCHed keys so that's possible to
- * un-watch such keys when the client is freed or when UNWATCH is called. */
-
-/* In the client->watched_keys list we need to use watchedKey structures
- * as in order to identify a key in Redis we need both the key name and the
- * DB */
-typedef struct watchedKey {
- robj *key;
- redisDb *db;
-} watchedKey;
-
-/* Watch for the specified key */
-static void watchForKey(redisClient *c, robj *key) {
- list *clients = NULL;
- listIter li;
- listNode *ln;
- watchedKey *wk;
-
- /* Check if we are already watching for this key */
- listRewind(c->watched_keys,&li);
- while((ln = listNext(&li))) {
- wk = listNodeValue(ln);
- if (wk->db == c->db && equalStringObjects(key,wk->key))
- return; /* Key already watched */
- }
- /* This key is not already watched in this DB. Let's add it */
- clients = dictFetchValue(c->db->watched_keys,key);
- if (!clients) {
- clients = listCreate();
- dictAdd(c->db->watched_keys,key,clients);
- incrRefCount(key);
- }
- listAddNodeTail(clients,c);
- /* Add the new key to the lits of keys watched by this client */
- wk = zmalloc(sizeof(*wk));
- wk->key = key;
- wk->db = c->db;
- incrRefCount(key);
- listAddNodeTail(c->watched_keys,wk);
-}
-
-/* Unwatch all the keys watched by this client. To clean the EXEC dirty
- * flag is up to the caller. */
-static void unwatchAllKeys(redisClient *c) {
- listIter li;
- listNode *ln;
-
- if (listLength(c->watched_keys) == 0) return;
- listRewind(c->watched_keys,&li);
- while((ln = listNext(&li))) {
- list *clients;
- watchedKey *wk;
-
- /* Lookup the watched key -> clients list and remove the client
- * from the list */
- wk = listNodeValue(ln);
- clients = dictFetchValue(wk->db->watched_keys, wk->key);
- assert(clients != NULL);
- listDelNode(clients,listSearchKey(clients,c));
- /* Kill the entry at all if this was the only client */
- if (listLength(clients) == 0)
- dictDelete(wk->db->watched_keys, wk->key);
- /* Remove this watched key from the client->watched list */
- listDelNode(c->watched_keys,ln);
- decrRefCount(wk->key);
- zfree(wk);
- }
-}
-
-/* "Touch" a key, so that if this key is being WATCHed by some client the
- * next EXEC will fail. */
-static void touchWatchedKey(redisDb *db, robj *key) {
- list *clients;
- listIter li;
- listNode *ln;
-
- if (dictSize(db->watched_keys) == 0) return;
- clients = dictFetchValue(db->watched_keys, key);
- if (!clients) return;
-
- /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
- /* Check if we are already watching for this key */
- listRewind(clients,&li);
- while((ln = listNext(&li))) {
- redisClient *c = listNodeValue(ln);
-
- c->flags |= REDIS_DIRTY_CAS;
- }
-}
-
-/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
- * flush but will be deleted as effect of the flushing operation should
- * be touched. "dbid" is the DB that's getting the flush. -1 if it is
- * a FLUSHALL operation (all the DBs flushed). */
-static void touchWatchedKeysOnFlush(int dbid) {
- listIter li1, li2;
- listNode *ln;
-
- /* For every client, check all the waited keys */
- listRewind(server.clients,&li1);
- while((ln = listNext(&li1))) {
- redisClient *c = listNodeValue(ln);
- listRewind(c->watched_keys,&li2);
- while((ln = listNext(&li2))) {
- watchedKey *wk = listNodeValue(ln);
-
- /* For every watched key matching the specified DB, if the
- * key exists, mark the client as dirty, as the key will be
- * removed. */
- if (dbid == -1 || wk->db->id == dbid) {
- if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
- c->flags |= REDIS_DIRTY_CAS;
- }
- }
- }
-}
-
-static void watchCommand(redisClient *c) {
- int j;
-
- if (c->flags & REDIS_MULTI) {
- addReplySds(c,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
- return;
- }
- for (j = 1; j < c->argc; j++)
- watchForKey(c,c->argv[j]);
- addReply(c,shared.ok);
-}
-
-static void unwatchCommand(redisClient *c) {
- unwatchAllKeys(c);
- c->flags &= (~REDIS_DIRTY_CAS);
- addReply(c,shared.ok);
-}
-
-/* ================================= Debugging ============================== */
-
-/* Compute the sha1 of string at 's' with 'len' bytes long.
- * The SHA1 is then xored againt the string pointed by digest.
- * Since xor is commutative, this operation is used in order to
- * "add" digests relative to unordered elements.
- *
- * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
-static void xorDigest(unsigned char *digest, void *ptr, size_t len) {
- SHA1_CTX ctx;
- unsigned char hash[20], *s = ptr;
- int j;
-
- SHA1Init(&ctx);
- SHA1Update(&ctx,s,len);
- SHA1Final(hash,&ctx);
-
- for (j = 0; j < 20; j++)
- digest[j] ^= hash[j];
-}
-
-static void xorObjectDigest(unsigned char *digest, robj *o) {
- o = getDecodedObject(o);
- xorDigest(digest,o->ptr,sdslen(o->ptr));
- decrRefCount(o);
-}
-
-/* This function instead of just computing the SHA1 and xoring it
- * against diget, also perform the digest of "digest" itself and
- * replace the old value with the new one.
- *
- * So the final digest will be:
- *
- * digest = SHA1(digest xor SHA1(data))
- *
- * This function is used every time we want to preserve the order so
- * that digest(a,b,c,d) will be different than digest(b,c,d,a)
- *
- * Also note that mixdigest("foo") followed by mixdigest("bar")
- * will lead to a different digest compared to "fo", "obar".
- */
-static void mixDigest(unsigned char *digest, void *ptr, size_t len) {
- SHA1_CTX ctx;
- char *s = ptr;
-
- xorDigest(digest,s,len);
- SHA1Init(&ctx);
- SHA1Update(&ctx,digest,20);
- SHA1Final(digest,&ctx);
-}
-
-static void mixObjectDigest(unsigned char *digest, robj *o) {
- o = getDecodedObject(o);
- mixDigest(digest,o->ptr,sdslen(o->ptr));
- decrRefCount(o);
-}
-
-/* Compute the dataset digest. Since keys, sets elements, hashes elements
- * are not ordered, we use a trick: every aggregate digest is the xor
- * of the digests of their elements. This way the order will not change
- * the result. For list instead we use a feedback entering the output digest
- * as input in order to ensure that a different ordered list will result in
- * a different digest. */
-static void computeDatasetDigest(unsigned char *final) {
- unsigned char digest[20];
- char buf[128];
- dictIterator *di = NULL;
- dictEntry *de;
- int j;
- uint32_t aux;
-
- memset(final,0,20); /* Start with a clean result */
-
- for (j = 0; j < server.dbnum; j++) {
- redisDb *db = server.db+j;
-
- if (dictSize(db->dict) == 0) continue;
- di = dictGetIterator(db->dict);
-
- /* hash the DB id, so the same dataset moved in a different
- * DB will lead to a different digest */
- aux = htonl(j);
- mixDigest(final,&aux,sizeof(aux));
-
- /* Iterate this DB writing every entry */
- while((de = dictNext(di)) != NULL) {
- sds key;
- robj *keyobj, *o;
- time_t expiretime;
-
- memset(digest,0,20); /* This key-val digest */
- key = dictGetEntryKey(de);
- keyobj = createStringObject(key,sdslen(key));
-
- mixDigest(digest,key,sdslen(key));
-
- /* Make sure the key is loaded if VM is active */
- o = lookupKeyRead(db,keyobj);
-
- aux = htonl(o->type);
- mixDigest(digest,&aux,sizeof(aux));
- expiretime = getExpire(db,keyobj);
-
- /* Save the key and associated value */
- if (o->type == REDIS_STRING) {
- mixObjectDigest(digest,o);
- } else if (o->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(o,0,REDIS_TAIL);
- listTypeEntry entry;
- while(listTypeNext(li,&entry)) {
- robj *eleobj = listTypeGet(&entry);
- mixObjectDigest(digest,eleobj);
- decrRefCount(eleobj);
- }
- listTypeReleaseIterator(li);
- } else if (o->type == REDIS_SET) {
- dict *set = o->ptr;
- dictIterator *di = dictGetIterator(set);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
-
- xorObjectDigest(digest,eleobj);
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_ZSET) {
- zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetEntryKey(de);
- double *score = dictGetEntryVal(de);
- unsigned char eledigest[20];
-
- snprintf(buf,sizeof(buf),"%.17g",*score);
- memset(eledigest,0,20);
- mixObjectDigest(eledigest,eleobj);
- mixDigest(eledigest,buf,strlen(buf));
- xorDigest(digest,eledigest,20);
- }
- dictReleaseIterator(di);
- } else if (o->type == REDIS_HASH) {
- hashTypeIterator *hi;
- robj *obj;
-
- hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
- unsigned char eledigest[20];
-
- memset(eledigest,0,20);
- obj = hashTypeCurrent(hi,REDIS_HASH_KEY);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
- obj = hashTypeCurrent(hi,REDIS_HASH_VALUE);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
- xorDigest(digest,eledigest,20);
- }
- hashTypeReleaseIterator(hi);
- } else {
- redisPanic("Unknown object type");
- }
- /* If the key has an expire, add it to the mix */
- if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
- /* We can finally xor the key-val digest to the final digest */
- xorDigest(final,digest,20);
- decrRefCount(keyobj);
- }
- dictReleaseIterator(di);
- }
-}
-
-static void debugCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
- *((char*)-1) = 'x';
- } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
- if (rdbSave(server.dbfilename) != REDIS_OK) {
- addReply(c,shared.err);
- return;
- }
- emptyDb();
- if (rdbLoad(server.dbfilename) != REDIS_OK) {
- addReply(c,shared.err);
- return;
- }
- redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
- addReply(c,shared.ok);
- } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
- emptyDb();
- if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
- addReply(c,shared.err);
- return;
- }
- redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
- addReply(c,shared.ok);
- } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
- dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
- robj *val;
-
- if (!de) {
- addReply(c,shared.nokeyerr);
- return;
- }
- val = dictGetEntryVal(de);
- if (!server.vm_enabled || (val->storage == REDIS_VM_MEMORY ||
- val->storage == REDIS_VM_SWAPPING)) {
- char *strenc;
- char buf[128];
-
- if (val->encoding < (sizeof(strencoding)/sizeof(char*))) {
- strenc = strencoding[val->encoding];
- } else {
- snprintf(buf,64,"unknown encoding %d\n", val->encoding);
- strenc = buf;
- }
- addReplySds(c,sdscatprintf(sdsempty(),
- "+Value at:%p refcount:%d "
- "encoding:%s serializedlength:%lld\r\n",
- (void*)val, val->refcount,
- strenc, (long long) rdbSavedObjectLen(val,NULL)));
- } else {
- vmpointer *vp = (vmpointer*) val;
- addReplySds(c,sdscatprintf(sdsempty(),
- "+Value swapped at: page %llu "
- "using %llu pages\r\n",
- (unsigned long long) vp->page,
- (unsigned long long) vp->usedpages));
- }
- } else if (!strcasecmp(c->argv[1]->ptr,"swapin") && c->argc == 3) {
- lookupKeyRead(c->db,c->argv[2]);
- addReply(c,shared.ok);
- } else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
- dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
- robj *val;
- vmpointer *vp;
-
- if (!server.vm_enabled) {
- addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
- return;
- }
- if (!de) {
- addReply(c,shared.nokeyerr);
- return;
- }
- val = dictGetEntryVal(de);
- /* Swap it */
- if (val->storage != REDIS_VM_MEMORY) {
- addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
- } else if (val->refcount != 1) {
- addReplySds(c,sdsnew("-ERR Object is shared\r\n"));
- } else if ((vp = vmSwapObjectBlocking(val)) != NULL) {
- dictGetEntryVal(de) = vp;
- addReply(c,shared.ok);
- } else {
- addReply(c,shared.err);
- }
- } else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) {
- long keys, j;
- robj *key, *val;
- char buf[128];
-
- if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
- return;
- for (j = 0; j < keys; j++) {
- snprintf(buf,sizeof(buf),"key:%lu",j);
- key = createStringObject(buf,strlen(buf));
- if (lookupKeyRead(c->db,key) != NULL) {
- decrRefCount(key);
- continue;
- }
- snprintf(buf,sizeof(buf),"value:%lu",j);
- val = createStringObject(buf,strlen(buf));
- dbAdd(c->db,key,val);
- decrRefCount(key);
- }
- addReply(c,shared.ok);
- } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) {
- unsigned char digest[20];
- sds d = sdsnew("+");
- int j;
-
- computeDatasetDigest(digest);
- for (j = 0; j < 20; j++)
- d = sdscatprintf(d, "%02x",digest[j]);
-
- d = sdscatlen(d,"\r\n",2);
- addReplySds(c,d);
- } else {
- addReplySds(c,sdsnew(
- "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
- }
-}
-
-static void _redisAssert(char *estr, char *file, int line) {
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
- redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
-#ifdef HAVE_BACKTRACE
- redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
- *((char*)-1) = 'x';
-#endif
-}
-
-static void _redisPanic(char *msg, char *file, int line) {
- redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
- redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
-#ifdef HAVE_BACKTRACE
- redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
- *((char*)-1) = 'x';
-#endif
-}
-
-/* =================================== Main! ================================ */
-
-#ifdef __linux__
-int linuxOvercommitMemoryValue(void) {
- FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
- char buf[64];
-
- if (!fp) return -1;
- if (fgets(buf,64,fp) == NULL) {
- fclose(fp);
- return -1;
- }
- fclose(fp);
-
- return atoi(buf);
-}
-
-void linuxOvercommitMemoryWarning(void) {
- if (linuxOvercommitMemoryValue() == 0) {
- redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
- }
-}
-#endif /* __linux__ */
-
-static void daemonize(void) {
- int fd;
- FILE *fp;
-
- if (fork() != 0) exit(0); /* parent exits */
- setsid(); /* create a new session */
-
- /* Every output goes to /dev/null. If Redis is daemonized but
- * the 'logfile' is set to 'stdout' in the configuration file
- * it will not log at all. */
- if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
- dup2(fd, STDIN_FILENO);
- dup2(fd, STDOUT_FILENO);
- dup2(fd, STDERR_FILENO);
- if (fd > STDERR_FILENO) close(fd);
- }
- /* Try to write the pid file */
- fp = fopen(server.pidfile,"w");
- if (fp) {
- fprintf(fp,"%d\n",getpid());
- fclose(fp);
- }
-}
-
-static void version() {
- printf("Redis server version %s (%s:%d)\n", REDIS_VERSION,
- redisGitSHA1(), atoi(redisGitDirty()) > 0);
- exit(0);
-}
-
-static void usage() {
- fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
- fprintf(stderr," ./redis-server - (read config from stdin)\n");
- exit(1);
-}
-
-int main(int argc, char **argv) {
- time_t start;
-
- initServerConfig();
- sortCommandTable();
- if (argc == 2) {
- if (strcmp(argv[1], "-v") == 0 ||
- strcmp(argv[1], "--version") == 0) version();
- if (strcmp(argv[1], "--help") == 0) usage();
- resetServerSaveParams();
- loadServerConfig(argv[1]);
- } else if ((argc > 2)) {
- usage();
- } else {
- redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
- }
- if (server.daemonize) daemonize();
- initServer();
- redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
-#ifdef __linux__
- linuxOvercommitMemoryWarning();
-#endif
- start = time(NULL);
- if (server.appendonly) {
- if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
- redisLog(REDIS_NOTICE,"DB loaded from append only file: %ld seconds",time(NULL)-start);
- } else {
- if (rdbLoad(server.dbfilename) == REDIS_OK)
- redisLog(REDIS_NOTICE,"DB loaded from disk: %ld seconds",time(NULL)-start);
- }
- redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
- aeSetBeforeSleepProc(server.el,beforeSleep);
- aeMain(server.el);
- aeDeleteEventLoop(server.el);
- return 0;
-}
-
-/* ============================= Backtrace support ========================= */
-
-#ifdef HAVE_BACKTRACE
-static char *findFuncName(void *pointer, unsigned long *offset);
-
-static void *getMcontextEip(ucontext_t *uc) {
-#if defined(__FreeBSD__)
- return (void*) uc->uc_mcontext.mc_eip;
-#elif defined(__dietlibc__)
- return (void*) uc->uc_mcontext.eip;
-#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
- #if __x86_64__
- return (void*) uc->uc_mcontext->__ss.__rip;
- #else
- return (void*) uc->uc_mcontext->__ss.__eip;
- #endif
-#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
- #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
- return (void*) uc->uc_mcontext->__ss.__rip;
- #else
- return (void*) uc->uc_mcontext->__ss.__eip;
- #endif
-#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
- return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
-#elif defined(__ia64__) /* Linux IA64 */
- return (void*) uc->uc_mcontext.sc_ip;
-#else
- return NULL;
-#endif
-}
-
-static void segvHandler(int sig, siginfo_t *info, void *secret) {
- void *trace[100];
- char **messages = NULL;
- int i, trace_size = 0;
- unsigned long offset=0;
- ucontext_t *uc = (ucontext_t*) secret;
- sds infostring;
- REDIS_NOTUSED(info);
-
- redisLog(REDIS_WARNING,
- "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
- infostring = genRedisInfoString();
- redisLog(REDIS_WARNING, "%s",infostring);
- /* It's not safe to sdsfree() the returned string under memory
- * corruption conditions. Let it leak as we are going to abort */
-
- trace_size = backtrace(trace, 100);
- /* overwrite sigaction with caller's address */
- if (getMcontextEip(uc) != NULL) {
- trace[1] = getMcontextEip(uc);
- }
- messages = backtrace_symbols(trace, trace_size);
-
- for (i=1; i<trace_size; ++i) {
- char *fn = findFuncName(trace[i], &offset), *p;
-
- p = strchr(messages[i],'+');
- if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
- redisLog(REDIS_WARNING,"%s", messages[i]);
- } else {
- redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
- }
- }
- /* free(messages); Don't call free() with possibly corrupted memory. */
- _exit(0);
-}
-
-static void sigtermHandler(int sig) {
- REDIS_NOTUSED(sig);
-
- redisLog(REDIS_WARNING,"SIGTERM received, scheduling shutting down...");
- server.shutdown_asap = 1;
-}
-
-static void setupSigSegvAction(void) {
- struct sigaction act;
-
- sigemptyset (&act.sa_mask);
- /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
- * is used. Otherwise, sa_handler is used */
- act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
- act.sa_sigaction = segvHandler;
- sigaction (SIGSEGV, &act, NULL);
- sigaction (SIGBUS, &act, NULL);
- sigaction (SIGFPE, &act, NULL);
- sigaction (SIGILL, &act, NULL);
- sigaction (SIGBUS, &act, NULL);
-
- act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
- act.sa_handler = sigtermHandler;
- sigaction (SIGTERM, &act, NULL);
- return;
-}
-
-#include "staticsymbols.h"
-/* This function try to convert a pointer into a function name. It's used in
- * oreder to provide a backtrace under segmentation fault that's able to
- * display functions declared as static (otherwise the backtrace is useless). */
-static char *findFuncName(void *pointer, unsigned long *offset){
- int i, ret = -1;
- unsigned long off, minoff = 0;
-
- /* Try to match against the Symbol with the smallest offset */
- for (i=0; symsTable[i].pointer; i++) {
- unsigned long lp = (unsigned long) pointer;
-
- if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
- off=lp-symsTable[i].pointer;
- if (ret < 0 || off < minoff) {
- minoff=off;
- ret=i;
- }
- }
- }
- if (ret == -1) return NULL;
- *offset = minoff;
- return symsTable[ret].name;
-}
-#else /* HAVE_BACKTRACE */
-static void setupSigSegvAction(void) {
-}
-#endif /* HAVE_BACKTRACE */
-
-
-
-/* The End */
-
-
-
diff --git a/redis.h b/redis.h
deleted file mode 100644
index 18816844f..000000000
--- a/redis.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __REDIS_H__
-#define __REDIS_H__
-
-enum
-{
- REG_GS = 0,
-# define REG_GS REG_GS
- REG_FS,
-# define REG_FS REG_FS
- REG_ES,
-# define REG_ES REG_ES
- REG_DS,
-# define REG_DS REG_DS
- REG_EDI,
-# define REG_EDI REG_EDI
- REG_ESI,
-# define REG_ESI REG_ESI
- REG_EBP,
-# define REG_EBP REG_EBP
- REG_ESP,
-# define REG_ESP REG_ESP
- REG_EBX,
-# define REG_EBX REG_EBX
- REG_EDX,
-# define REG_EDX REG_EDX
- REG_ECX,
-# define REG_ECX REG_ECX
- REG_EAX,
-# define REG_EAX REG_EAX
- REG_TRAPNO,
-# define REG_TRAPNO REG_TRAPNO
- REG_ERR,
-# define REG_ERR REG_ERR
- REG_EIP,
-# define REG_EIP REG_EIP
- REG_CS,
-# define REG_CS REG_CS
- REG_EFL,
-# define REG_EFL REG_EFL
- REG_UESP,
-# define REG_UESP REG_UESP
- REG_SS
-# define REG_SS REG_SS
-};
-
-#endif
diff --git a/Makefile b/src/Makefile
index 96dddd69e..3cba3c069 100644
--- a/Makefile
+++ b/src/Makefile
@@ -15,7 +15,7 @@ endif
CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF)
DEBUG?= -g -rdynamic -ggdb
-OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o
+OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o vm.o pubsub.o multi.o debug.o sort.o
BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o
CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o
CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o
@@ -48,7 +48,7 @@ redis-check-dump.o: redis-check-dump.c lzf.h
redis-cli.o: redis-cli.c fmacros.h anet.h sds.h adlist.h zmalloc.h \
linenoise.h
redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \
- adlist.h zmalloc.h lzf.h pqsort.h zipmap.h ziplist.h sha1.h staticsymbols.h
+ adlist.h zmalloc.h lzf.h pqsort.h zipmap.h ziplist.h sha1.h
release.o: release.c release.h
sds.o: sds.c sds.h zmalloc.h
sha1.o: sha1.c sha1.h
@@ -83,11 +83,8 @@ clean:
dep:
$(CC) -MM *.c
-staticsymbols:
- tclsh utils/build-static-symbols.tcl > staticsymbols.h
-
test:
- tclsh8.5 tests/test_helper.tcl --tags "${TAGS}"
+ (cd ..; tclsh8.5 tests/test_helper.tcl --tags "${TAGS}")
bench:
./redis-benchmark
diff --git a/adlist.c b/src/adlist.c
index 015012f5c..015012f5c 100644
--- a/adlist.c
+++ b/src/adlist.c
diff --git a/adlist.h b/src/adlist.h
index a1209f62f..a1209f62f 100644
--- a/adlist.h
+++ b/src/adlist.h
diff --git a/ae.c b/src/ae.c
index c7918ee1d..c7918ee1d 100644
--- a/ae.c
+++ b/src/ae.c
diff --git a/ae.h b/src/ae.h
index a9db18ed9..a9db18ed9 100644
--- a/ae.h
+++ b/src/ae.h
diff --git a/ae_epoll.c b/src/ae_epoll.c
index d48977b65..d48977b65 100644
--- a/ae_epoll.c
+++ b/src/ae_epoll.c
diff --git a/ae_kqueue.c b/src/ae_kqueue.c
index 04c3536ba..04c3536ba 100644
--- a/ae_kqueue.c
+++ b/src/ae_kqueue.c
diff --git a/ae_select.c b/src/ae_select.c
index 43f5867f3..43f5867f3 100644
--- a/ae_select.c
+++ b/src/ae_select.c
diff --git a/anet.c b/src/anet.c
index 4fe811a11..4fe811a11 100644
--- a/anet.c
+++ b/src/anet.c
diff --git a/anet.h b/src/anet.h
index ce0f47787..ce0f47787 100644
--- a/anet.h
+++ b/src/anet.h
diff --git a/src/aof.c b/src/aof.c
new file mode 100644
index 000000000..51054b296
--- /dev/null
+++ b/src/aof.c
@@ -0,0 +1,694 @@
+#include "redis.h"
+
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* Called when the user switches from "appendonly yes" to "appendonly no"
+ * at runtime using the CONFIG command. */
+void stopAppendOnly(void) {
+ flushAppendOnlyFile();
+ aof_fsync(server.appendfd);
+ close(server.appendfd);
+
+ server.appendfd = -1;
+ server.appendseldb = -1;
+ server.appendonly = 0;
+ /* rewrite operation in progress? kill it, wait child exit */
+ if (server.bgsavechildpid != -1) {
+ int statloc;
+
+ if (kill(server.bgsavechildpid,SIGKILL) != -1)
+ wait3(&statloc,0,NULL);
+ /* reset the buffer accumulating changes while the child saves */
+ sdsfree(server.bgrewritebuf);
+ server.bgrewritebuf = sdsempty();
+ server.bgsavechildpid = -1;
+ }
+}
+
+/* Called when the user switches from "appendonly no" to "appendonly yes"
+ * at runtime using the CONFIG command. */
+int startAppendOnly(void) {
+ server.appendonly = 1;
+ server.lastfsync = time(NULL);
+ server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
+ if (server.appendfd == -1) {
+ redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno));
+ return REDIS_ERR;
+ }
+ if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
+ server.appendonly = 0;
+ close(server.appendfd);
+ redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno));
+ return REDIS_ERR;
+ }
+ return REDIS_OK;
+}
+
+/* Write the append only file buffer on disk.
+ *
+ * Since we are required to write the AOF before replying to the client,
+ * and the only way the client socket can get a write is entering when the
+ * the event loop, we accumulate all the AOF writes in a memory
+ * buffer and write it on disk using this function just before entering
+ * the event loop again. */
+void flushAppendOnlyFile(void) {
+ time_t now;
+ ssize_t nwritten;
+
+ if (sdslen(server.aofbuf) == 0) return;
+
+ /* We want to perform a single write. This should be guaranteed atomic
+ * at least if the filesystem we are writing is a real physical one.
+ * While this will save us against the server being killed I don't think
+ * there is much to do about the whole server stopping for power problems
+ * or alike */
+ nwritten = write(server.appendfd,server.aofbuf,sdslen(server.aofbuf));
+ if (nwritten != (signed)sdslen(server.aofbuf)) {
+ /* Ooops, we are in troubles. The best thing to do for now is
+ * aborting instead of giving the illusion that everything is
+ * working as expected. */
+ if (nwritten == -1) {
+ redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
+ } else {
+ redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
+ }
+ exit(1);
+ }
+ sdsfree(server.aofbuf);
+ server.aofbuf = sdsempty();
+
+ /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
+ * childs performing heavy I/O on disk. */
+ if (server.no_appendfsync_on_rewrite &&
+ (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1))
+ return;
+ /* Fsync if needed */
+ now = time(NULL);
+ if (server.appendfsync == APPENDFSYNC_ALWAYS ||
+ (server.appendfsync == APPENDFSYNC_EVERYSEC &&
+ now-server.lastfsync > 1))
+ {
+ /* aof_fsync is defined as fdatasync() for Linux in order to avoid
+ * flushing metadata. */
+ aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
+ server.lastfsync = now;
+ }
+}
+
+sds catAppendOnlyGenericCommand(sds buf, int argc, robj **argv) {
+ int j;
+ buf = sdscatprintf(buf,"*%d\r\n",argc);
+ for (j = 0; j < argc; j++) {
+ robj *o = getDecodedObject(argv[j]);
+ buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
+ buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
+ buf = sdscatlen(buf,"\r\n",2);
+ decrRefCount(o);
+ }
+ return buf;
+}
+
+sds catAppendOnlyExpireAtCommand(sds buf, robj *key, robj *seconds) {
+ int argc = 3;
+ long when;
+ robj *argv[3];
+
+ /* Make sure we can use strtol */
+ seconds = getDecodedObject(seconds);
+ when = time(NULL)+strtol(seconds->ptr,NULL,10);
+ decrRefCount(seconds);
+
+ argv[0] = createStringObject("EXPIREAT",8);
+ argv[1] = key;
+ argv[2] = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"%ld",when));
+ buf = catAppendOnlyGenericCommand(buf, argc, argv);
+ decrRefCount(argv[0]);
+ decrRefCount(argv[2]);
+ return buf;
+}
+
+void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
+ sds buf = sdsempty();
+ robj *tmpargv[3];
+
+ /* The DB this command was targetting is not the same as the last command
+ * we appendend. To issue a SELECT command is needed. */
+ if (dictid != server.appendseldb) {
+ char seldb[64];
+
+ snprintf(seldb,sizeof(seldb),"%d",dictid);
+ buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
+ (unsigned long)strlen(seldb),seldb);
+ server.appendseldb = dictid;
+ }
+
+ if (cmd->proc == expireCommand) {
+ /* Translate EXPIRE into EXPIREAT */
+ buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
+ } else if (cmd->proc == setexCommand) {
+ /* Translate SETEX to SET and EXPIREAT */
+ tmpargv[0] = createStringObject("SET",3);
+ tmpargv[1] = argv[1];
+ tmpargv[2] = argv[3];
+ buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
+ decrRefCount(tmpargv[0]);
+ buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
+ } else {
+ buf = catAppendOnlyGenericCommand(buf,argc,argv);
+ }
+
+ /* Append to the AOF buffer. This will be flushed on disk just before
+ * of re-entering the event loop, so before the client will get a
+ * positive reply about the operation performed. */
+ server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf));
+
+ /* If a background append only file rewriting is in progress we want to
+ * accumulate the differences between the child DB and the current one
+ * in a buffer, so that when the child process will do its work we
+ * can append the differences to the new append only file. */
+ if (server.bgrewritechildpid != -1)
+ server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
+
+ sdsfree(buf);
+}
+
+/* In Redis commands are always executed in the context of a client, so in
+ * order to load the append only file we need to create a fake client. */
+struct redisClient *createFakeClient(void) {
+ struct redisClient *c = zmalloc(sizeof(*c));
+
+ selectDb(c,0);
+ c->fd = -1;
+ c->querybuf = sdsempty();
+ c->argc = 0;
+ c->argv = NULL;
+ c->flags = 0;
+ /* We set the fake client as a slave waiting for the synchronization
+ * so that Redis will not try to send replies to this client. */
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ c->reply = listCreate();
+ listSetFreeMethod(c->reply,decrRefCount);
+ listSetDupMethod(c->reply,dupClientReplyValue);
+ initClientMultiState(c);
+ return c;
+}
+
+void freeFakeClient(struct redisClient *c) {
+ sdsfree(c->querybuf);
+ listRelease(c->reply);
+ freeClientMultiState(c);
+ zfree(c);
+}
+
+/* Replay the append log file. On error REDIS_OK is returned. On non fatal
+ * error (the append only file is zero-length) REDIS_ERR is returned. On
+ * fatal error an error message is logged and the program exists. */
+int loadAppendOnlyFile(char *filename) {
+ struct redisClient *fakeClient;
+ FILE *fp = fopen(filename,"r");
+ struct redis_stat sb;
+ int appendonly = server.appendonly;
+
+ if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
+ return REDIS_ERR;
+
+ if (fp == NULL) {
+ redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
+ exit(1);
+ }
+
+ /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
+ * to the same file we're about to read. */
+ server.appendonly = 0;
+
+ fakeClient = createFakeClient();
+ while(1) {
+ int argc, j;
+ unsigned long len;
+ robj **argv;
+ char buf[128];
+ sds argsds;
+ struct redisCommand *cmd;
+ int force_swapout;
+
+ if (fgets(buf,sizeof(buf),fp) == NULL) {
+ if (feof(fp))
+ break;
+ else
+ goto readerr;
+ }
+ if (buf[0] != '*') goto fmterr;
+ argc = atoi(buf+1);
+ argv = zmalloc(sizeof(robj*)*argc);
+ for (j = 0; j < argc; j++) {
+ if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
+ if (buf[0] != '$') goto fmterr;
+ len = strtol(buf+1,NULL,10);
+ argsds = sdsnewlen(NULL,len);
+ if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
+ argv[j] = createObject(REDIS_STRING,argsds);
+ if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
+ }
+
+ /* Command lookup */
+ cmd = lookupCommand(argv[0]->ptr);
+ if (!cmd) {
+ redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
+ exit(1);
+ }
+ /* Try object encoding */
+ if (cmd->flags & REDIS_CMD_BULK)
+ argv[argc-1] = tryObjectEncoding(argv[argc-1]);
+ /* Run the command in the context of a fake client */
+ fakeClient->argc = argc;
+ fakeClient->argv = argv;
+ cmd->proc(fakeClient);
+ /* Discard the reply objects list from the fake client */
+ while(listLength(fakeClient->reply))
+ listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
+ /* Clean up, ready for the next command */
+ for (j = 0; j < argc; j++) decrRefCount(argv[j]);
+ zfree(argv);
+ /* Handle swapping while loading big datasets when VM is on */
+ force_swapout = 0;
+ if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
+ force_swapout = 1;
+
+ if (server.vm_enabled && force_swapout) {
+ while (zmalloc_used_memory() > server.vm_max_memory) {
+ if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
+ }
+ }
+ }
+
+ /* This point can only be reached when EOF is reached without errors.
+ * If the client is in the middle of a MULTI/EXEC, log error and quit. */
+ if (fakeClient->flags & REDIS_MULTI) goto readerr;
+
+ fclose(fp);
+ freeFakeClient(fakeClient);
+ server.appendonly = appendonly;
+ return REDIS_OK;
+
+readerr:
+ if (feof(fp)) {
+ redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
+ } else {
+ redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
+ }
+ exit(1);
+fmterr:
+ redisLog(REDIS_WARNING,"Bad file format reading the append only file");
+ exit(1);
+}
+
+/* Write binary-safe string into a file in the bulkformat
+ * $<count>\r\n<payload>\r\n */
+int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
+ char cbuf[128];
+ int clen;
+ cbuf[0] = '$';
+ clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,len);
+ cbuf[clen++] = '\r';
+ cbuf[clen++] = '\n';
+ if (fwrite(cbuf,clen,1,fp) == 0) return 0;
+ if (len > 0 && fwrite(s,len,1,fp) == 0) return 0;
+ if (fwrite("\r\n",2,1,fp) == 0) return 0;
+ return 1;
+}
+
+/* Write a double value in bulk format $<count>\r\n<payload>\r\n */
+int fwriteBulkDouble(FILE *fp, double d) {
+ char buf[128], dbuf[128];
+
+ snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
+ snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
+ if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
+ if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
+ return 1;
+}
+
+/* Write a long value in bulk format $<count>\r\n<payload>\r\n */
+int fwriteBulkLongLong(FILE *fp, long long l) {
+ char bbuf[128], lbuf[128];
+ unsigned int blen, llen;
+ llen = ll2string(lbuf,32,l);
+ blen = snprintf(bbuf,sizeof(bbuf),"$%u\r\n%s\r\n",llen,lbuf);
+ if (fwrite(bbuf,blen,1,fp) == 0) return 0;
+ return 1;
+}
+
+/* Delegate writing an object to writing a bulk string or bulk long long. */
+int fwriteBulkObject(FILE *fp, robj *obj) {
+ /* Avoid using getDecodedObject to help copy-on-write (we are often
+ * in a child process when this function is called). */
+ if (obj->encoding == REDIS_ENCODING_INT) {
+ return fwriteBulkLongLong(fp,(long)obj->ptr);
+ } else if (obj->encoding == REDIS_ENCODING_RAW) {
+ return fwriteBulkString(fp,obj->ptr,sdslen(obj->ptr));
+ } else {
+ redisPanic("Unknown string encoding");
+ }
+}
+
+/* Write a sequence of commands able to fully rebuild the dataset into
+ * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
+int rewriteAppendOnlyFile(char *filename) {
+ dictIterator *di = NULL;
+ dictEntry *de;
+ FILE *fp;
+ char tmpfile[256];
+ int j;
+ time_t now = time(NULL);
+
+ /* Note that we have to use a different temp name here compared to the
+ * one used by rewriteAppendOnlyFileBackground() function. */
+ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
+ return REDIS_ERR;
+ }
+ for (j = 0; j < server.dbnum; j++) {
+ char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
+ redisDb *db = server.db+j;
+ dict *d = db->dict;
+ if (dictSize(d) == 0) continue;
+ di = dictGetIterator(d);
+ if (!di) {
+ fclose(fp);
+ return REDIS_ERR;
+ }
+
+ /* SELECT the new DB */
+ if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkLongLong(fp,j) == 0) goto werr;
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ sds keystr = dictGetEntryKey(de);
+ robj key, *o;
+ time_t expiretime;
+ int swapped;
+
+ keystr = dictGetEntryKey(de);
+ o = dictGetEntryVal(de);
+ initStaticStringObject(key,keystr);
+ /* If the value for this key is swapped, load a preview in memory.
+ * We use a "swapped" flag to remember if we need to free the
+ * value object instead to just increment the ref count anyway
+ * in order to avoid copy-on-write of pages if we are forked() */
+ if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
+ o->storage == REDIS_VM_SWAPPING) {
+ swapped = 0;
+ } else {
+ o = vmPreviewObject(o);
+ swapped = 1;
+ }
+ expiretime = getExpire(db,&key);
+
+ /* Save the key and associated value */
+ if (o->type == REDIS_STRING) {
+ /* Emit a SET command */
+ char cmd[]="*3\r\n$3\r\nSET\r\n";
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ /* Key and value */
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkObject(fp,o) == 0) goto werr;
+ } else if (o->type == REDIS_LIST) {
+ /* Emit the RPUSHes needed to rebuild the list */
+ char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *zl = o->ptr;
+ unsigned char *p = ziplistIndex(zl,0);
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+
+ while(ziplistGet(p,&vstr,&vlen,&vlong)) {
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (vstr) {
+ if (fwriteBulkString(fp,(char*)vstr,vlen) == 0)
+ goto werr;
+ } else {
+ if (fwriteBulkLongLong(fp,vlong) == 0)
+ goto werr;
+ }
+ p = ziplistNext(zl,p);
+ }
+ } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
+ list *list = o->ptr;
+ listNode *ln;
+ listIter li;
+
+ listRewind(list,&li);
+ while((ln = listNext(&li))) {
+ robj *eleobj = listNodeValue(ln);
+
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ } else if (o->type == REDIS_SET) {
+ /* Emit the SADDs needed to rebuild the set */
+ dict *set = o->ptr;
+ dictIterator *di = dictGetIterator(set);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ char cmd[]="*3\r\n$4\r\nSADD\r\n";
+ robj *eleobj = dictGetEntryKey(de);
+
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_ZSET) {
+ /* Emit the ZADDs needed to rebuild the sorted set */
+ zset *zs = o->ptr;
+ dictIterator *di = dictGetIterator(zs->dict);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ char cmd[]="*4\r\n$4\r\nZADD\r\n";
+ robj *eleobj = dictGetEntryKey(de);
+ double *score = dictGetEntryVal(de);
+
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkDouble(fp,*score) == 0) goto werr;
+ if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_HASH) {
+ char cmd[]="*4\r\n$4\r\nHSET\r\n";
+
+ /* Emit the HSETs needed to rebuild the hash */
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *p = zipmapRewind(o->ptr);
+ unsigned char *field, *val;
+ unsigned int flen, vlen;
+
+ while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) {
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkString(fp,(char*)field,flen) == -1)
+ return -1;
+ if (fwriteBulkString(fp,(char*)val,vlen) == -1)
+ return -1;
+ }
+ } else {
+ dictIterator *di = dictGetIterator(o->ptr);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ robj *field = dictGetEntryKey(de);
+ robj *val = dictGetEntryVal(de);
+
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkObject(fp,field) == -1) return -1;
+ if (fwriteBulkObject(fp,val) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ }
+ } else {
+ redisPanic("Unknown object type");
+ }
+ /* Save the expire time */
+ if (expiretime != -1) {
+ char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
+ /* If this key is already expired skip it */
+ if (expiretime < now) continue;
+ if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
+ if (fwriteBulkObject(fp,&key) == 0) goto werr;
+ if (fwriteBulkLongLong(fp,expiretime) == 0) goto werr;
+ }
+ if (swapped) decrRefCount(o);
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Make sure data will not remain on the OS's output buffers */
+ fflush(fp);
+ aof_fsync(fileno(fp));
+ fclose(fp);
+
+ /* Use RENAME to make sure the DB file is changed atomically only
+ * if the generate DB file is ok. */
+ if (rename(tmpfile,filename) == -1) {
+ redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
+ unlink(tmpfile);
+ return REDIS_ERR;
+ }
+ redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
+ return REDIS_OK;
+
+werr:
+ fclose(fp);
+ unlink(tmpfile);
+ redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
+ if (di) dictReleaseIterator(di);
+ return REDIS_ERR;
+}
+
+/* This is how rewriting of the append only file in background works:
+ *
+ * 1) The user calls BGREWRITEAOF
+ * 2) Redis calls this function, that forks():
+ * 2a) the child rewrite the append only file in a temp file.
+ * 2b) the parent accumulates differences in server.bgrewritebuf.
+ * 3) When the child finished '2a' exists.
+ * 4) The parent will trap the exit code, if it's OK, will append the
+ * data accumulated into server.bgrewritebuf into the temp file, and
+ * finally will rename(2) the temp file in the actual file name.
+ * The the new file is reopened as the new append only file. Profit!
+ */
+int rewriteAppendOnlyFileBackground(void) {
+ pid_t childpid;
+
+ if (server.bgrewritechildpid != -1) return REDIS_ERR;
+ if (server.vm_enabled) waitEmptyIOJobsQueue();
+ if ((childpid = fork()) == 0) {
+ /* Child */
+ char tmpfile[256];
+
+ if (server.vm_enabled) vmReopenSwapFile();
+ close(server.fd);
+ snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
+ if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ redisLog(REDIS_WARNING,
+ "Can't rewrite append only file in background: fork: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ redisLog(REDIS_NOTICE,
+ "Background append only file rewriting started by pid %d",childpid);
+ server.bgrewritechildpid = childpid;
+ updateDictResizePolicy();
+ /* We set appendseldb to -1 in order to force the next call to the
+ * feedAppendOnlyFile() to issue a SELECT command, so the differences
+ * accumulated by the parent into server.bgrewritebuf will start
+ * with a SELECT statement and it will be safe to merge. */
+ server.appendseldb = -1;
+ return REDIS_OK;
+ }
+ return REDIS_OK; /* unreached */
+}
+
+void bgrewriteaofCommand(redisClient *c) {
+ if (server.bgrewritechildpid != -1) {
+ addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
+ return;
+ }
+ if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
+ char *status = "+Background append only file rewriting started\r\n";
+ addReplySds(c,sdsnew(status));
+ } else {
+ addReply(c,shared.err);
+ }
+}
+
+void aofRemoveTempFile(pid_t childpid) {
+ char tmpfile[256];
+
+ snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
+ unlink(tmpfile);
+}
+
+/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
+ * Handle this. */
+void backgroundRewriteDoneHandler(int statloc) {
+ int exitcode = WEXITSTATUS(statloc);
+ int bysignal = WIFSIGNALED(statloc);
+
+ if (!bysignal && exitcode == 0) {
+ int fd;
+ char tmpfile[256];
+
+ redisLog(REDIS_NOTICE,
+ "Background append only file rewriting terminated with success");
+ /* Now it's time to flush the differences accumulated by the parent */
+ snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
+ fd = open(tmpfile,O_WRONLY|O_APPEND);
+ if (fd == -1) {
+ redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
+ goto cleanup;
+ }
+ /* Flush our data... */
+ if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
+ (signed) sdslen(server.bgrewritebuf)) {
+ redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
+ close(fd);
+ goto cleanup;
+ }
+ redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
+ /* Now our work is to rename the temp file into the stable file. And
+ * switch the file descriptor used by the server for append only. */
+ if (rename(tmpfile,server.appendfilename) == -1) {
+ redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
+ close(fd);
+ goto cleanup;
+ }
+ /* Mission completed... almost */
+ redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
+ if (server.appendfd != -1) {
+ /* If append only is actually enabled... */
+ close(server.appendfd);
+ server.appendfd = fd;
+ if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd);
+ server.appendseldb = -1; /* Make sure it will issue SELECT */
+ redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
+ } else {
+ /* If append only is disabled we just generate a dump in this
+ * format. Why not? */
+ close(fd);
+ }
+ } else if (!bysignal && exitcode != 0) {
+ redisLog(REDIS_WARNING, "Background append only file rewriting error");
+ } else {
+ redisLog(REDIS_WARNING,
+ "Background append only file rewriting terminated by signal %d",
+ WTERMSIG(statloc));
+ }
+cleanup:
+ sdsfree(server.bgrewritebuf);
+ server.bgrewritebuf = sdsempty();
+ aofRemoveTempFile(server.bgrewritechildpid);
+ server.bgrewritechildpid = -1;
+}
diff --git a/src/config.c b/src/config.c
new file mode 100644
index 000000000..6d946ee0c
--- /dev/null
+++ b/src/config.c
@@ -0,0 +1,438 @@
+#include "redis.h"
+
+/*-----------------------------------------------------------------------------
+ * Config file parsing
+ *----------------------------------------------------------------------------*/
+
+int yesnotoi(char *s) {
+ if (!strcasecmp(s,"yes")) return 1;
+ else if (!strcasecmp(s,"no")) return 0;
+ else return -1;
+}
+
+void appendServerSaveParams(time_t seconds, int changes) {
+ server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
+ server.saveparams[server.saveparamslen].seconds = seconds;
+ server.saveparams[server.saveparamslen].changes = changes;
+ server.saveparamslen++;
+}
+
+void resetServerSaveParams() {
+ zfree(server.saveparams);
+ server.saveparams = NULL;
+ server.saveparamslen = 0;
+}
+
+/* I agree, this is a very rudimental way to load a configuration...
+ will improve later if the config gets more complex */
+void loadServerConfig(char *filename) {
+ FILE *fp;
+ char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
+ int linenum = 0;
+ sds line = NULL;
+
+ if (filename[0] == '-' && filename[1] == '\0')
+ fp = stdin;
+ else {
+ if ((fp = fopen(filename,"r")) == NULL) {
+ redisLog(REDIS_WARNING, "Fatal error, can't open config file '%s'", filename);
+ exit(1);
+ }
+ }
+
+ while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
+ sds *argv;
+ int argc, j;
+
+ linenum++;
+ line = sdsnew(buf);
+ line = sdstrim(line," \t\r\n");
+
+ /* Skip comments and blank lines*/
+ if (line[0] == '#' || line[0] == '\0') {
+ sdsfree(line);
+ continue;
+ }
+
+ /* Split into arguments */
+ argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
+ sdstolower(argv[0]);
+
+ /* Execute config directives */
+ if (!strcasecmp(argv[0],"timeout") && argc == 2) {
+ server.maxidletime = atoi(argv[1]);
+ if (server.maxidletime < 0) {
+ err = "Invalid timeout value"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"port") && argc == 2) {
+ server.port = atoi(argv[1]);
+ if (server.port < 1 || server.port > 65535) {
+ err = "Invalid port"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
+ server.bindaddr = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"save") && argc == 3) {
+ int seconds = atoi(argv[1]);
+ int changes = atoi(argv[2]);
+ if (seconds < 1 || changes < 0) {
+ err = "Invalid save parameters"; goto loaderr;
+ }
+ appendServerSaveParams(seconds,changes);
+ } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
+ if (chdir(argv[1]) == -1) {
+ redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
+ argv[1], strerror(errno));
+ exit(1);
+ }
+ } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
+ if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
+ else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
+ else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
+ else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
+ else {
+ err = "Invalid log level. Must be one of debug, notice, warning";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
+ FILE *logfp;
+
+ server.logfile = zstrdup(argv[1]);
+ if (!strcasecmp(server.logfile,"stdout")) {
+ zfree(server.logfile);
+ server.logfile = NULL;
+ }
+ if (server.logfile) {
+ /* Test if we are able to open the file. The server will not
+ * be able to abort just for this problem later... */
+ logfp = fopen(server.logfile,"a");
+ if (logfp == NULL) {
+ err = sdscatprintf(sdsempty(),
+ "Can't open the log file: %s", strerror(errno));
+ goto loaderr;
+ }
+ fclose(logfp);
+ }
+ } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
+ server.dbnum = atoi(argv[1]);
+ if (server.dbnum < 1) {
+ err = "Invalid number of databases"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"include") && argc == 2) {
+ loadServerConfig(argv[1]);
+ } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
+ server.maxclients = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
+ server.maxmemory = memtoll(argv[1],NULL);
+ } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
+ server.masterhost = sdsnew(argv[1]);
+ server.masterport = atoi(argv[2]);
+ server.replstate = REDIS_REPL_CONNECT;
+ } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
+ server.masterauth = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
+ if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
+ if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) {
+ if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
+ if ((server.daemonize = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
+ if ((server.appendonly = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
+ zfree(server.appendfilename);
+ server.appendfilename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite")
+ && argc == 2) {
+ if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
+ if (!strcasecmp(argv[1],"no")) {
+ server.appendfsync = APPENDFSYNC_NO;
+ } else if (!strcasecmp(argv[1],"always")) {
+ server.appendfsync = APPENDFSYNC_ALWAYS;
+ } else if (!strcasecmp(argv[1],"everysec")) {
+ server.appendfsync = APPENDFSYNC_EVERYSEC;
+ } else {
+ err = "argument must be 'no', 'always' or 'everysec'";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
+ server.requirepass = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
+ zfree(server.pidfile);
+ server.pidfile = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
+ zfree(server.dbfilename);
+ server.dbfilename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
+ if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"vm-swap-file") && argc == 2) {
+ zfree(server.vm_swap_file);
+ server.vm_swap_file = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
+ server.vm_max_memory = memtoll(argv[1],NULL);
+ } else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
+ server.vm_page_size = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
+ server.vm_pages = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
+ server.vm_max_threads = strtoll(argv[1], NULL, 10);
+ } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2){
+ server.hash_max_zipmap_entries = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2){
+ server.hash_max_zipmap_value = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
+ server.list_max_ziplist_entries = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2){
+ server.list_max_ziplist_value = memtoll(argv[1], NULL);
+ } else {
+ err = "Bad directive or wrong number of arguments"; goto loaderr;
+ }
+ for (j = 0; j < argc; j++)
+ sdsfree(argv[j]);
+ zfree(argv);
+ sdsfree(line);
+ }
+ if (fp != stdin) fclose(fp);
+ return;
+
+loaderr:
+ fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
+ fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
+ fprintf(stderr, ">>> '%s'\n", line);
+ fprintf(stderr, "%s\n", err);
+ exit(1);
+}
+
+/*-----------------------------------------------------------------------------
+ * CONFIG command for remote configuration
+ *----------------------------------------------------------------------------*/
+
+void configSetCommand(redisClient *c) {
+ robj *o = getDecodedObject(c->argv[3]);
+ long long ll;
+
+ if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
+ zfree(server.dbfilename);
+ server.dbfilename = zstrdup(o->ptr);
+ } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
+ zfree(server.requirepass);
+ server.requirepass = zstrdup(o->ptr);
+ } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
+ zfree(server.masterauth);
+ server.masterauth = zstrdup(o->ptr);
+ } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
+ if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
+ ll < 0) goto badfmt;
+ server.maxmemory = ll;
+ } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
+ if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
+ ll < 0 || ll > LONG_MAX) goto badfmt;
+ server.maxidletime = ll;
+ } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
+ if (!strcasecmp(o->ptr,"no")) {
+ server.appendfsync = APPENDFSYNC_NO;
+ } else if (!strcasecmp(o->ptr,"everysec")) {
+ server.appendfsync = APPENDFSYNC_EVERYSEC;
+ } else if (!strcasecmp(o->ptr,"always")) {
+ server.appendfsync = APPENDFSYNC_ALWAYS;
+ } else {
+ goto badfmt;
+ }
+ } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
+ int yn = yesnotoi(o->ptr);
+
+ if (yn == -1) goto badfmt;
+ server.no_appendfsync_on_rewrite = yn;
+ } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
+ int old = server.appendonly;
+ int new = yesnotoi(o->ptr);
+
+ if (new == -1) goto badfmt;
+ if (old != new) {
+ if (new == 0) {
+ stopAppendOnly();
+ } else {
+ if (startAppendOnly() == REDIS_ERR) {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-ERR Unable to turn on AOF. Check server logs.\r\n"));
+ decrRefCount(o);
+ return;
+ }
+ }
+ }
+ } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
+ int vlen, j;
+ sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
+
+ /* Perform sanity check before setting the new config:
+ * - Even number of args
+ * - Seconds >= 1, changes >= 0 */
+ if (vlen & 1) {
+ sdsfreesplitres(v,vlen);
+ goto badfmt;
+ }
+ for (j = 0; j < vlen; j++) {
+ char *eptr;
+ long val;
+
+ val = strtoll(v[j], &eptr, 10);
+ if (eptr[0] != '\0' ||
+ ((j & 1) == 0 && val < 1) ||
+ ((j & 1) == 1 && val < 0)) {
+ sdsfreesplitres(v,vlen);
+ goto badfmt;
+ }
+ }
+ /* Finally set the new config */
+ resetServerSaveParams();
+ for (j = 0; j < vlen; j += 2) {
+ time_t seconds;
+ int changes;
+
+ seconds = strtoll(v[j],NULL,10);
+ changes = strtoll(v[j+1],NULL,10);
+ appendServerSaveParams(seconds, changes);
+ }
+ sdsfreesplitres(v,vlen);
+ } else {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-ERR not supported CONFIG parameter %s\r\n",
+ (char*)c->argv[2]->ptr));
+ decrRefCount(o);
+ return;
+ }
+ decrRefCount(o);
+ addReply(c,shared.ok);
+ return;
+
+badfmt: /* Bad format errors */
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
+ (char*)o->ptr,
+ (char*)c->argv[2]->ptr));
+ decrRefCount(o);
+}
+
+void configGetCommand(redisClient *c) {
+ robj *o = getDecodedObject(c->argv[2]);
+ robj *lenobj = createObject(REDIS_STRING,NULL);
+ char *pattern = o->ptr;
+ int matches = 0;
+
+ addReply(c,lenobj);
+ decrRefCount(lenobj);
+
+ if (stringmatch(pattern,"dbfilename",0)) {
+ addReplyBulkCString(c,"dbfilename");
+ addReplyBulkCString(c,server.dbfilename);
+ matches++;
+ }
+ if (stringmatch(pattern,"requirepass",0)) {
+ addReplyBulkCString(c,"requirepass");
+ addReplyBulkCString(c,server.requirepass);
+ matches++;
+ }
+ if (stringmatch(pattern,"masterauth",0)) {
+ addReplyBulkCString(c,"masterauth");
+ addReplyBulkCString(c,server.masterauth);
+ matches++;
+ }
+ if (stringmatch(pattern,"maxmemory",0)) {
+ char buf[128];
+
+ ll2string(buf,128,server.maxmemory);
+ addReplyBulkCString(c,"maxmemory");
+ addReplyBulkCString(c,buf);
+ matches++;
+ }
+ if (stringmatch(pattern,"timeout",0)) {
+ char buf[128];
+
+ ll2string(buf,128,server.maxidletime);
+ addReplyBulkCString(c,"timeout");
+ addReplyBulkCString(c,buf);
+ matches++;
+ }
+ if (stringmatch(pattern,"appendonly",0)) {
+ addReplyBulkCString(c,"appendonly");
+ addReplyBulkCString(c,server.appendonly ? "yes" : "no");
+ matches++;
+ }
+ if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) {
+ addReplyBulkCString(c,"no-appendfsync-on-rewrite");
+ addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? "yes" : "no");
+ matches++;
+ }
+ if (stringmatch(pattern,"appendfsync",0)) {
+ char *policy;
+
+ switch(server.appendfsync) {
+ case APPENDFSYNC_NO: policy = "no"; break;
+ case APPENDFSYNC_EVERYSEC: policy = "everysec"; break;
+ case APPENDFSYNC_ALWAYS: policy = "always"; break;
+ default: policy = "unknown"; break; /* too harmless to panic */
+ }
+ addReplyBulkCString(c,"appendfsync");
+ addReplyBulkCString(c,policy);
+ matches++;
+ }
+ if (stringmatch(pattern,"save",0)) {
+ sds buf = sdsempty();
+ int j;
+
+ for (j = 0; j < server.saveparamslen; j++) {
+ buf = sdscatprintf(buf,"%ld %d",
+ server.saveparams[j].seconds,
+ server.saveparams[j].changes);
+ if (j != server.saveparamslen-1)
+ buf = sdscatlen(buf," ",1);
+ }
+ addReplyBulkCString(c,"save");
+ addReplyBulkCString(c,buf);
+ sdsfree(buf);
+ matches++;
+ }
+ decrRefCount(o);
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2);
+}
+
+void configCommand(redisClient *c) {
+ if (!strcasecmp(c->argv[1]->ptr,"set")) {
+ if (c->argc != 4) goto badarity;
+ configSetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"get")) {
+ if (c->argc != 3) goto badarity;
+ configGetCommand(c);
+ } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) {
+ if (c->argc != 2) goto badarity;
+ server.stat_numcommands = 0;
+ server.stat_numconnections = 0;
+ server.stat_expiredkeys = 0;
+ server.stat_starttime = time(NULL);
+ addReply(c,shared.ok);
+ } else {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
+ }
+ return;
+
+badarity:
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-ERR Wrong number of arguments for CONFIG %s\r\n",
+ (char*) c->argv[1]->ptr));
+}
diff --git a/config.h b/src/config.h
index 6e98fbb2c..6e98fbb2c 100644
--- a/config.h
+++ b/src/config.h
diff --git a/src/db.c b/src/db.c
new file mode 100644
index 000000000..e1e82cb22
--- /dev/null
+++ b/src/db.c
@@ -0,0 +1,508 @@
+#include "redis.h"
+
+#include <signal.h>
+
+/*-----------------------------------------------------------------------------
+ * C-level DB API
+ *----------------------------------------------------------------------------*/
+
+robj *lookupKey(redisDb *db, robj *key) {
+ dictEntry *de = dictFind(db->dict,key->ptr);
+ if (de) {
+ robj *val = dictGetEntryVal(de);
+
+ if (server.vm_enabled) {
+ if (val->storage == REDIS_VM_MEMORY ||
+ val->storage == REDIS_VM_SWAPPING)
+ {
+ /* If we were swapping the object out, cancel the operation */
+ if (val->storage == REDIS_VM_SWAPPING)
+ vmCancelThreadedIOJob(val);
+ /* Update the access time for the aging algorithm. */
+ val->lru = server.lruclock;
+ } else {
+ int notify = (val->storage == REDIS_VM_LOADING);
+
+ /* Our value was swapped on disk. Bring it at home. */
+ redisAssert(val->type == REDIS_VMPOINTER);
+ val = vmLoadObject(val);
+ dictGetEntryVal(de) = val;
+
+ /* Clients blocked by the VM subsystem may be waiting for
+ * this key... */
+ if (notify) handleClientsBlockedOnSwappedKey(db,key);
+ }
+ }
+ return val;
+ } else {
+ return NULL;
+ }
+}
+
+robj *lookupKeyRead(redisDb *db, robj *key) {
+ expireIfNeeded(db,key);
+ return lookupKey(db,key);
+}
+
+robj *lookupKeyWrite(redisDb *db, robj *key) {
+ deleteIfVolatile(db,key);
+ touchWatchedKey(db,key);
+ return lookupKey(db,key);
+}
+
+robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
+ robj *o = lookupKeyRead(c->db, key);
+ if (!o) addReply(c,reply);
+ return o;
+}
+
+robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
+ robj *o = lookupKeyWrite(c->db, key);
+ if (!o) addReply(c,reply);
+ return o;
+}
+
+/* Add the key to the DB. If the key already exists REDIS_ERR is returned,
+ * otherwise REDIS_OK is returned, and the caller should increment the
+ * refcount of 'val'. */
+int dbAdd(redisDb *db, robj *key, robj *val) {
+ /* Perform a lookup before adding the key, as we need to copy the
+ * key value. */
+ if (dictFind(db->dict, key->ptr) != NULL) {
+ return REDIS_ERR;
+ } else {
+ sds copy = sdsdup(key->ptr);
+ dictAdd(db->dict, copy, val);
+ return REDIS_OK;
+ }
+}
+
+/* If the key does not exist, this is just like dbAdd(). Otherwise
+ * the value associated to the key is replaced with the new one.
+ *
+ * On update (key already existed) 0 is returned. Otherwise 1. */
+int dbReplace(redisDb *db, robj *key, robj *val) {
+ if (dictFind(db->dict,key->ptr) == NULL) {
+ sds copy = sdsdup(key->ptr);
+ dictAdd(db->dict, copy, val);
+ return 1;
+ } else {
+ dictReplace(db->dict, key->ptr, val);
+ return 0;
+ }
+}
+
+int dbExists(redisDb *db, robj *key) {
+ return dictFind(db->dict,key->ptr) != NULL;
+}
+
+/* Return a random key, in form of a Redis object.
+ * If there are no keys, NULL is returned.
+ *
+ * The function makes sure to return keys not already expired. */
+robj *dbRandomKey(redisDb *db) {
+ struct dictEntry *de;
+
+ while(1) {
+ sds key;
+ robj *keyobj;
+
+ de = dictGetRandomKey(db->dict);
+ if (de == NULL) return NULL;
+
+ key = dictGetEntryKey(de);
+ keyobj = createStringObject(key,sdslen(key));
+ if (dictFind(db->expires,key)) {
+ if (expireIfNeeded(db,keyobj)) {
+ decrRefCount(keyobj);
+ continue; /* search for another key. This expired. */
+ }
+ }
+ return keyobj;
+ }
+}
+
+/* Delete a key, value, and associated expiration entry if any, from the DB */
+int dbDelete(redisDb *db, robj *key) {
+ /* Deleting an entry from the expires dict will not free the sds of
+ * the key, because it is shared with the main dictionary. */
+ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
+ return dictDelete(db->dict,key->ptr) == DICT_OK;
+}
+
+/* Empty the whole database */
+long long emptyDb() {
+ int j;
+ long long removed = 0;
+
+ for (j = 0; j < server.dbnum; j++) {
+ removed += dictSize(server.db[j].dict);
+ dictEmpty(server.db[j].dict);
+ dictEmpty(server.db[j].expires);
+ }
+ return removed;
+}
+
+int selectDb(redisClient *c, int id) {
+ if (id < 0 || id >= server.dbnum)
+ return REDIS_ERR;
+ c->db = &server.db[id];
+ return REDIS_OK;
+}
+
+/*-----------------------------------------------------------------------------
+ * Type agnostic commands operating on the key space
+ *----------------------------------------------------------------------------*/
+
+void flushdbCommand(redisClient *c) {
+ server.dirty += dictSize(c->db->dict);
+ touchWatchedKeysOnFlush(c->db->id);
+ dictEmpty(c->db->dict);
+ dictEmpty(c->db->expires);
+ addReply(c,shared.ok);
+}
+
+void flushallCommand(redisClient *c) {
+ touchWatchedKeysOnFlush(-1);
+ server.dirty += emptyDb();
+ addReply(c,shared.ok);
+ if (server.bgsavechildpid != -1) {
+ kill(server.bgsavechildpid,SIGKILL);
+ rdbRemoveTempFile(server.bgsavechildpid);
+ }
+ rdbSave(server.dbfilename);
+ server.dirty++;
+}
+
+void delCommand(redisClient *c) {
+ int deleted = 0, j;
+
+ for (j = 1; j < c->argc; j++) {
+ if (dbDelete(c->db,c->argv[j])) {
+ touchWatchedKey(c->db,c->argv[j]);
+ server.dirty++;
+ deleted++;
+ }
+ }
+ addReplyLongLong(c,deleted);
+}
+
+void existsCommand(redisClient *c) {
+ expireIfNeeded(c->db,c->argv[1]);
+ if (dbExists(c->db,c->argv[1])) {
+ addReply(c, shared.cone);
+ } else {
+ addReply(c, shared.czero);
+ }
+}
+
+void selectCommand(redisClient *c) {
+ int id = atoi(c->argv[1]->ptr);
+
+ if (selectDb(c,id) == REDIS_ERR) {
+ addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
+ } else {
+ addReply(c,shared.ok);
+ }
+}
+
+void randomkeyCommand(redisClient *c) {
+ robj *key;
+
+ if ((key = dbRandomKey(c->db)) == NULL) {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+
+ addReplyBulk(c,key);
+ decrRefCount(key);
+}
+
+void keysCommand(redisClient *c) {
+ dictIterator *di;
+ dictEntry *de;
+ sds pattern = c->argv[1]->ptr;
+ int plen = sdslen(pattern);
+ unsigned long numkeys = 0;
+ robj *lenobj = createObject(REDIS_STRING,NULL);
+
+ di = dictGetIterator(c->db->dict);
+ addReply(c,lenobj);
+ decrRefCount(lenobj);
+ while((de = dictNext(di)) != NULL) {
+ sds key = dictGetEntryKey(de);
+ robj *keyobj;
+
+ if ((pattern[0] == '*' && pattern[1] == '\0') ||
+ stringmatchlen(pattern,plen,key,sdslen(key),0)) {
+ keyobj = createStringObject(key,sdslen(key));
+ if (expireIfNeeded(c->db,keyobj) == 0) {
+ addReplyBulk(c,keyobj);
+ numkeys++;
+ }
+ decrRefCount(keyobj);
+ }
+ }
+ dictReleaseIterator(di);
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",numkeys);
+}
+
+void dbsizeCommand(redisClient *c) {
+ addReplySds(c,
+ sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
+}
+
+void lastsaveCommand(redisClient *c) {
+ addReplySds(c,
+ sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
+}
+
+void typeCommand(redisClient *c) {
+ robj *o;
+ char *type;
+
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ type = "+none";
+ } else {
+ switch(o->type) {
+ case REDIS_STRING: type = "+string"; break;
+ case REDIS_LIST: type = "+list"; break;
+ case REDIS_SET: type = "+set"; break;
+ case REDIS_ZSET: type = "+zset"; break;
+ case REDIS_HASH: type = "+hash"; break;
+ default: type = "+unknown"; break;
+ }
+ }
+ addReplySds(c,sdsnew(type));
+ addReply(c,shared.crlf);
+}
+
+void saveCommand(redisClient *c) {
+ if (server.bgsavechildpid != -1) {
+ addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
+ return;
+ }
+ if (rdbSave(server.dbfilename) == REDIS_OK) {
+ addReply(c,shared.ok);
+ } else {
+ addReply(c,shared.err);
+ }
+}
+
+void bgsaveCommand(redisClient *c) {
+ if (server.bgsavechildpid != -1) {
+ addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
+ return;
+ }
+ if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
+ char *status = "+Background saving started\r\n";
+ addReplySds(c,sdsnew(status));
+ } else {
+ addReply(c,shared.err);
+ }
+}
+
+void shutdownCommand(redisClient *c) {
+ if (prepareForShutdown() == REDIS_OK)
+ exit(0);
+ addReplySds(c, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
+}
+
+void renameGenericCommand(redisClient *c, int nx) {
+ robj *o;
+
+ /* To use the same key as src and dst is probably an error */
+ if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
+ addReply(c,shared.sameobjecterr);
+ return;
+ }
+
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
+ return;
+
+ incrRefCount(o);
+ deleteIfVolatile(c->db,c->argv[2]);
+ if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
+ if (nx) {
+ decrRefCount(o);
+ addReply(c,shared.czero);
+ return;
+ }
+ dbReplace(c->db,c->argv[2],o);
+ }
+ dbDelete(c->db,c->argv[1]);
+ touchWatchedKey(c->db,c->argv[2]);
+ server.dirty++;
+ addReply(c,nx ? shared.cone : shared.ok);
+}
+
+void renameCommand(redisClient *c) {
+ renameGenericCommand(c,0);
+}
+
+void renamenxCommand(redisClient *c) {
+ renameGenericCommand(c,1);
+}
+
+void moveCommand(redisClient *c) {
+ robj *o;
+ redisDb *src, *dst;
+ int srcid;
+
+ /* Obtain source and target DB pointers */
+ src = c->db;
+ srcid = c->db->id;
+ if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
+ addReply(c,shared.outofrangeerr);
+ return;
+ }
+ dst = c->db;
+ selectDb(c,srcid); /* Back to the source DB */
+
+ /* If the user is moving using as target the same
+ * DB as the source DB it is probably an error. */
+ if (src == dst) {
+ addReply(c,shared.sameobjecterr);
+ return;
+ }
+
+ /* Check if the element exists and get a reference */
+ o = lookupKeyWrite(c->db,c->argv[1]);
+ if (!o) {
+ addReply(c,shared.czero);
+ return;
+ }
+
+ /* Try to add the element to the target DB */
+ deleteIfVolatile(dst,c->argv[1]);
+ if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
+ addReply(c,shared.czero);
+ return;
+ }
+ incrRefCount(o);
+
+ /* OK! key moved, free the entry in the source DB */
+ dbDelete(src,c->argv[1]);
+ server.dirty++;
+ addReply(c,shared.cone);
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires API
+ *----------------------------------------------------------------------------*/
+
+int removeExpire(redisDb *db, robj *key) {
+ /* An expire may only be removed if there is a corresponding entry in the
+ * main dict. Otherwise, the key will never be freed. */
+ redisAssert(dictFind(db->dict,key->ptr) != NULL);
+ if (dictDelete(db->expires,key->ptr) == DICT_OK) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+int setExpire(redisDb *db, robj *key, time_t when) {
+ dictEntry *de;
+
+ /* Reuse the sds from the main dict in the expire dict */
+ redisAssert((de = dictFind(db->dict,key->ptr)) != NULL);
+ if (dictAdd(db->expires,dictGetEntryKey(de),(void*)when) == DICT_ERR) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+/* Return the expire time of the specified key, or -1 if no expire
+ * is associated with this key (i.e. the key is non volatile) */
+time_t getExpire(redisDb *db, robj *key) {
+ dictEntry *de;
+
+ /* No expire? return ASAP */
+ if (dictSize(db->expires) == 0 ||
+ (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
+
+ /* The entry was found in the expire dict, this means it should also
+ * be present in the main dict (safety check). */
+ redisAssert(dictFind(db->dict,key->ptr) != NULL);
+ return (time_t) dictGetEntryVal(de);
+}
+
+int expireIfNeeded(redisDb *db, robj *key) {
+ time_t when = getExpire(db,key);
+ if (when < 0) return 0;
+
+ /* Return when this key has not expired */
+ if (time(NULL) <= when) return 0;
+
+ /* Delete the key */
+ server.stat_expiredkeys++;
+ server.dirty++;
+ return dbDelete(db,key);
+}
+
+int deleteIfVolatile(redisDb *db, robj *key) {
+ if (getExpire(db,key) < 0) return 0;
+
+ /* Delete the key */
+ server.stat_expiredkeys++;
+ server.dirty++;
+ return dbDelete(db,key);
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires Commands
+ *----------------------------------------------------------------------------*/
+
+void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
+ dictEntry *de;
+ time_t seconds;
+
+ if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
+
+ seconds -= offset;
+
+ de = dictFind(c->db->dict,key->ptr);
+ if (de == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+ if (seconds <= 0) {
+ if (dbDelete(c->db,key)) server.dirty++;
+ addReply(c, shared.cone);
+ return;
+ } else {
+ time_t when = time(NULL)+seconds;
+ if (setExpire(c->db,key,when)) {
+ addReply(c,shared.cone);
+ server.dirty++;
+ } else {
+ addReply(c,shared.czero);
+ }
+ return;
+ }
+}
+
+void expireCommand(redisClient *c) {
+ expireGenericCommand(c,c->argv[1],c->argv[2],0);
+}
+
+void expireatCommand(redisClient *c) {
+ expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
+}
+
+void ttlCommand(redisClient *c) {
+ time_t expire;
+ int ttl = -1;
+
+ expire = getExpire(c->db,c->argv[1]);
+ if (expire != -1) {
+ ttl = (int) (expire-time(NULL));
+ if (ttl < 0) ttl = -1;
+ }
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
+}
+
+
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 000000000..10b620d6f
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,309 @@
+#include "redis.h"
+#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
+
+/* ================================= Debugging ============================== */
+
+/* Compute the sha1 of string at 's' with 'len' bytes long.
+ * The SHA1 is then xored againt the string pointed by digest.
+ * Since xor is commutative, this operation is used in order to
+ * "add" digests relative to unordered elements.
+ *
+ * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
+void xorDigest(unsigned char *digest, void *ptr, size_t len) {
+ SHA1_CTX ctx;
+ unsigned char hash[20], *s = ptr;
+ int j;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,s,len);
+ SHA1Final(hash,&ctx);
+
+ for (j = 0; j < 20; j++)
+ digest[j] ^= hash[j];
+}
+
+void xorObjectDigest(unsigned char *digest, robj *o) {
+ o = getDecodedObject(o);
+ xorDigest(digest,o->ptr,sdslen(o->ptr));
+ decrRefCount(o);
+}
+
+/* This function instead of just computing the SHA1 and xoring it
+ * against diget, also perform the digest of "digest" itself and
+ * replace the old value with the new one.
+ *
+ * So the final digest will be:
+ *
+ * digest = SHA1(digest xor SHA1(data))
+ *
+ * This function is used every time we want to preserve the order so
+ * that digest(a,b,c,d) will be different than digest(b,c,d,a)
+ *
+ * Also note that mixdigest("foo") followed by mixdigest("bar")
+ * will lead to a different digest compared to "fo", "obar".
+ */
+void mixDigest(unsigned char *digest, void *ptr, size_t len) {
+ SHA1_CTX ctx;
+ char *s = ptr;
+
+ xorDigest(digest,s,len);
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,digest,20);
+ SHA1Final(digest,&ctx);
+}
+
+void mixObjectDigest(unsigned char *digest, robj *o) {
+ o = getDecodedObject(o);
+ mixDigest(digest,o->ptr,sdslen(o->ptr));
+ decrRefCount(o);
+}
+
+/* Compute the dataset digest. Since keys, sets elements, hashes elements
+ * are not ordered, we use a trick: every aggregate digest is the xor
+ * of the digests of their elements. This way the order will not change
+ * the result. For list instead we use a feedback entering the output digest
+ * as input in order to ensure that a different ordered list will result in
+ * a different digest. */
+void computeDatasetDigest(unsigned char *final) {
+ unsigned char digest[20];
+ char buf[128];
+ dictIterator *di = NULL;
+ dictEntry *de;
+ int j;
+ uint32_t aux;
+
+ memset(final,0,20); /* Start with a clean result */
+
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+
+ if (dictSize(db->dict) == 0) continue;
+ di = dictGetIterator(db->dict);
+
+ /* hash the DB id, so the same dataset moved in a different
+ * DB will lead to a different digest */
+ aux = htonl(j);
+ mixDigest(final,&aux,sizeof(aux));
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ sds key;
+ robj *keyobj, *o;
+ time_t expiretime;
+
+ memset(digest,0,20); /* This key-val digest */
+ key = dictGetEntryKey(de);
+ keyobj = createStringObject(key,sdslen(key));
+
+ mixDigest(digest,key,sdslen(key));
+
+ /* Make sure the key is loaded if VM is active */
+ o = lookupKeyRead(db,keyobj);
+
+ aux = htonl(o->type);
+ mixDigest(digest,&aux,sizeof(aux));
+ expiretime = getExpire(db,keyobj);
+
+ /* Save the key and associated value */
+ if (o->type == REDIS_STRING) {
+ mixObjectDigest(digest,o);
+ } else if (o->type == REDIS_LIST) {
+ listTypeIterator *li = listTypeInitIterator(o,0,REDIS_TAIL);
+ listTypeEntry entry;
+ while(listTypeNext(li,&entry)) {
+ robj *eleobj = listTypeGet(&entry);
+ mixObjectDigest(digest,eleobj);
+ decrRefCount(eleobj);
+ }
+ listTypeReleaseIterator(li);
+ } else if (o->type == REDIS_SET) {
+ dict *set = o->ptr;
+ dictIterator *di = dictGetIterator(set);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+
+ xorObjectDigest(digest,eleobj);
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_ZSET) {
+ zset *zs = o->ptr;
+ dictIterator *di = dictGetIterator(zs->dict);
+ dictEntry *de;
+
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+ double *score = dictGetEntryVal(de);
+ unsigned char eledigest[20];
+
+ snprintf(buf,sizeof(buf),"%.17g",*score);
+ memset(eledigest,0,20);
+ mixObjectDigest(eledigest,eleobj);
+ mixDigest(eledigest,buf,strlen(buf));
+ xorDigest(digest,eledigest,20);
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_HASH) {
+ hashTypeIterator *hi;
+ robj *obj;
+
+ hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != REDIS_ERR) {
+ unsigned char eledigest[20];
+
+ memset(eledigest,0,20);
+ obj = hashTypeCurrent(hi,REDIS_HASH_KEY);
+ mixObjectDigest(eledigest,obj);
+ decrRefCount(obj);
+ obj = hashTypeCurrent(hi,REDIS_HASH_VALUE);
+ mixObjectDigest(eledigest,obj);
+ decrRefCount(obj);
+ xorDigest(digest,eledigest,20);
+ }
+ hashTypeReleaseIterator(hi);
+ } else {
+ redisPanic("Unknown object type");
+ }
+ /* If the key has an expire, add it to the mix */
+ if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
+ /* We can finally xor the key-val digest to the final digest */
+ xorDigest(final,digest,20);
+ decrRefCount(keyobj);
+ }
+ dictReleaseIterator(di);
+ }
+}
+
+void debugCommand(redisClient *c) {
+ if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
+ *((char*)-1) = 'x';
+ } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
+ if (rdbSave(server.dbfilename) != REDIS_OK) {
+ addReply(c,shared.err);
+ return;
+ }
+ emptyDb();
+ if (rdbLoad(server.dbfilename) != REDIS_OK) {
+ addReply(c,shared.err);
+ return;
+ }
+ redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
+ emptyDb();
+ if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
+ addReply(c,shared.err);
+ return;
+ }
+ redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
+ dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
+ robj *val;
+
+ if (!de) {
+ addReply(c,shared.nokeyerr);
+ return;
+ }
+ val = dictGetEntryVal(de);
+ if (!server.vm_enabled || (val->storage == REDIS_VM_MEMORY ||
+ val->storage == REDIS_VM_SWAPPING)) {
+ char *strenc;
+
+ strenc = strEncoding(val->encoding);
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "+Value at:%p refcount:%d "
+ "encoding:%s serializedlength:%lld\r\n",
+ (void*)val, val->refcount,
+ strenc, (long long) rdbSavedObjectLen(val,NULL)));
+ } else {
+ vmpointer *vp = (vmpointer*) val;
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "+Value swapped at: page %llu "
+ "using %llu pages\r\n",
+ (unsigned long long) vp->page,
+ (unsigned long long) vp->usedpages));
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"swapin") && c->argc == 3) {
+ lookupKeyRead(c->db,c->argv[2]);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
+ dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
+ robj *val;
+ vmpointer *vp;
+
+ if (!server.vm_enabled) {
+ addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
+ return;
+ }
+ if (!de) {
+ addReply(c,shared.nokeyerr);
+ return;
+ }
+ val = dictGetEntryVal(de);
+ /* Swap it */
+ if (val->storage != REDIS_VM_MEMORY) {
+ addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
+ } else if (val->refcount != 1) {
+ addReplySds(c,sdsnew("-ERR Object is shared\r\n"));
+ } else if ((vp = vmSwapObjectBlocking(val)) != NULL) {
+ dictGetEntryVal(de) = vp;
+ addReply(c,shared.ok);
+ } else {
+ addReply(c,shared.err);
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) {
+ long keys, j;
+ robj *key, *val;
+ char buf[128];
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
+ return;
+ for (j = 0; j < keys; j++) {
+ snprintf(buf,sizeof(buf),"key:%lu",j);
+ key = createStringObject(buf,strlen(buf));
+ if (lookupKeyRead(c->db,key) != NULL) {
+ decrRefCount(key);
+ continue;
+ }
+ snprintf(buf,sizeof(buf),"value:%lu",j);
+ val = createStringObject(buf,strlen(buf));
+ dbAdd(c->db,key,val);
+ decrRefCount(key);
+ }
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) {
+ unsigned char digest[20];
+ sds d = sdsnew("+");
+ int j;
+
+ computeDatasetDigest(digest);
+ for (j = 0; j < 20; j++)
+ d = sdscatprintf(d, "%02x",digest[j]);
+
+ d = sdscatlen(d,"\r\n",2);
+ addReplySds(c,d);
+ } else {
+ addReplySds(c,sdsnew(
+ "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
+ }
+}
+
+void _redisAssert(char *estr, char *file, int line) {
+ redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
+ redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
+#ifdef HAVE_BACKTRACE
+ redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
+ *((char*)-1) = 'x';
+#endif
+}
+
+void _redisPanic(char *msg, char *file, int line) {
+ redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
+ redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
+#ifdef HAVE_BACKTRACE
+ redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
+ *((char*)-1) = 'x';
+#endif
+}
diff --git a/dict.c b/src/dict.c
index d5010708c..d5010708c 100644
--- a/dict.c
+++ b/src/dict.c
diff --git a/dict.h b/src/dict.h
index 30ace4db7..30ace4db7 100644
--- a/dict.h
+++ b/src/dict.h
diff --git a/fmacros.h b/src/fmacros.h
index 38f46482a..38f46482a 100644
--- a/fmacros.h
+++ b/src/fmacros.h
diff --git a/linenoise.c b/src/linenoise.c
index 0c04d03fb..0c04d03fb 100644
--- a/linenoise.c
+++ b/src/linenoise.c
diff --git a/linenoise.h b/src/linenoise.h
index ff45e2c47..ff45e2c47 100644
--- a/linenoise.h
+++ b/src/linenoise.h
diff --git a/lzf.h b/src/lzf.h
index 919b6e6be..919b6e6be 100644
--- a/lzf.h
+++ b/src/lzf.h
diff --git a/lzfP.h b/src/lzfP.h
index d533f1829..d533f1829 100644
--- a/lzfP.h
+++ b/src/lzfP.h
diff --git a/lzf_c.c b/src/lzf_c.c
index 99dab091c..99dab091c 100644
--- a/lzf_c.c
+++ b/src/lzf_c.c
diff --git a/lzf_d.c b/src/lzf_d.c
index e7e48c138..e7e48c138 100644
--- a/lzf_d.c
+++ b/src/lzf_d.c
diff --git a/mkreleasehdr.sh b/src/mkreleasehdr.sh
index 30984160e..30984160e 100755
--- a/mkreleasehdr.sh
+++ b/src/mkreleasehdr.sh
diff --git a/src/multi.c b/src/multi.c
new file mode 100644
index 000000000..def1dd673
--- /dev/null
+++ b/src/multi.c
@@ -0,0 +1,266 @@
+#include "redis.h"
+
+/* ================================ MULTI/EXEC ============================== */
+
+/* Client state initialization for MULTI/EXEC */
+void initClientMultiState(redisClient *c) {
+ c->mstate.commands = NULL;
+ c->mstate.count = 0;
+}
+
+/* Release all the resources associated with MULTI/EXEC state */
+void freeClientMultiState(redisClient *c) {
+ int j;
+
+ for (j = 0; j < c->mstate.count; j++) {
+ int i;
+ multiCmd *mc = c->mstate.commands+j;
+
+ for (i = 0; i < mc->argc; i++)
+ decrRefCount(mc->argv[i]);
+ zfree(mc->argv);
+ }
+ zfree(c->mstate.commands);
+}
+
+/* Add a new command into the MULTI commands queue */
+void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
+ multiCmd *mc;
+ int j;
+
+ c->mstate.commands = zrealloc(c->mstate.commands,
+ sizeof(multiCmd)*(c->mstate.count+1));
+ mc = c->mstate.commands+c->mstate.count;
+ mc->cmd = cmd;
+ mc->argc = c->argc;
+ mc->argv = zmalloc(sizeof(robj*)*c->argc);
+ memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
+ for (j = 0; j < c->argc; j++)
+ incrRefCount(mc->argv[j]);
+ c->mstate.count++;
+}
+
+void multiCommand(redisClient *c) {
+ if (c->flags & REDIS_MULTI) {
+ addReplySds(c,sdsnew("-ERR MULTI calls can not be nested\r\n"));
+ return;
+ }
+ c->flags |= REDIS_MULTI;
+ addReply(c,shared.ok);
+}
+
+void discardCommand(redisClient *c) {
+ if (!(c->flags & REDIS_MULTI)) {
+ addReplySds(c,sdsnew("-ERR DISCARD without MULTI\r\n"));
+ return;
+ }
+
+ freeClientMultiState(c);
+ initClientMultiState(c);
+ c->flags &= (~REDIS_MULTI);
+ unwatchAllKeys(c);
+ addReply(c,shared.ok);
+}
+
+/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
+ * implememntation for more information. */
+void execCommandReplicateMulti(redisClient *c) {
+ struct redisCommand *cmd;
+ robj *multistring = createStringObject("MULTI",5);
+
+ cmd = lookupCommand("multi");
+ if (server.appendonly)
+ feedAppendOnlyFile(cmd,c->db->id,&multistring,1);
+ if (listLength(server.slaves))
+ replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
+ decrRefCount(multistring);
+}
+
+void execCommand(redisClient *c) {
+ int j;
+ robj **orig_argv;
+ int orig_argc;
+
+ if (!(c->flags & REDIS_MULTI)) {
+ addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
+ return;
+ }
+
+ /* Check if we need to abort the EXEC if some WATCHed key was touched.
+ * A failed EXEC will return a multi bulk nil object. */
+ if (c->flags & REDIS_DIRTY_CAS) {
+ freeClientMultiState(c);
+ initClientMultiState(c);
+ c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
+ unwatchAllKeys(c);
+ addReply(c,shared.nullmultibulk);
+ return;
+ }
+
+ /* Replicate a MULTI request now that we are sure the block is executed.
+ * This way we'll deliver the MULTI/..../EXEC block as a whole and
+ * both the AOF and the replication link will have the same consistency
+ * and atomicity guarantees. */
+ execCommandReplicateMulti(c);
+
+ /* Exec all the queued commands */
+ unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
+ orig_argv = c->argv;
+ orig_argc = c->argc;
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
+ for (j = 0; j < c->mstate.count; j++) {
+ c->argc = c->mstate.commands[j].argc;
+ c->argv = c->mstate.commands[j].argv;
+ call(c,c->mstate.commands[j].cmd);
+ }
+ c->argv = orig_argv;
+ c->argc = orig_argc;
+ freeClientMultiState(c);
+ initClientMultiState(c);
+ c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
+ /* Make sure the EXEC command is always replicated / AOF, since we
+ * always send the MULTI command (we can't know beforehand if the
+ * next operations will contain at least a modification to the DB). */
+ server.dirty++;
+}
+
+/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
+ *
+ * The implementation uses a per-DB hash table mapping keys to list of clients
+ * WATCHing those keys, so that given a key that is going to be modified
+ * we can mark all the associated clients as dirty.
+ *
+ * Also every client contains a list of WATCHed keys so that's possible to
+ * un-watch such keys when the client is freed or when UNWATCH is called. */
+
+/* In the client->watched_keys list we need to use watchedKey structures
+ * as in order to identify a key in Redis we need both the key name and the
+ * DB */
+typedef struct watchedKey {
+ robj *key;
+ redisDb *db;
+} watchedKey;
+
+/* Watch for the specified key */
+void watchForKey(redisClient *c, robj *key) {
+ list *clients = NULL;
+ listIter li;
+ listNode *ln;
+ watchedKey *wk;
+
+ /* Check if we are already watching for this key */
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ wk = listNodeValue(ln);
+ if (wk->db == c->db && equalStringObjects(key,wk->key))
+ return; /* Key already watched */
+ }
+ /* This key is not already watched in this DB. Let's add it */
+ clients = dictFetchValue(c->db->watched_keys,key);
+ if (!clients) {
+ clients = listCreate();
+ dictAdd(c->db->watched_keys,key,clients);
+ incrRefCount(key);
+ }
+ listAddNodeTail(clients,c);
+ /* Add the new key to the lits of keys watched by this client */
+ wk = zmalloc(sizeof(*wk));
+ wk->key = key;
+ wk->db = c->db;
+ incrRefCount(key);
+ listAddNodeTail(c->watched_keys,wk);
+}
+
+/* Unwatch all the keys watched by this client. To clean the EXEC dirty
+ * flag is up to the caller. */
+void unwatchAllKeys(redisClient *c) {
+ listIter li;
+ listNode *ln;
+
+ if (listLength(c->watched_keys) == 0) return;
+ listRewind(c->watched_keys,&li);
+ while((ln = listNext(&li))) {
+ list *clients;
+ watchedKey *wk;
+
+ /* Lookup the watched key -> clients list and remove the client
+ * from the list */
+ wk = listNodeValue(ln);
+ clients = dictFetchValue(wk->db->watched_keys, wk->key);
+ redisAssert(clients != NULL);
+ listDelNode(clients,listSearchKey(clients,c));
+ /* Kill the entry at all if this was the only client */
+ if (listLength(clients) == 0)
+ dictDelete(wk->db->watched_keys, wk->key);
+ /* Remove this watched key from the client->watched list */
+ listDelNode(c->watched_keys,ln);
+ decrRefCount(wk->key);
+ zfree(wk);
+ }
+}
+
+/* "Touch" a key, so that if this key is being WATCHed by some client the
+ * next EXEC will fail. */
+void touchWatchedKey(redisDb *db, robj *key) {
+ list *clients;
+ listIter li;
+ listNode *ln;
+
+ if (dictSize(db->watched_keys) == 0) return;
+ clients = dictFetchValue(db->watched_keys, key);
+ if (!clients) return;
+
+ /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
+ /* Check if we are already watching for this key */
+ listRewind(clients,&li);
+ while((ln = listNext(&li))) {
+ redisClient *c = listNodeValue(ln);
+
+ c->flags |= REDIS_DIRTY_CAS;
+ }
+}
+
+/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
+ * flush but will be deleted as effect of the flushing operation should
+ * be touched. "dbid" is the DB that's getting the flush. -1 if it is
+ * a FLUSHALL operation (all the DBs flushed). */
+void touchWatchedKeysOnFlush(int dbid) {
+ listIter li1, li2;
+ listNode *ln;
+
+ /* For every client, check all the waited keys */
+ listRewind(server.clients,&li1);
+ while((ln = listNext(&li1))) {
+ redisClient *c = listNodeValue(ln);
+ listRewind(c->watched_keys,&li2);
+ while((ln = listNext(&li2))) {
+ watchedKey *wk = listNodeValue(ln);
+
+ /* For every watched key matching the specified DB, if the
+ * key exists, mark the client as dirty, as the key will be
+ * removed. */
+ if (dbid == -1 || wk->db->id == dbid) {
+ if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
+ c->flags |= REDIS_DIRTY_CAS;
+ }
+ }
+ }
+}
+
+void watchCommand(redisClient *c) {
+ int j;
+
+ if (c->flags & REDIS_MULTI) {
+ addReplySds(c,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
+ return;
+ }
+ for (j = 1; j < c->argc; j++)
+ watchForKey(c,c->argv[j]);
+ addReply(c,shared.ok);
+}
+
+void unwatchCommand(redisClient *c) {
+ unwatchAllKeys(c);
+ c->flags &= (~REDIS_DIRTY_CAS);
+ addReply(c,shared.ok);
+}
diff --git a/src/networking.c b/src/networking.c
new file mode 100644
index 000000000..31844a09f
--- /dev/null
+++ b/src/networking.c
@@ -0,0 +1,589 @@
+#include "redis.h"
+
+#include <sys/uio.h>
+
+void *dupClientReplyValue(void *o) {
+ incrRefCount((robj*)o);
+ return o;
+}
+
+int listMatchObjects(void *a, void *b) {
+ return equalStringObjects(a,b);
+}
+
+redisClient *createClient(int fd) {
+ redisClient *c = zmalloc(sizeof(*c));
+
+ anetNonBlock(NULL,fd);
+ anetTcpNoDelay(NULL,fd);
+ if (!c) return NULL;
+ selectDb(c,0);
+ c->fd = fd;
+ c->querybuf = sdsempty();
+ c->argc = 0;
+ c->argv = NULL;
+ c->bulklen = -1;
+ c->multibulk = 0;
+ c->mbargc = 0;
+ c->mbargv = NULL;
+ c->sentlen = 0;
+ c->flags = 0;
+ c->lastinteraction = time(NULL);
+ c->authenticated = 0;
+ c->replstate = REDIS_REPL_NONE;
+ c->reply = listCreate();
+ listSetFreeMethod(c->reply,decrRefCount);
+ listSetDupMethod(c->reply,dupClientReplyValue);
+ c->blocking_keys = NULL;
+ c->blocking_keys_num = 0;
+ c->io_keys = listCreate();
+ c->watched_keys = listCreate();
+ listSetFreeMethod(c->io_keys,decrRefCount);
+ c->pubsub_channels = dictCreate(&setDictType,NULL);
+ c->pubsub_patterns = listCreate();
+ listSetFreeMethod(c->pubsub_patterns,decrRefCount);
+ listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
+ if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
+ readQueryFromClient, c) == AE_ERR) {
+ freeClient(c);
+ return NULL;
+ }
+ listAddNodeTail(server.clients,c);
+ initClientMultiState(c);
+ return c;
+}
+
+void addReply(redisClient *c, robj *obj) {
+ if (listLength(c->reply) == 0 &&
+ (c->replstate == REDIS_REPL_NONE ||
+ c->replstate == REDIS_REPL_ONLINE) &&
+ aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
+ sendReplyToClient, c) == AE_ERR) return;
+
+ if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
+ obj = dupStringObject(obj);
+ obj->refcount = 0; /* getDecodedObject() will increment the refcount */
+ }
+ listAddNodeTail(c->reply,getDecodedObject(obj));
+}
+
+void addReplySds(redisClient *c, sds s) {
+ robj *o = createObject(REDIS_STRING,s);
+ addReply(c,o);
+ decrRefCount(o);
+}
+
+void addReplyDouble(redisClient *c, double d) {
+ char buf[128];
+
+ snprintf(buf,sizeof(buf),"%.17g",d);
+ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
+ (unsigned long) strlen(buf),buf));
+}
+
+void addReplyLongLong(redisClient *c, long long ll) {
+ char buf[128];
+ size_t len;
+
+ if (ll == 0) {
+ addReply(c,shared.czero);
+ return;
+ } else if (ll == 1) {
+ addReply(c,shared.cone);
+ return;
+ }
+ buf[0] = ':';
+ len = ll2string(buf+1,sizeof(buf)-1,ll);
+ buf[len+1] = '\r';
+ buf[len+2] = '\n';
+ addReplySds(c,sdsnewlen(buf,len+3));
+}
+
+void addReplyUlong(redisClient *c, unsigned long ul) {
+ char buf[128];
+ size_t len;
+
+ if (ul == 0) {
+ addReply(c,shared.czero);
+ return;
+ } else if (ul == 1) {
+ addReply(c,shared.cone);
+ return;
+ }
+ len = snprintf(buf,sizeof(buf),":%lu\r\n",ul);
+ addReplySds(c,sdsnewlen(buf,len));
+}
+
+void addReplyBulkLen(redisClient *c, robj *obj) {
+ size_t len, intlen;
+ char buf[128];
+
+ if (obj->encoding == REDIS_ENCODING_RAW) {
+ len = sdslen(obj->ptr);
+ } else {
+ long n = (long)obj->ptr;
+
+ /* Compute how many bytes will take this integer as a radix 10 string */
+ len = 1;
+ if (n < 0) {
+ len++;
+ n = -n;
+ }
+ while((n = n/10) != 0) {
+ len++;
+ }
+ }
+ buf[0] = '$';
+ intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len);
+ buf[intlen+1] = '\r';
+ buf[intlen+2] = '\n';
+ addReplySds(c,sdsnewlen(buf,intlen+3));
+}
+
+void addReplyBulk(redisClient *c, robj *obj) {
+ addReplyBulkLen(c,obj);
+ addReply(c,obj);
+ addReply(c,shared.crlf);
+}
+
+/* In the CONFIG command we need to add vanilla C string as bulk replies */
+void addReplyBulkCString(redisClient *c, char *s) {
+ if (s == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ robj *o = createStringObject(s,strlen(s));
+ addReplyBulk(c,o);
+ decrRefCount(o);
+ }
+}
+
+void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
+ int cport, cfd;
+ char cip[128];
+ redisClient *c;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+ REDIS_NOTUSED(privdata);
+
+ cfd = anetAccept(server.neterr, fd, cip, &cport);
+ if (cfd == AE_ERR) {
+ redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
+ return;
+ }
+ redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
+ if ((c = createClient(cfd)) == NULL) {
+ redisLog(REDIS_WARNING,"Error allocating resoures for the client");
+ close(cfd); /* May be already closed, just ingore errors */
+ return;
+ }
+ /* If maxclient directive is set and this is one client more... close the
+ * connection. Note that we create the client instead to check before
+ * for this condition, since now the socket is already set in nonblocking
+ * mode and we can send an error for free using the Kernel I/O */
+ if (server.maxclients && listLength(server.clients) > server.maxclients) {
+ char *err = "-ERR max number of clients reached\r\n";
+
+ /* That's a best effort error message, don't check write errors */
+ if (write(c->fd,err,strlen(err)) == -1) {
+ /* Nothing to do, Just to avoid the warning... */
+ }
+ freeClient(c);
+ return;
+ }
+ server.stat_numconnections++;
+}
+
+static void freeClientArgv(redisClient *c) {
+ int j;
+
+ for (j = 0; j < c->argc; j++)
+ decrRefCount(c->argv[j]);
+ for (j = 0; j < c->mbargc; j++)
+ decrRefCount(c->mbargv[j]);
+ c->argc = 0;
+ c->mbargc = 0;
+}
+
+void freeClient(redisClient *c) {
+ listNode *ln;
+
+ /* Note that if the client we are freeing is blocked into a blocking
+ * call, we have to set querybuf to NULL *before* to call
+ * unblockClientWaitingData() to avoid processInputBuffer() will get
+ * called. Also it is important to remove the file events after
+ * this, because this call adds the READABLE event. */
+ sdsfree(c->querybuf);
+ c->querybuf = NULL;
+ if (c->flags & REDIS_BLOCKED)
+ unblockClientWaitingData(c);
+
+ /* UNWATCH all the keys */
+ unwatchAllKeys(c);
+ listRelease(c->watched_keys);
+ /* Unsubscribe from all the pubsub channels */
+ pubsubUnsubscribeAllChannels(c,0);
+ pubsubUnsubscribeAllPatterns(c,0);
+ dictRelease(c->pubsub_channels);
+ listRelease(c->pubsub_patterns);
+ /* Obvious cleanup */
+ aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ listRelease(c->reply);
+ freeClientArgv(c);
+ close(c->fd);
+ /* Remove from the list of clients */
+ ln = listSearchKey(server.clients,c);
+ redisAssert(ln != NULL);
+ listDelNode(server.clients,ln);
+ /* Remove from the list of clients that are now ready to be restarted
+ * after waiting for swapped keys */
+ if (c->flags & REDIS_IO_WAIT && listLength(c->io_keys) == 0) {
+ ln = listSearchKey(server.io_ready_clients,c);
+ if (ln) {
+ listDelNode(server.io_ready_clients,ln);
+ server.vm_blocked_clients--;
+ }
+ }
+ /* Remove from the list of clients waiting for swapped keys */
+ while (server.vm_enabled && listLength(c->io_keys)) {
+ ln = listFirst(c->io_keys);
+ dontWaitForSwappedKey(c,ln->value);
+ }
+ listRelease(c->io_keys);
+ /* Master/slave cleanup */
+ if (c->flags & REDIS_SLAVE) {
+ if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
+ close(c->repldbfd);
+ list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
+ ln = listSearchKey(l,c);
+ redisAssert(ln != NULL);
+ listDelNode(l,ln);
+ }
+ if (c->flags & REDIS_MASTER) {
+ server.master = NULL;
+ server.replstate = REDIS_REPL_CONNECT;
+ }
+ /* Release memory */
+ zfree(c->argv);
+ zfree(c->mbargv);
+ freeClientMultiState(c);
+ zfree(c);
+}
+
+#define GLUEREPLY_UP_TO (1024)
+static void glueReplyBuffersIfNeeded(redisClient *c) {
+ int copylen = 0;
+ char buf[GLUEREPLY_UP_TO];
+ listNode *ln;
+ listIter li;
+ robj *o;
+
+ listRewind(c->reply,&li);
+ while((ln = listNext(&li))) {
+ int objlen;
+
+ o = ln->value;
+ objlen = sdslen(o->ptr);
+ if (copylen + objlen <= GLUEREPLY_UP_TO) {
+ memcpy(buf+copylen,o->ptr,objlen);
+ copylen += objlen;
+ listDelNode(c->reply,ln);
+ } else {
+ if (copylen == 0) return;
+ break;
+ }
+ }
+ /* Now the output buffer is empty, add the new single element */
+ o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
+ listAddNodeHead(c->reply,o);
+}
+
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
+ redisClient *c = privdata;
+ int nwritten = 0, totwritten = 0, objlen;
+ robj *o;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+
+ /* Use writev() if we have enough buffers to send */
+ if (!server.glueoutputbuf &&
+ listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
+ !(c->flags & REDIS_MASTER))
+ {
+ sendReplyToClientWritev(el, fd, privdata, mask);
+ return;
+ }
+
+ while(listLength(c->reply)) {
+ if (server.glueoutputbuf && listLength(c->reply) > 1)
+ glueReplyBuffersIfNeeded(c);
+
+ o = listNodeValue(listFirst(c->reply));
+ objlen = sdslen(o->ptr);
+
+ if (objlen == 0) {
+ listDelNode(c->reply,listFirst(c->reply));
+ continue;
+ }
+
+ if (c->flags & REDIS_MASTER) {
+ /* Don't reply to a master */
+ nwritten = objlen - c->sentlen;
+ } else {
+ nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
+ if (nwritten <= 0) break;
+ }
+ c->sentlen += nwritten;
+ totwritten += nwritten;
+ /* If we fully sent the object on head go to the next one */
+ if (c->sentlen == objlen) {
+ listDelNode(c->reply,listFirst(c->reply));
+ c->sentlen = 0;
+ }
+ /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
+ * bytes, in a single threaded server it's a good idea to serve
+ * other clients as well, even if a very large request comes from
+ * super fast link that is always able to accept data (in real world
+ * scenario think about 'KEYS *' against the loopback interfae) */
+ if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
+ }
+ if (nwritten == -1) {
+ if (errno == EAGAIN) {
+ nwritten = 0;
+ } else {
+ redisLog(REDIS_VERBOSE,
+ "Error writing to client: %s", strerror(errno));
+ freeClient(c);
+ return;
+ }
+ }
+ if (totwritten > 0) c->lastinteraction = time(NULL);
+ if (listLength(c->reply) == 0) {
+ c->sentlen = 0;
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ }
+}
+
+void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
+{
+ redisClient *c = privdata;
+ int nwritten = 0, totwritten = 0, objlen, willwrite;
+ robj *o;
+ struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
+ int offset, ion = 0;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+
+ listNode *node;
+ while (listLength(c->reply)) {
+ offset = c->sentlen;
+ ion = 0;
+ willwrite = 0;
+
+ /* fill-in the iov[] array */
+ for(node = listFirst(c->reply); node; node = listNextNode(node)) {
+ o = listNodeValue(node);
+ objlen = sdslen(o->ptr);
+
+ if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
+ break;
+
+ if(ion == REDIS_WRITEV_IOVEC_COUNT)
+ break; /* no more iovecs */
+
+ iov[ion].iov_base = ((char*)o->ptr) + offset;
+ iov[ion].iov_len = objlen - offset;
+ willwrite += objlen - offset;
+ offset = 0; /* just for the first item */
+ ion++;
+ }
+
+ if(willwrite == 0)
+ break;
+
+ /* write all collected blocks at once */
+ if((nwritten = writev(fd, iov, ion)) < 0) {
+ if (errno != EAGAIN) {
+ redisLog(REDIS_VERBOSE,
+ "Error writing to client: %s", strerror(errno));
+ freeClient(c);
+ return;
+ }
+ break;
+ }
+
+ totwritten += nwritten;
+ offset = c->sentlen;
+
+ /* remove written robjs from c->reply */
+ while (nwritten && listLength(c->reply)) {
+ o = listNodeValue(listFirst(c->reply));
+ objlen = sdslen(o->ptr);
+
+ if(nwritten >= objlen - offset) {
+ listDelNode(c->reply, listFirst(c->reply));
+ nwritten -= objlen - offset;
+ c->sentlen = 0;
+ } else {
+ /* partial write */
+ c->sentlen += nwritten;
+ break;
+ }
+ offset = 0;
+ }
+ }
+
+ if (totwritten > 0)
+ c->lastinteraction = time(NULL);
+
+ if (listLength(c->reply) == 0) {
+ c->sentlen = 0;
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ }
+}
+
+/* resetClient prepare the client to process the next command */
+void resetClient(redisClient *c) {
+ freeClientArgv(c);
+ c->bulklen = -1;
+ c->multibulk = 0;
+}
+
+void closeTimedoutClients(void) {
+ redisClient *c;
+ listNode *ln;
+ time_t now = time(NULL);
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ c = listNodeValue(ln);
+ if (server.maxidletime &&
+ !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
+ !(c->flags & REDIS_MASTER) && /* no timeout for masters */
+ dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
+ listLength(c->pubsub_patterns) == 0 &&
+ (now - c->lastinteraction > server.maxidletime))
+ {
+ redisLog(REDIS_VERBOSE,"Closing idle client");
+ freeClient(c);
+ } else if (c->flags & REDIS_BLOCKED) {
+ if (c->blockingto != 0 && c->blockingto < now) {
+ addReply(c,shared.nullmultibulk);
+ unblockClientWaitingData(c);
+ }
+ }
+ }
+}
+
+void processInputBuffer(redisClient *c) {
+again:
+ /* Before to process the input buffer, make sure the client is not
+ * waitig for a blocking operation such as BLPOP. Note that the first
+ * iteration the client is never blocked, otherwise the processInputBuffer
+ * would not be called at all, but after the execution of the first commands
+ * in the input buffer the client may be blocked, and the "goto again"
+ * will try to reiterate. The following line will make it return asap. */
+ if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
+ if (c->bulklen == -1) {
+ /* Read the first line of the query */
+ char *p = strchr(c->querybuf,'\n');
+ size_t querylen;
+
+ if (p) {
+ sds query, *argv;
+ int argc, j;
+
+ query = c->querybuf;
+ c->querybuf = sdsempty();
+ querylen = 1+(p-(query));
+ if (sdslen(query) > querylen) {
+ /* leave data after the first line of the query in the buffer */
+ c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
+ }
+ *p = '\0'; /* remove "\n" */
+ if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
+ sdsupdatelen(query);
+
+ /* Now we can split the query in arguments */
+ argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
+ sdsfree(query);
+
+ if (c->argv) zfree(c->argv);
+ c->argv = zmalloc(sizeof(robj*)*argc);
+
+ for (j = 0; j < argc; j++) {
+ if (sdslen(argv[j])) {
+ c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
+ c->argc++;
+ } else {
+ sdsfree(argv[j]);
+ }
+ }
+ zfree(argv);
+ if (c->argc) {
+ /* Execute the command. If the client is still valid
+ * after processCommand() return and there is something
+ * on the query buffer try to process the next command. */
+ if (processCommand(c) && sdslen(c->querybuf)) goto again;
+ } else {
+ /* Nothing to process, argc == 0. Just process the query
+ * buffer if it's not empty or return to the caller */
+ if (sdslen(c->querybuf)) goto again;
+ }
+ return;
+ } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
+ redisLog(REDIS_VERBOSE, "Client protocol error");
+ freeClient(c);
+ return;
+ }
+ } else {
+ /* Bulk read handling. Note that if we are at this point
+ the client already sent a command terminated with a newline,
+ we are reading the bulk data that is actually the last
+ argument of the command. */
+ int qbl = sdslen(c->querybuf);
+
+ if (c->bulklen <= qbl) {
+ /* Copy everything but the final CRLF as final argument */
+ c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
+ c->argc++;
+ c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
+ /* Process the command. If the client is still valid after
+ * the processing and there is more data in the buffer
+ * try to parse it. */
+ if (processCommand(c) && sdslen(c->querybuf)) goto again;
+ return;
+ }
+ }
+}
+
+void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
+ redisClient *c = (redisClient*) privdata;
+ char buf[REDIS_IOBUF_LEN];
+ int nread;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+
+ nread = read(fd, buf, REDIS_IOBUF_LEN);
+ if (nread == -1) {
+ if (errno == EAGAIN) {
+ nread = 0;
+ } else {
+ redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
+ freeClient(c);
+ return;
+ }
+ } else if (nread == 0) {
+ redisLog(REDIS_VERBOSE, "Client closed connection");
+ freeClient(c);
+ return;
+ }
+ if (nread) {
+ c->querybuf = sdscatlen(c->querybuf, buf, nread);
+ c->lastinteraction = time(NULL);
+ } else {
+ return;
+ }
+ processInputBuffer(c);
+}
diff --git a/src/object.c b/src/object.c
new file mode 100644
index 000000000..4854909e0
--- /dev/null
+++ b/src/object.c
@@ -0,0 +1,405 @@
+#include "redis.h"
+#include <pthread.h>
+
+robj *createObject(int type, void *ptr) {
+ robj *o;
+
+ if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
+ if (listLength(server.objfreelist)) {
+ listNode *head = listFirst(server.objfreelist);
+ o = listNodeValue(head);
+ listDelNode(server.objfreelist,head);
+ if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
+ } else {
+ if (server.vm_enabled)
+ pthread_mutex_unlock(&server.obj_freelist_mutex);
+ o = zmalloc(sizeof(*o));
+ }
+ o->type = type;
+ o->encoding = REDIS_ENCODING_RAW;
+ o->ptr = ptr;
+ o->refcount = 1;
+ if (server.vm_enabled) {
+ /* Note that this code may run in the context of an I/O thread
+ * and accessing server.lruclock in theory is an error
+ * (no locks). But in practice this is safe, and even if we read
+ * garbage Redis will not fail. */
+ o->lru = server.lruclock;
+ o->storage = REDIS_VM_MEMORY;
+ }
+ return o;
+}
+
+robj *createStringObject(char *ptr, size_t len) {
+ return createObject(REDIS_STRING,sdsnewlen(ptr,len));
+}
+
+robj *createStringObjectFromLongLong(long long value) {
+ robj *o;
+ if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
+ incrRefCount(shared.integers[value]);
+ o = shared.integers[value];
+ } else {
+ if (value >= LONG_MIN && value <= LONG_MAX) {
+ o = createObject(REDIS_STRING, NULL);
+ o->encoding = REDIS_ENCODING_INT;
+ o->ptr = (void*)((long)value);
+ } else {
+ o = createObject(REDIS_STRING,sdsfromlonglong(value));
+ }
+ }
+ return o;
+}
+
+robj *dupStringObject(robj *o) {
+ redisAssert(o->encoding == REDIS_ENCODING_RAW);
+ return createStringObject(o->ptr,sdslen(o->ptr));
+}
+
+robj *createListObject(void) {
+ list *l = listCreate();
+ robj *o = createObject(REDIS_LIST,l);
+ listSetFreeMethod(l,decrRefCount);
+ o->encoding = REDIS_ENCODING_LINKEDLIST;
+ return o;
+}
+
+robj *createZiplistObject(void) {
+ unsigned char *zl = ziplistNew();
+ robj *o = createObject(REDIS_LIST,zl);
+ o->encoding = REDIS_ENCODING_ZIPLIST;
+ return o;
+}
+
+robj *createSetObject(void) {
+ dict *d = dictCreate(&setDictType,NULL);
+ return createObject(REDIS_SET,d);
+}
+
+robj *createHashObject(void) {
+ /* All the Hashes start as zipmaps. Will be automatically converted
+ * into hash tables if there are enough elements or big elements
+ * inside. */
+ unsigned char *zm = zipmapNew();
+ robj *o = createObject(REDIS_HASH,zm);
+ o->encoding = REDIS_ENCODING_ZIPMAP;
+ return o;
+}
+
+robj *createZsetObject(void) {
+ zset *zs = zmalloc(sizeof(*zs));
+
+ zs->dict = dictCreate(&zsetDictType,NULL);
+ zs->zsl = zslCreate();
+ return createObject(REDIS_ZSET,zs);
+}
+
+void freeStringObject(robj *o) {
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ sdsfree(o->ptr);
+ }
+}
+
+void freeListObject(robj *o) {
+ switch (o->encoding) {
+ case REDIS_ENCODING_LINKEDLIST:
+ listRelease((list*) o->ptr);
+ break;
+ case REDIS_ENCODING_ZIPLIST:
+ zfree(o->ptr);
+ break;
+ default:
+ redisPanic("Unknown list encoding type");
+ }
+}
+
+void freeSetObject(robj *o) {
+ dictRelease((dict*) o->ptr);
+}
+
+void freeZsetObject(robj *o) {
+ zset *zs = o->ptr;
+
+ dictRelease(zs->dict);
+ zslFree(zs->zsl);
+ zfree(zs);
+}
+
+void freeHashObject(robj *o) {
+ switch (o->encoding) {
+ case REDIS_ENCODING_HT:
+ dictRelease((dict*) o->ptr);
+ break;
+ case REDIS_ENCODING_ZIPMAP:
+ zfree(o->ptr);
+ break;
+ default:
+ redisPanic("Unknown hash encoding type");
+ break;
+ }
+}
+
+void incrRefCount(robj *o) {
+ o->refcount++;
+}
+
+void decrRefCount(void *obj) {
+ robj *o = obj;
+
+ /* Object is a swapped out value, or in the process of being loaded. */
+ if (server.vm_enabled &&
+ (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING))
+ {
+ vmpointer *vp = obj;
+ if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o);
+ vmMarkPagesFree(vp->page,vp->usedpages);
+ server.vm_stats_swapped_objects--;
+ zfree(vp);
+ return;
+ }
+
+ if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
+ /* Object is in memory, or in the process of being swapped out.
+ *
+ * If the object is being swapped out, abort the operation on
+ * decrRefCount even if the refcount does not drop to 0: the object
+ * is referenced at least two times, as value of the key AND as
+ * job->val in the iojob. So if we don't invalidate the iojob, when it is
+ * done but the relevant key was removed in the meantime, the
+ * complete jobs handler will not find the key about the job and the
+ * assert will fail. */
+ if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING)
+ vmCancelThreadedIOJob(o);
+ if (--(o->refcount) == 0) {
+ switch(o->type) {
+ case REDIS_STRING: freeStringObject(o); break;
+ case REDIS_LIST: freeListObject(o); break;
+ case REDIS_SET: freeSetObject(o); break;
+ case REDIS_ZSET: freeZsetObject(o); break;
+ case REDIS_HASH: freeHashObject(o); break;
+ default: redisPanic("Unknown object type"); break;
+ }
+ if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
+ if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
+ !listAddNodeHead(server.objfreelist,o))
+ zfree(o);
+ if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
+ }
+}
+
+int checkType(redisClient *c, robj *o, int type) {
+ if (o->type != type) {
+ addReply(c,shared.wrongtypeerr);
+ return 1;
+ }
+ return 0;
+}
+
+/* Try to encode a string object in order to save space */
+robj *tryObjectEncoding(robj *o) {
+ long value;
+ sds s = o->ptr;
+
+ if (o->encoding != REDIS_ENCODING_RAW)
+ return o; /* Already encoded */
+
+ /* It's not safe to encode shared objects: shared objects can be shared
+ * everywhere in the "object space" of Redis. Encoded objects can only
+ * appear as "values" (and not, for instance, as keys) */
+ if (o->refcount > 1) return o;
+
+ /* Currently we try to encode only strings */
+ redisAssert(o->type == REDIS_STRING);
+
+ /* Check if we can represent this string as a long integer */
+ if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return o;
+
+ /* Ok, this object can be encoded */
+ if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
+ decrRefCount(o);
+ incrRefCount(shared.integers[value]);
+ return shared.integers[value];
+ } else {
+ o->encoding = REDIS_ENCODING_INT;
+ sdsfree(o->ptr);
+ o->ptr = (void*) value;
+ return o;
+ }
+}
+
+/* Get a decoded version of an encoded object (returned as a new object).
+ * If the object is already raw-encoded just increment the ref count. */
+robj *getDecodedObject(robj *o) {
+ robj *dec;
+
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ incrRefCount(o);
+ return o;
+ }
+ if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
+ char buf[32];
+
+ ll2string(buf,32,(long)o->ptr);
+ dec = createStringObject(buf,strlen(buf));
+ return dec;
+ } else {
+ redisPanic("Unknown encoding type");
+ }
+}
+
+/* Compare two string objects via strcmp() or alike.
+ * Note that the objects may be integer-encoded. In such a case we
+ * use ll2string() to get a string representation of the numbers on the stack
+ * and compare the strings, it's much faster than calling getDecodedObject().
+ *
+ * Important note: if objects are not integer encoded, but binary-safe strings,
+ * sdscmp() from sds.c will apply memcmp() so this function ca be considered
+ * binary safe. */
+int compareStringObjects(robj *a, robj *b) {
+ redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
+ char bufa[128], bufb[128], *astr, *bstr;
+ int bothsds = 1;
+
+ if (a == b) return 0;
+ if (a->encoding != REDIS_ENCODING_RAW) {
+ ll2string(bufa,sizeof(bufa),(long) a->ptr);
+ astr = bufa;
+ bothsds = 0;
+ } else {
+ astr = a->ptr;
+ }
+ if (b->encoding != REDIS_ENCODING_RAW) {
+ ll2string(bufb,sizeof(bufb),(long) b->ptr);
+ bstr = bufb;
+ bothsds = 0;
+ } else {
+ bstr = b->ptr;
+ }
+ return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
+}
+
+/* Equal string objects return 1 if the two objects are the same from the
+ * point of view of a string comparison, otherwise 0 is returned. Note that
+ * this function is faster then checking for (compareStringObject(a,b) == 0)
+ * because it can perform some more optimization. */
+int equalStringObjects(robj *a, robj *b) {
+ if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){
+ return a->ptr == b->ptr;
+ } else {
+ return compareStringObjects(a,b) == 0;
+ }
+}
+
+size_t stringObjectLen(robj *o) {
+ redisAssert(o->type == REDIS_STRING);
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ return sdslen(o->ptr);
+ } else {
+ char buf[32];
+
+ return ll2string(buf,32,(long)o->ptr);
+ }
+}
+
+int getDoubleFromObject(robj *o, double *target) {
+ double value;
+ char *eptr;
+
+ if (o == NULL) {
+ value = 0;
+ } else {
+ redisAssert(o->type == REDIS_STRING);
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ value = strtod(o->ptr, &eptr);
+ if (eptr[0] != '\0') return REDIS_ERR;
+ } else if (o->encoding == REDIS_ENCODING_INT) {
+ value = (long)o->ptr;
+ } else {
+ redisPanic("Unknown string encoding");
+ }
+ }
+
+ *target = value;
+ return REDIS_OK;
+}
+
+int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
+ double value;
+ if (getDoubleFromObject(o, &value) != REDIS_OK) {
+ if (msg != NULL) {
+ addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
+ } else {
+ addReplySds(c, sdsnew("-ERR value is not a double\r\n"));
+ }
+ return REDIS_ERR;
+ }
+
+ *target = value;
+ return REDIS_OK;
+}
+
+int getLongLongFromObject(robj *o, long long *target) {
+ long long value;
+ char *eptr;
+
+ if (o == NULL) {
+ value = 0;
+ } else {
+ redisAssert(o->type == REDIS_STRING);
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ value = strtoll(o->ptr, &eptr, 10);
+ if (eptr[0] != '\0') return REDIS_ERR;
+ } else if (o->encoding == REDIS_ENCODING_INT) {
+ value = (long)o->ptr;
+ } else {
+ redisPanic("Unknown string encoding");
+ }
+ }
+
+ *target = value;
+ return REDIS_OK;
+}
+
+int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
+ long long value;
+ if (getLongLongFromObject(o, &value) != REDIS_OK) {
+ if (msg != NULL) {
+ addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
+ } else {
+ addReplySds(c, sdsnew("-ERR value is not an integer\r\n"));
+ }
+ return REDIS_ERR;
+ }
+
+ *target = value;
+ return REDIS_OK;
+}
+
+int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
+ long long value;
+
+ if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
+ if (value < LONG_MIN || value > LONG_MAX) {
+ if (msg != NULL) {
+ addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
+ } else {
+ addReplySds(c, sdsnew("-ERR value is out of range\r\n"));
+ }
+ return REDIS_ERR;
+ }
+
+ *target = value;
+ return REDIS_OK;
+}
+
+char *strEncoding(int encoding) {
+ switch(encoding) {
+ case REDIS_ENCODING_RAW: return "raw";
+ case REDIS_ENCODING_INT: return "int";
+ case REDIS_ENCODING_HT: return "hashtable";
+ case REDIS_ENCODING_ZIPMAP: return "zipmap";
+ case REDIS_ENCODING_LINKEDLIST: return "linkedlist";
+ case REDIS_ENCODING_ZIPLIST: return "ziplist";
+ default: return "unknown";
+ }
+}
diff --git a/pqsort.c b/src/pqsort.c
index 257756376..257756376 100644
--- a/pqsort.c
+++ b/src/pqsort.c
diff --git a/pqsort.h b/src/pqsort.h
index 5054d5209..5054d5209 100644
--- a/pqsort.h
+++ b/src/pqsort.h
diff --git a/src/pubsub.c b/src/pubsub.c
new file mode 100644
index 000000000..c9f5f310e
--- /dev/null
+++ b/src/pubsub.c
@@ -0,0 +1,259 @@
+#include "redis.h"
+
+void freePubsubPattern(void *p) {
+ pubsubPattern *pat = p;
+
+ decrRefCount(pat->pattern);
+ zfree(pat);
+}
+
+int listMatchPubsubPattern(void *a, void *b) {
+ pubsubPattern *pa = a, *pb = b;
+
+ return (pa->client == pb->client) &&
+ (equalStringObjects(pa->pattern,pb->pattern));
+}
+
+/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was already subscribed to that channel. */
+int pubsubSubscribeChannel(redisClient *c, robj *channel) {
+ struct dictEntry *de;
+ list *clients = NULL;
+ int retval = 0;
+
+ /* Add the channel to the client -> channels hash table */
+ if (dictAdd(c->pubsub_channels,channel,NULL) == DICT_OK) {
+ retval = 1;
+ incrRefCount(channel);
+ /* Add the client to the channel -> list of clients hash table */
+ de = dictFind(server.pubsub_channels,channel);
+ if (de == NULL) {
+ clients = listCreate();
+ dictAdd(server.pubsub_channels,channel,clients);
+ incrRefCount(channel);
+ } else {
+ clients = dictGetEntryVal(de);
+ }
+ listAddNodeTail(clients,c);
+ }
+ /* Notify the client */
+ addReply(c,shared.mbulk3);
+ addReply(c,shared.subscribebulk);
+ addReplyBulk(c,channel);
+ addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
+ return retval;
+}
+
+/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was not subscribed to the specified channel. */
+int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
+ struct dictEntry *de;
+ list *clients;
+ listNode *ln;
+ int retval = 0;
+
+ /* Remove the channel from the client -> channels hash table */
+ incrRefCount(channel); /* channel may be just a pointer to the same object
+ we have in the hash tables. Protect it... */
+ if (dictDelete(c->pubsub_channels,channel) == DICT_OK) {
+ retval = 1;
+ /* Remove the client from the channel -> clients list hash table */
+ de = dictFind(server.pubsub_channels,channel);
+ redisAssert(de != NULL);
+ clients = dictGetEntryVal(de);
+ ln = listSearchKey(clients,c);
+ redisAssert(ln != NULL);
+ listDelNode(clients,ln);
+ if (listLength(clients) == 0) {
+ /* Free the list and associated hash entry at all if this was
+ * the latest client, so that it will be possible to abuse
+ * Redis PUBSUB creating millions of channels. */
+ dictDelete(server.pubsub_channels,channel);
+ }
+ }
+ /* Notify the client */
+ if (notify) {
+ addReply(c,shared.mbulk3);
+ addReply(c,shared.unsubscribebulk);
+ addReplyBulk(c,channel);
+ addReplyLongLong(c,dictSize(c->pubsub_channels)+
+ listLength(c->pubsub_patterns));
+
+ }
+ decrRefCount(channel); /* it is finally safe to release it */
+ return retval;
+}
+
+/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
+int pubsubSubscribePattern(redisClient *c, robj *pattern) {
+ int retval = 0;
+
+ if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
+ retval = 1;
+ pubsubPattern *pat;
+ listAddNodeTail(c->pubsub_patterns,pattern);
+ incrRefCount(pattern);
+ pat = zmalloc(sizeof(*pat));
+ pat->pattern = getDecodedObject(pattern);
+ pat->client = c;
+ listAddNodeTail(server.pubsub_patterns,pat);
+ }
+ /* Notify the client */
+ addReply(c,shared.mbulk3);
+ addReply(c,shared.psubscribebulk);
+ addReplyBulk(c,pattern);
+ addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
+ return retval;
+}
+
+/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
+ * 0 if the client was not subscribed to the specified channel. */
+int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
+ listNode *ln;
+ pubsubPattern pat;
+ int retval = 0;
+
+ incrRefCount(pattern); /* Protect the object. May be the same we remove */
+ if ((ln = listSearchKey(c->pubsub_patterns,pattern)) != NULL) {
+ retval = 1;
+ listDelNode(c->pubsub_patterns,ln);
+ pat.client = c;
+ pat.pattern = pattern;
+ ln = listSearchKey(server.pubsub_patterns,&pat);
+ listDelNode(server.pubsub_patterns,ln);
+ }
+ /* Notify the client */
+ if (notify) {
+ addReply(c,shared.mbulk3);
+ addReply(c,shared.punsubscribebulk);
+ addReplyBulk(c,pattern);
+ addReplyLongLong(c,dictSize(c->pubsub_channels)+
+ listLength(c->pubsub_patterns));
+ }
+ decrRefCount(pattern);
+ return retval;
+}
+
+/* Unsubscribe from all the channels. Return the number of channels the
+ * client was subscribed from. */
+int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
+ dictIterator *di = dictGetIterator(c->pubsub_channels);
+ dictEntry *de;
+ int count = 0;
+
+ while((de = dictNext(di)) != NULL) {
+ robj *channel = dictGetEntryKey(de);
+
+ count += pubsubUnsubscribeChannel(c,channel,notify);
+ }
+ dictReleaseIterator(di);
+ return count;
+}
+
+/* Unsubscribe from all the patterns. Return the number of patterns the
+ * client was subscribed from. */
+int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
+ listNode *ln;
+ listIter li;
+ int count = 0;
+
+ listRewind(c->pubsub_patterns,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ robj *pattern = ln->value;
+
+ count += pubsubUnsubscribePattern(c,pattern,notify);
+ }
+ return count;
+}
+
+/* Publish a message */
+int pubsubPublishMessage(robj *channel, robj *message) {
+ int receivers = 0;
+ struct dictEntry *de;
+ listNode *ln;
+ listIter li;
+
+ /* Send to clients listening for that channel */
+ de = dictFind(server.pubsub_channels,channel);
+ if (de) {
+ list *list = dictGetEntryVal(de);
+ listNode *ln;
+ listIter li;
+
+ listRewind(list,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ redisClient *c = ln->value;
+
+ addReply(c,shared.mbulk3);
+ addReply(c,shared.messagebulk);
+ addReplyBulk(c,channel);
+ addReplyBulk(c,message);
+ receivers++;
+ }
+ }
+ /* Send to clients listening to matching channels */
+ if (listLength(server.pubsub_patterns)) {
+ listRewind(server.pubsub_patterns,&li);
+ channel = getDecodedObject(channel);
+ while ((ln = listNext(&li)) != NULL) {
+ pubsubPattern *pat = ln->value;
+
+ if (stringmatchlen((char*)pat->pattern->ptr,
+ sdslen(pat->pattern->ptr),
+ (char*)channel->ptr,
+ sdslen(channel->ptr),0)) {
+ addReply(pat->client,shared.mbulk4);
+ addReply(pat->client,shared.pmessagebulk);
+ addReplyBulk(pat->client,pat->pattern);
+ addReplyBulk(pat->client,channel);
+ addReplyBulk(pat->client,message);
+ receivers++;
+ }
+ }
+ decrRefCount(channel);
+ }
+ return receivers;
+}
+
+void subscribeCommand(redisClient *c) {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubSubscribeChannel(c,c->argv[j]);
+}
+
+void unsubscribeCommand(redisClient *c) {
+ if (c->argc == 1) {
+ pubsubUnsubscribeAllChannels(c,1);
+ return;
+ } else {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubUnsubscribeChannel(c,c->argv[j],1);
+ }
+}
+
+void psubscribeCommand(redisClient *c) {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubSubscribePattern(c,c->argv[j]);
+}
+
+void punsubscribeCommand(redisClient *c) {
+ if (c->argc == 1) {
+ pubsubUnsubscribeAllPatterns(c,1);
+ return;
+ } else {
+ int j;
+
+ for (j = 1; j < c->argc; j++)
+ pubsubUnsubscribePattern(c,c->argv[j],1);
+ }
+}
+
+void publishCommand(redisClient *c) {
+ int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
+ addReplyLongLong(c,receivers);
+}
diff --git a/src/rdb.c b/src/rdb.c
new file mode 100644
index 000000000..5bda5e565
--- /dev/null
+++ b/src/rdb.c
@@ -0,0 +1,886 @@
+#include "redis.h"
+#include "lzf.h" /* LZF compression library */
+
+#include <math.h>
+
+int rdbSaveType(FILE *fp, unsigned char type) {
+ if (fwrite(&type,1,1,fp) == 0) return -1;
+ return 0;
+}
+
+int rdbSaveTime(FILE *fp, time_t t) {
+ int32_t t32 = (int32_t) t;
+ if (fwrite(&t32,4,1,fp) == 0) return -1;
+ return 0;
+}
+
+/* check rdbLoadLen() comments for more info */
+int rdbSaveLen(FILE *fp, uint32_t len) {
+ unsigned char buf[2];
+
+ if (len < (1<<6)) {
+ /* Save a 6 bit len */
+ buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
+ if (fwrite(buf,1,1,fp) == 0) return -1;
+ } else if (len < (1<<14)) {
+ /* Save a 14 bit len */
+ buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
+ buf[1] = len&0xFF;
+ if (fwrite(buf,2,1,fp) == 0) return -1;
+ } else {
+ /* Save a 32 bit len */
+ buf[0] = (REDIS_RDB_32BITLEN<<6);
+ if (fwrite(buf,1,1,fp) == 0) return -1;
+ len = htonl(len);
+ if (fwrite(&len,4,1,fp) == 0) return -1;
+ }
+ return 0;
+}
+
+/* Encode 'value' as an integer if possible (if integer will fit the
+ * supported range). If the function sucessful encoded the integer
+ * then the (up to 5 bytes) encoded representation is written in the
+ * string pointed by 'enc' and the length is returned. Otherwise
+ * 0 is returned. */
+int rdbEncodeInteger(long long value, unsigned char *enc) {
+ /* Finally check if it fits in our ranges */
+ if (value >= -(1<<7) && value <= (1<<7)-1) {
+ enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
+ enc[1] = value&0xFF;
+ return 2;
+ } else if (value >= -(1<<15) && value <= (1<<15)-1) {
+ enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
+ enc[1] = value&0xFF;
+ enc[2] = (value>>8)&0xFF;
+ return 3;
+ } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
+ enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
+ enc[1] = value&0xFF;
+ enc[2] = (value>>8)&0xFF;
+ enc[3] = (value>>16)&0xFF;
+ enc[4] = (value>>24)&0xFF;
+ return 5;
+ } else {
+ return 0;
+ }
+}
+
+/* String objects in the form "2391" "-100" without any space and with a
+ * range of values that can fit in an 8, 16 or 32 bit signed value can be
+ * encoded as integers to save space */
+int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
+ long long value;
+ char *endptr, buf[32];
+
+ /* Check if it's possible to encode this value as a number */
+ value = strtoll(s, &endptr, 10);
+ if (endptr[0] != '\0') return 0;
+ ll2string(buf,32,value);
+
+ /* If the number converted back into a string is not identical
+ * then it's not possible to encode the string as integer */
+ if (strlen(buf) != len || memcmp(buf,s,len)) return 0;
+
+ return rdbEncodeInteger(value,enc);
+}
+
+int rdbSaveLzfStringObject(FILE *fp, unsigned char *s, size_t len) {
+ size_t comprlen, outlen;
+ unsigned char byte;
+ void *out;
+
+ /* We require at least four bytes compression for this to be worth it */
+ if (len <= 4) return 0;
+ outlen = len-4;
+ if ((out = zmalloc(outlen+1)) == NULL) return 0;
+ comprlen = lzf_compress(s, len, out, outlen);
+ if (comprlen == 0) {
+ zfree(out);
+ return 0;
+ }
+ /* Data compressed! Let's save it on disk */
+ byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
+ if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
+ if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
+ if (rdbSaveLen(fp,len) == -1) goto writeerr;
+ if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
+ zfree(out);
+ return comprlen;
+
+writeerr:
+ zfree(out);
+ return -1;
+}
+
+/* Save a string objet as [len][data] on disk. If the object is a string
+ * representation of an integer value we try to safe it in a special form */
+int rdbSaveRawString(FILE *fp, unsigned char *s, size_t len) {
+ int enclen;
+
+ /* Try integer encoding */
+ if (len <= 11) {
+ unsigned char buf[5];
+ if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
+ if (fwrite(buf,enclen,1,fp) == 0) return -1;
+ return 0;
+ }
+ }
+
+ /* Try LZF compression - under 20 bytes it's unable to compress even
+ * aaaaaaaaaaaaaaaaaa so skip it */
+ if (server.rdbcompression && len > 20) {
+ int retval;
+
+ retval = rdbSaveLzfStringObject(fp,s,len);
+ if (retval == -1) return -1;
+ if (retval > 0) return 0;
+ /* retval == 0 means data can't be compressed, save the old way */
+ }
+
+ /* Store verbatim */
+ if (rdbSaveLen(fp,len) == -1) return -1;
+ if (len && fwrite(s,len,1,fp) == 0) return -1;
+ return 0;
+}
+
+/* Save a long long value as either an encoded string or a string. */
+int rdbSaveLongLongAsStringObject(FILE *fp, long long value) {
+ unsigned char buf[32];
+ int enclen = rdbEncodeInteger(value,buf);
+ if (enclen > 0) {
+ if (fwrite(buf,enclen,1,fp) == 0) return -1;
+ } else {
+ /* Encode as string */
+ enclen = ll2string((char*)buf,32,value);
+ redisAssert(enclen < 32);
+ if (rdbSaveLen(fp,enclen) == -1) return -1;
+ if (fwrite(buf,enclen,1,fp) == 0) return -1;
+ }
+ return 0;
+}
+
+/* Like rdbSaveStringObjectRaw() but handle encoded objects */
+int rdbSaveStringObject(FILE *fp, robj *obj) {
+ /* Avoid to decode the object, then encode it again, if the
+ * object is alrady integer encoded. */
+ if (obj->encoding == REDIS_ENCODING_INT) {
+ return rdbSaveLongLongAsStringObject(fp,(long)obj->ptr);
+ } else {
+ redisAssert(obj->encoding == REDIS_ENCODING_RAW);
+ return rdbSaveRawString(fp,obj->ptr,sdslen(obj->ptr));
+ }
+}
+
+/* Save a double value. Doubles are saved as strings prefixed by an unsigned
+ * 8 bit integer specifing the length of the representation.
+ * This 8 bit integer has special values in order to specify the following
+ * conditions:
+ * 253: not a number
+ * 254: + inf
+ * 255: - inf
+ */
+int rdbSaveDoubleValue(FILE *fp, double val) {
+ unsigned char buf[128];
+ int len;
+
+ if (isnan(val)) {
+ buf[0] = 253;
+ len = 1;
+ } else if (!isfinite(val)) {
+ len = 1;
+ buf[0] = (val < 0) ? 255 : 254;
+ } else {
+#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
+ /* Check if the float is in a safe range to be casted into a
+ * long long. We are assuming that long long is 64 bit here.
+ * Also we are assuming that there are no implementations around where
+ * double has precision < 52 bit.
+ *
+ * Under this assumptions we test if a double is inside an interval
+ * where casting to long long is safe. Then using two castings we
+ * make sure the decimal part is zero. If all this is true we use
+ * integer printing function that is much faster. */
+ double min = -4503599627370495; /* (2^52)-1 */
+ double max = 4503599627370496; /* -(2^52) */
+ if (val > min && val < max && val == ((double)((long long)val)))
+ ll2string((char*)buf+1,sizeof(buf),(long long)val);
+ else
+#endif
+ snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
+ buf[0] = strlen((char*)buf+1);
+ len = buf[0]+1;
+ }
+ if (fwrite(buf,len,1,fp) == 0) return -1;
+ return 0;
+}
+
+/* Save a Redis object. */
+int rdbSaveObject(FILE *fp, robj *o) {
+ if (o->type == REDIS_STRING) {
+ /* Save a string value */
+ if (rdbSaveStringObject(fp,o) == -1) return -1;
+ } else if (o->type == REDIS_LIST) {
+ /* Save a list value */
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *p;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+
+ if (rdbSaveLen(fp,ziplistLen(o->ptr)) == -1) return -1;
+ p = ziplistIndex(o->ptr,0);
+ while(ziplistGet(p,&vstr,&vlen,&vlong)) {
+ if (vstr) {
+ if (rdbSaveRawString(fp,vstr,vlen) == -1)
+ return -1;
+ } else {
+ if (rdbSaveLongLongAsStringObject(fp,vlong) == -1)
+ return -1;
+ }
+ p = ziplistNext(o->ptr,p);
+ }
+ } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
+ list *list = o->ptr;
+ listIter li;
+ listNode *ln;
+
+ if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
+ listRewind(list,&li);
+ while((ln = listNext(&li))) {
+ robj *eleobj = listNodeValue(ln);
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ } else if (o->type == REDIS_SET) {
+ /* Save a set value */
+ dict *set = o->ptr;
+ dictIterator *di = dictGetIterator(set);
+ dictEntry *de;
+
+ if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_ZSET) {
+ /* Save a set value */
+ zset *zs = o->ptr;
+ dictIterator *di = dictGetIterator(zs->dict);
+ dictEntry *de;
+
+ if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
+ while((de = dictNext(di)) != NULL) {
+ robj *eleobj = dictGetEntryKey(de);
+ double *score = dictGetEntryVal(de);
+
+ if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
+ if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ } else if (o->type == REDIS_HASH) {
+ /* Save a hash value */
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *p = zipmapRewind(o->ptr);
+ unsigned int count = zipmapLen(o->ptr);
+ unsigned char *key, *val;
+ unsigned int klen, vlen;
+
+ if (rdbSaveLen(fp,count) == -1) return -1;
+ while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
+ if (rdbSaveRawString(fp,key,klen) == -1) return -1;
+ if (rdbSaveRawString(fp,val,vlen) == -1) return -1;
+ }
+ } else {
+ dictIterator *di = dictGetIterator(o->ptr);
+ dictEntry *de;
+
+ if (rdbSaveLen(fp,dictSize((dict*)o->ptr)) == -1) return -1;
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetEntryKey(de);
+ robj *val = dictGetEntryVal(de);
+
+ if (rdbSaveStringObject(fp,key) == -1) return -1;
+ if (rdbSaveStringObject(fp,val) == -1) return -1;
+ }
+ dictReleaseIterator(di);
+ }
+ } else {
+ redisPanic("Unknown object type");
+ }
+ return 0;
+}
+
+/* Return the length the object will have on disk if saved with
+ * the rdbSaveObject() function. Currently we use a trick to get
+ * this length with very little changes to the code. In the future
+ * we could switch to a faster solution. */
+off_t rdbSavedObjectLen(robj *o, FILE *fp) {
+ if (fp == NULL) fp = server.devnull;
+ rewind(fp);
+ redisAssert(rdbSaveObject(fp,o) != 1);
+ return ftello(fp);
+}
+
+/* Return the number of pages required to save this object in the swap file */
+off_t rdbSavedObjectPages(robj *o, FILE *fp) {
+ off_t bytes = rdbSavedObjectLen(o,fp);
+
+ return (bytes+(server.vm_page_size-1))/server.vm_page_size;
+}
+
+/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
+int rdbSave(char *filename) {
+ dictIterator *di = NULL;
+ dictEntry *de;
+ FILE *fp;
+ char tmpfile[256];
+ int j;
+ time_t now = time(NULL);
+
+ /* Wait for I/O therads to terminate, just in case this is a
+ * foreground-saving, to avoid seeking the swap file descriptor at the
+ * same time. */
+ if (server.vm_enabled)
+ waitEmptyIOJobsQueue();
+
+ snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
+ return REDIS_ERR;
+ }
+ if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+ dict *d = db->dict;
+ if (dictSize(d) == 0) continue;
+ di = dictGetIterator(d);
+ if (!di) {
+ fclose(fp);
+ return REDIS_ERR;
+ }
+
+ /* Write the SELECT DB opcode */
+ if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
+ if (rdbSaveLen(fp,j) == -1) goto werr;
+
+ /* Iterate this DB writing every entry */
+ while((de = dictNext(di)) != NULL) {
+ sds keystr = dictGetEntryKey(de);
+ robj key, *o = dictGetEntryVal(de);
+ time_t expiretime;
+
+ initStaticStringObject(key,keystr);
+ expiretime = getExpire(db,&key);
+
+ /* Save the expire time */
+ if (expiretime != -1) {
+ /* If this key is already expired skip it */
+ if (expiretime < now) continue;
+ if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
+ if (rdbSaveTime(fp,expiretime) == -1) goto werr;
+ }
+ /* Save the key and associated value. This requires special
+ * handling if the value is swapped out. */
+ if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
+ o->storage == REDIS_VM_SWAPPING) {
+ /* Save type, key, value */
+ if (rdbSaveType(fp,o->type) == -1) goto werr;
+ if (rdbSaveStringObject(fp,&key) == -1) goto werr;
+ if (rdbSaveObject(fp,o) == -1) goto werr;
+ } else {
+ /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
+ robj *po;
+ /* Get a preview of the object in memory */
+ po = vmPreviewObject(o);
+ /* Save type, key, value */
+ if (rdbSaveType(fp,po->type) == -1) goto werr;
+ if (rdbSaveStringObject(fp,&key) == -1) goto werr;
+ if (rdbSaveObject(fp,po) == -1) goto werr;
+ /* Remove the loaded object from memory */
+ decrRefCount(po);
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ /* EOF opcode */
+ if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
+
+ /* Make sure data will not remain on the OS's output buffers */
+ fflush(fp);
+ fsync(fileno(fp));
+ fclose(fp);
+
+ /* Use RENAME to make sure the DB file is changed atomically only
+ * if the generate DB file is ok. */
+ if (rename(tmpfile,filename) == -1) {
+ redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
+ unlink(tmpfile);
+ return REDIS_ERR;
+ }
+ redisLog(REDIS_NOTICE,"DB saved on disk");
+ server.dirty = 0;
+ server.lastsave = time(NULL);
+ return REDIS_OK;
+
+werr:
+ fclose(fp);
+ unlink(tmpfile);
+ redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
+ if (di) dictReleaseIterator(di);
+ return REDIS_ERR;
+}
+
+int rdbSaveBackground(char *filename) {
+ pid_t childpid;
+
+ if (server.bgsavechildpid != -1) return REDIS_ERR;
+ if (server.vm_enabled) waitEmptyIOJobsQueue();
+ if ((childpid = fork()) == 0) {
+ /* Child */
+ if (server.vm_enabled) vmReopenSwapFile();
+ close(server.fd);
+ if (rdbSave(filename) == REDIS_OK) {
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ } else {
+ /* Parent */
+ if (childpid == -1) {
+ redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
+ server.bgsavechildpid = childpid;
+ updateDictResizePolicy();
+ return REDIS_OK;
+ }
+ return REDIS_OK; /* unreached */
+}
+
+void rdbRemoveTempFile(pid_t childpid) {
+ char tmpfile[256];
+
+ snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
+ unlink(tmpfile);
+}
+
+int rdbLoadType(FILE *fp) {
+ unsigned char type;
+ if (fread(&type,1,1,fp) == 0) return -1;
+ return type;
+}
+
+time_t rdbLoadTime(FILE *fp) {
+ int32_t t32;
+ if (fread(&t32,4,1,fp) == 0) return -1;
+ return (time_t) t32;
+}
+
+/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
+ * of this file for a description of how this are stored on disk.
+ *
+ * isencoded is set to 1 if the readed length is not actually a length but
+ * an "encoding type", check the above comments for more info */
+uint32_t rdbLoadLen(FILE *fp, int *isencoded) {
+ unsigned char buf[2];
+ uint32_t len;
+ int type;
+
+ if (isencoded) *isencoded = 0;
+ if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
+ type = (buf[0]&0xC0)>>6;
+ if (type == REDIS_RDB_6BITLEN) {
+ /* Read a 6 bit len */
+ return buf[0]&0x3F;
+ } else if (type == REDIS_RDB_ENCVAL) {
+ /* Read a 6 bit len encoding type */
+ if (isencoded) *isencoded = 1;
+ return buf[0]&0x3F;
+ } else if (type == REDIS_RDB_14BITLEN) {
+ /* Read a 14 bit len */
+ if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
+ return ((buf[0]&0x3F)<<8)|buf[1];
+ } else {
+ /* Read a 32 bit len */
+ if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
+ return ntohl(len);
+ }
+}
+
+/* Load an integer-encoded object from file 'fp', with the specified
+ * encoding type 'enctype'. If encode is true the function may return
+ * an integer-encoded object as reply, otherwise the returned object
+ * will always be encoded as a raw string. */
+robj *rdbLoadIntegerObject(FILE *fp, int enctype, int encode) {
+ unsigned char enc[4];
+ long long val;
+
+ if (enctype == REDIS_RDB_ENC_INT8) {
+ if (fread(enc,1,1,fp) == 0) return NULL;
+ val = (signed char)enc[0];
+ } else if (enctype == REDIS_RDB_ENC_INT16) {
+ uint16_t v;
+ if (fread(enc,2,1,fp) == 0) return NULL;
+ v = enc[0]|(enc[1]<<8);
+ val = (int16_t)v;
+ } else if (enctype == REDIS_RDB_ENC_INT32) {
+ uint32_t v;
+ if (fread(enc,4,1,fp) == 0) return NULL;
+ v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
+ val = (int32_t)v;
+ } else {
+ val = 0; /* anti-warning */
+ redisPanic("Unknown RDB integer encoding type");
+ }
+ if (encode)
+ return createStringObjectFromLongLong(val);
+ else
+ return createObject(REDIS_STRING,sdsfromlonglong(val));
+}
+
+robj *rdbLoadLzfStringObject(FILE*fp) {
+ unsigned int len, clen;
+ unsigned char *c = NULL;
+ sds val = NULL;
+
+ if ((clen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((c = zmalloc(clen)) == NULL) goto err;
+ if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+ if (fread(c,clen,1,fp) == 0) goto err;
+ if (lzf_decompress(c,clen,val,len) == 0) goto err;
+ zfree(c);
+ return createObject(REDIS_STRING,val);
+err:
+ zfree(c);
+ sdsfree(val);
+ return NULL;
+}
+
+robj *rdbGenericLoadStringObject(FILE*fp, int encode) {
+ int isencoded;
+ uint32_t len;
+ sds val;
+
+ len = rdbLoadLen(fp,&isencoded);
+ if (isencoded) {
+ switch(len) {
+ case REDIS_RDB_ENC_INT8:
+ case REDIS_RDB_ENC_INT16:
+ case REDIS_RDB_ENC_INT32:
+ return rdbLoadIntegerObject(fp,len,encode);
+ case REDIS_RDB_ENC_LZF:
+ return rdbLoadLzfStringObject(fp);
+ default:
+ redisPanic("Unknown RDB encoding type");
+ }
+ }
+
+ if (len == REDIS_RDB_LENERR) return NULL;
+ val = sdsnewlen(NULL,len);
+ if (len && fread(val,len,1,fp) == 0) {
+ sdsfree(val);
+ return NULL;
+ }
+ return createObject(REDIS_STRING,val);
+}
+
+robj *rdbLoadStringObject(FILE *fp) {
+ return rdbGenericLoadStringObject(fp,0);
+}
+
+robj *rdbLoadEncodedStringObject(FILE *fp) {
+ return rdbGenericLoadStringObject(fp,1);
+}
+
+/* For information about double serialization check rdbSaveDoubleValue() */
+int rdbLoadDoubleValue(FILE *fp, double *val) {
+ char buf[128];
+ unsigned char len;
+
+ if (fread(&len,1,1,fp) == 0) return -1;
+ switch(len) {
+ case 255: *val = R_NegInf; return 0;
+ case 254: *val = R_PosInf; return 0;
+ case 253: *val = R_Nan; return 0;
+ default:
+ if (fread(buf,len,1,fp) == 0) return -1;
+ buf[len] = '\0';
+ sscanf(buf, "%lg", val);
+ return 0;
+ }
+}
+
+/* Load a Redis object of the specified type from the specified file.
+ * On success a newly allocated object is returned, otherwise NULL. */
+robj *rdbLoadObject(int type, FILE *fp) {
+ robj *o, *ele, *dec;
+ size_t len;
+
+ redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",type,ftell(fp));
+ if (type == REDIS_STRING) {
+ /* Read string value */
+ if ((o = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+ o = tryObjectEncoding(o);
+ } else if (type == REDIS_LIST) {
+ /* Read list value */
+ if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+
+ /* Use a real list when there are too many entries */
+ if (len > server.list_max_ziplist_entries) {
+ o = createListObject();
+ } else {
+ o = createZiplistObject();
+ }
+
+ /* Load every single element of the list */
+ while(len--) {
+ if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+
+ /* If we are using a ziplist and the value is too big, convert
+ * the object to a real list. */
+ if (o->encoding == REDIS_ENCODING_ZIPLIST &&
+ ele->encoding == REDIS_ENCODING_RAW &&
+ sdslen(ele->ptr) > server.list_max_ziplist_value)
+ listTypeConvert(o,REDIS_ENCODING_LINKEDLIST);
+
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ dec = getDecodedObject(ele);
+ o->ptr = ziplistPush(o->ptr,dec->ptr,sdslen(dec->ptr),REDIS_TAIL);
+ decrRefCount(dec);
+ decrRefCount(ele);
+ } else {
+ ele = tryObjectEncoding(ele);
+ listAddNodeTail(o->ptr,ele);
+ }
+ }
+ } else if (type == REDIS_SET) {
+ /* Read list/set value */
+ if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+ o = createSetObject();
+ /* It's faster to expand the dict to the right size asap in order
+ * to avoid rehashing */
+ if (len > DICT_HT_INITIAL_SIZE)
+ dictExpand(o->ptr,len);
+ /* Load every single element of the list/set */
+ while(len--) {
+ if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+ ele = tryObjectEncoding(ele);
+ dictAdd((dict*)o->ptr,ele,NULL);
+ }
+ } else if (type == REDIS_ZSET) {
+ /* Read list/set value */
+ size_t zsetlen;
+ zset *zs;
+
+ if ((zsetlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+ o = createZsetObject();
+ zs = o->ptr;
+ /* Load every single element of the list/set */
+ while(zsetlen--) {
+ robj *ele;
+ double *score = zmalloc(sizeof(double));
+
+ if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+ ele = tryObjectEncoding(ele);
+ if (rdbLoadDoubleValue(fp,score) == -1) return NULL;
+ dictAdd(zs->dict,ele,score);
+ zslInsert(zs->zsl,*score,ele);
+ incrRefCount(ele); /* added to skiplist */
+ }
+ } else if (type == REDIS_HASH) {
+ size_t hashlen;
+
+ if ((hashlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
+ o = createHashObject();
+ /* Too many entries? Use an hash table. */
+ if (hashlen > server.hash_max_zipmap_entries)
+ convertToRealHash(o);
+ /* Load every key/value, then set it into the zipmap or hash
+ * table, as needed. */
+ while(hashlen--) {
+ robj *key, *val;
+
+ if ((key = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+ if ((val = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
+ /* If we are using a zipmap and there are too big values
+ * the object is converted to real hash table encoding. */
+ if (o->encoding != REDIS_ENCODING_HT &&
+ ((key->encoding == REDIS_ENCODING_RAW &&
+ sdslen(key->ptr) > server.hash_max_zipmap_value) ||
+ (val->encoding == REDIS_ENCODING_RAW &&
+ sdslen(val->ptr) > server.hash_max_zipmap_value)))
+ {
+ convertToRealHash(o);
+ }
+
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *zm = o->ptr;
+ robj *deckey, *decval;
+
+ /* We need raw string objects to add them to the zipmap */
+ deckey = getDecodedObject(key);
+ decval = getDecodedObject(val);
+ zm = zipmapSet(zm,deckey->ptr,sdslen(deckey->ptr),
+ decval->ptr,sdslen(decval->ptr),NULL);
+ o->ptr = zm;
+ decrRefCount(deckey);
+ decrRefCount(decval);
+ decrRefCount(key);
+ decrRefCount(val);
+ } else {
+ key = tryObjectEncoding(key);
+ val = tryObjectEncoding(val);
+ dictAdd((dict*)o->ptr,key,val);
+ }
+ }
+ } else {
+ redisPanic("Unknown object type");
+ }
+ return o;
+}
+
+int rdbLoad(char *filename) {
+ FILE *fp;
+ uint32_t dbid;
+ int type, retval, rdbver;
+ int swap_all_values = 0;
+ redisDb *db = server.db+0;
+ char buf[1024];
+ time_t expiretime, now = time(NULL);
+
+ fp = fopen(filename,"r");
+ if (!fp) return REDIS_ERR;
+ if (fread(buf,9,1,fp) == 0) goto eoferr;
+ buf[9] = '\0';
+ if (memcmp(buf,"REDIS",5) != 0) {
+ fclose(fp);
+ redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
+ return REDIS_ERR;
+ }
+ rdbver = atoi(buf+5);
+ if (rdbver != 1) {
+ fclose(fp);
+ redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
+ return REDIS_ERR;
+ }
+ while(1) {
+ robj *key, *val;
+ int force_swapout;
+
+ expiretime = -1;
+ /* Read type. */
+ if ((type = rdbLoadType(fp)) == -1) goto eoferr;
+ if (type == REDIS_EXPIRETIME) {
+ if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
+ /* We read the time so we need to read the object type again */
+ if ((type = rdbLoadType(fp)) == -1) goto eoferr;
+ }
+ if (type == REDIS_EOF) break;
+ /* Handle SELECT DB opcode as a special case */
+ if (type == REDIS_SELECTDB) {
+ if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
+ goto eoferr;
+ if (dbid >= (unsigned)server.dbnum) {
+ redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
+ exit(1);
+ }
+ db = server.db+dbid;
+ continue;
+ }
+ /* Read key */
+ if ((key = rdbLoadStringObject(fp)) == NULL) goto eoferr;
+ /* Read value */
+ if ((val = rdbLoadObject(type,fp)) == NULL) goto eoferr;
+ /* Check if the key already expired */
+ if (expiretime != -1 && expiretime < now) {
+ decrRefCount(key);
+ decrRefCount(val);
+ continue;
+ }
+ /* Add the new object in the hash table */
+ retval = dbAdd(db,key,val);
+ if (retval == REDIS_ERR) {
+ redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key->ptr);
+ exit(1);
+ }
+ /* Set the expire time if needed */
+ if (expiretime != -1) setExpire(db,key,expiretime);
+
+ /* Handle swapping while loading big datasets when VM is on */
+
+ /* If we detecter we are hopeless about fitting something in memory
+ * we just swap every new key on disk. Directly...
+ * Note that's important to check for this condition before resorting
+ * to random sampling, otherwise we may try to swap already
+ * swapped keys. */
+ if (swap_all_values) {
+ dictEntry *de = dictFind(db->dict,key->ptr);
+
+ /* de may be NULL since the key already expired */
+ if (de) {
+ vmpointer *vp;
+ val = dictGetEntryVal(de);
+
+ if (val->refcount == 1 &&
+ (vp = vmSwapObjectBlocking(val)) != NULL)
+ dictGetEntryVal(de) = vp;
+ }
+ decrRefCount(key);
+ continue;
+ }
+ decrRefCount(key);
+
+ /* Flush data on disk once 32 MB of additional RAM are used... */
+ force_swapout = 0;
+ if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
+ force_swapout = 1;
+
+ /* If we have still some hope of having some value fitting memory
+ * then we try random sampling. */
+ if (!swap_all_values && server.vm_enabled && force_swapout) {
+ while (zmalloc_used_memory() > server.vm_max_memory) {
+ if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
+ }
+ if (zmalloc_used_memory() > server.vm_max_memory)
+ swap_all_values = 1; /* We are already using too much mem */
+ }
+ }
+ fclose(fp);
+ return REDIS_OK;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+ redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
+ exit(1);
+ return REDIS_ERR; /* Just to avoid warning */
+}
+
+/* A background saving child (BGSAVE) terminated its work. Handle this. */
+void backgroundSaveDoneHandler(int statloc) {
+ int exitcode = WEXITSTATUS(statloc);
+ int bysignal = WIFSIGNALED(statloc);
+
+ if (!bysignal && exitcode == 0) {
+ redisLog(REDIS_NOTICE,
+ "Background saving terminated with success");
+ server.dirty = 0;
+ server.lastsave = time(NULL);
+ } else if (!bysignal && exitcode != 0) {
+ redisLog(REDIS_WARNING, "Background saving error");
+ } else {
+ redisLog(REDIS_WARNING,
+ "Background saving terminated by signal %d", WTERMSIG(statloc));
+ rdbRemoveTempFile(server.bgsavechildpid);
+ }
+ server.bgsavechildpid = -1;
+ /* Possibly there are slaves waiting for a BGSAVE in order to be served
+ * (the first stage of SYNC is a bulk transfer of dump.rdb) */
+ updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
+}
diff --git a/redis-benchmark.c b/src/redis-benchmark.c
index 123d81180..123d81180 100644
--- a/redis-benchmark.c
+++ b/src/redis-benchmark.c
diff --git a/redis-check-aof.c b/src/redis-check-aof.c
index ff0d1f82c..ff0d1f82c 100644
--- a/redis-check-aof.c
+++ b/src/redis-check-aof.c
diff --git a/redis-check-dump.c b/src/redis-check-dump.c
index 0b002790d..0b002790d 100644
--- a/redis-check-dump.c
+++ b/src/redis-check-dump.c
diff --git a/redis-cli.c b/src/redis-cli.c
index 2daa7c461..2daa7c461 100644
--- a/redis-cli.c
+++ b/src/redis-cli.c
diff --git a/src/redis.c b/src/redis.c
new file mode 100644
index 000000000..5f539216f
--- /dev/null
+++ b/src/redis.c
@@ -0,0 +1,1516 @@
+/*
+ * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "redis.h"
+
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#include <ucontext.h>
+#endif /* HAVE_BACKTRACE */
+
+#include <time.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/uio.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <pthread.h>
+
+/* Our shared "common" objects */
+
+struct sharedObjectsStruct shared;
+
+/* Global vars that are actally used as constants. The following double
+ * values are used for double on-disk serialization, and are initialized
+ * at runtime to avoid strange compiler optimizations. */
+
+double R_Zero, R_PosInf, R_NegInf, R_Nan;
+
+/*================================= Globals ================================= */
+
+/* Global vars */
+struct redisServer server; /* server global state */
+struct redisCommand *commandTable;
+struct redisCommand readonlyCommandTable[] = {
+ {"get",getCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
+ {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
+ {"setex",setexCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
+ {"append",appendCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"substr",substrCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"del",delCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"exists",existsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"mget",mgetCommand,-2,REDIS_CMD_INLINE,NULL,1,-1,1},
+ {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"rpushx",rpushxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"lpushx",lpushxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"linsert",linsertCommand,5,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"rpop",rpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"lpop",lpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"brpop",brpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"blpop",blpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"llen",llenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"lindex",lindexCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"lrange",lrangeCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"lrem",lremCommand,4,REDIS_CMD_BULK,NULL,1,1,1},
+ {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,2,1},
+ {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"srem",sremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"smove",smoveCommand,4,REDIS_CMD_BULK,NULL,1,2,1},
+ {"sismember",sismemberCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"scard",scardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"spop",spopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
+ {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
+ {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
+ {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
+ {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
+ {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
+ {"smembers",sinterCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"zrem",zremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zremrangebyrank",zremrangebyrankCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zunionstore",zunionstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
+ {"zinterstore",zinterstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
+ {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zcount",zcountCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zcard",zcardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"zrank",zrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"zrevrank",zrevrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"hset",hsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"hsetnx",hsetnxCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"hget",hgetCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"hmset",hmsetCommand,-4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"hmget",hmgetCommand,-3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"hincrby",hincrbyCommand,4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"hdel",hdelCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"hlen",hlenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"hkeys",hkeysCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"hvals",hvalsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"hgetall",hgetallCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"hexists",hexistsCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
+ {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
+ {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
+ {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"select",selectCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"move",moveCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"rename",renameCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"expire",expireCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"expireat",expireatCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"keys",keysCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"auth",authCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"ping",pingCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"echo",echoCommand,2,REDIS_CMD_BULK,NULL,0,0,0},
+ {"save",saveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"type",typeCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"multi",multiCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"exec",execCommand,1,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,execBlockClientOnSwappedKeys,0,0,0},
+ {"discard",discardCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"sync",syncCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"flushall",flushallCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
+ {"info",infoCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"monitor",monitorCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"ttl",ttlCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
+ {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"debug",debugCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"config",configCommand,-2,REDIS_CMD_BULK,NULL,0,0,0},
+ {"subscribe",subscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"unsubscribe",unsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"psubscribe",psubscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"punsubscribe",punsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"publish",publishCommand,3,REDIS_CMD_BULK|REDIS_CMD_FORCE_REPLICATION,NULL,0,0,0},
+ {"watch",watchCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
+ {"unwatch",unwatchCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}
+};
+
+/*============================ Utility functions ============================ */
+
+void redisLog(int level, const char *fmt, ...) {
+ va_list ap;
+ FILE *fp;
+
+ fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
+ if (!fp) return;
+
+ va_start(ap, fmt);
+ if (level >= server.verbosity) {
+ char *c = ".-*#";
+ char buf[64];
+ time_t now;
+
+ now = time(NULL);
+ strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
+ fprintf(fp,"[%d] %s %c ",(int)getpid(),buf,c[level]);
+ vfprintf(fp, fmt, ap);
+ fprintf(fp,"\n");
+ fflush(fp);
+ }
+ va_end(ap);
+
+ if (server.logfile) fclose(fp);
+}
+
+/* Redis generally does not try to recover from out of memory conditions
+ * when allocating objects or strings, it is not clear if it will be possible
+ * to report this condition to the client since the networking layer itself
+ * is based on heap allocation for send buffers, so we simply abort.
+ * At least the code will be simpler to read... */
+void oom(const char *msg) {
+ redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
+ sleep(1);
+ abort();
+}
+
+/*====================== Hash table type implementation ==================== */
+
+/* This is an hash table type that uses the SDS dynamic strings libary as
+ * keys and radis objects as values (objects can hold SDS strings,
+ * lists, sets). */
+
+void dictVanillaFree(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+ zfree(val);
+}
+
+void dictListDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+ listRelease((list*)val);
+}
+
+int dictSdsKeyCompare(void *privdata, const void *key1,
+ const void *key2)
+{
+ int l1,l2;
+ DICT_NOTUSED(privdata);
+
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+void dictRedisObjectDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+
+ if (val == NULL) return; /* Values of swapped out keys as set to NULL */
+ decrRefCount(val);
+}
+
+void dictSdsDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+
+ sdsfree(val);
+}
+
+int dictObjKeyCompare(void *privdata, const void *key1,
+ const void *key2)
+{
+ const robj *o1 = key1, *o2 = key2;
+ return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
+}
+
+unsigned int dictObjHash(const void *key) {
+ const robj *o = key;
+ return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
+}
+
+unsigned int dictSdsHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
+}
+
+int dictEncObjKeyCompare(void *privdata, const void *key1,
+ const void *key2)
+{
+ robj *o1 = (robj*) key1, *o2 = (robj*) key2;
+ int cmp;
+
+ if (o1->encoding == REDIS_ENCODING_INT &&
+ o2->encoding == REDIS_ENCODING_INT)
+ return o1->ptr == o2->ptr;
+
+ o1 = getDecodedObject(o1);
+ o2 = getDecodedObject(o2);
+ cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
+ decrRefCount(o1);
+ decrRefCount(o2);
+ return cmp;
+}
+
+unsigned int dictEncObjHash(const void *key) {
+ robj *o = (robj*) key;
+
+ if (o->encoding == REDIS_ENCODING_RAW) {
+ return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
+ } else {
+ if (o->encoding == REDIS_ENCODING_INT) {
+ char buf[32];
+ int len;
+
+ len = ll2string(buf,32,(long)o->ptr);
+ return dictGenHashFunction((unsigned char*)buf, len);
+ } else {
+ unsigned int hash;
+
+ o = getDecodedObject(o);
+ hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
+ decrRefCount(o);
+ return hash;
+ }
+ }
+}
+
+/* Sets type */
+dictType setDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ NULL /* val destructor */
+};
+
+/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
+dictType zsetDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ dictVanillaFree /* val destructor of malloc(sizeof(double)) */
+};
+
+/* Db->dict, keys are sds strings, vals are Redis objects. */
+dictType dbDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictRedisObjectDestructor /* val destructor */
+};
+
+/* Db->expires */
+dictType keyptrDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+/* Hash type hash table (note that small hashes are represented with zimpaps) */
+dictType hashDictType = {
+ dictEncObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictEncObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ dictRedisObjectDestructor /* val destructor */
+};
+
+/* Keylist hash table type has unencoded redis objects as keys and
+ * lists as values. It's used for blocking operations (BLPOP) and to
+ * map swapped keys to a list of clients waiting for this keys to be loaded. */
+dictType keylistDictType = {
+ dictObjHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictObjKeyCompare, /* key compare */
+ dictRedisObjectDestructor, /* key destructor */
+ dictListDestructor /* val destructor */
+};
+
+int htNeedsResize(dict *dict) {
+ long long size, used;
+
+ size = dictSlots(dict);
+ used = dictSize(dict);
+ return (size && used && size > DICT_HT_INITIAL_SIZE &&
+ (used*100/size < REDIS_HT_MINFILL));
+}
+
+/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
+ * we resize the hash table to save memory */
+void tryResizeHashTables(void) {
+ int j;
+
+ for (j = 0; j < server.dbnum; j++) {
+ if (htNeedsResize(server.db[j].dict))
+ dictResize(server.db[j].dict);
+ if (htNeedsResize(server.db[j].expires))
+ dictResize(server.db[j].expires);
+ }
+}
+
+/* Our hash table implementation performs rehashing incrementally while
+ * we write/read from the hash table. Still if the server is idle, the hash
+ * table will use two tables for a long time. So we try to use 1 millisecond
+ * of CPU time at every serverCron() loop in order to rehash some key. */
+void incrementallyRehash(void) {
+ int j;
+
+ for (j = 0; j < server.dbnum; j++) {
+ if (dictIsRehashing(server.db[j].dict)) {
+ dictRehashMilliseconds(server.db[j].dict,1);
+ break; /* already used our millisecond for this loop... */
+ }
+ }
+}
+
+/* This function is called once a background process of some kind terminates,
+ * as we want to avoid resizing the hash tables when there is a child in order
+ * to play well with copy-on-write (otherwise when a resize happens lots of
+ * memory pages are copied). The goal of this function is to update the ability
+ * for dict.c to resize the hash tables accordingly to the fact we have o not
+ * running childs. */
+void updateDictResizePolicy(void) {
+ if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1)
+ dictEnableResize();
+ else
+ dictDisableResize();
+}
+
+/* ======================= Cron: called every 100 ms ======================== */
+
+int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
+ int j, loops = server.cronloops++;
+ REDIS_NOTUSED(eventLoop);
+ REDIS_NOTUSED(id);
+ REDIS_NOTUSED(clientData);
+
+ /* We take a cached value of the unix time in the global state because
+ * with virtual memory and aging there is to store the current time
+ * in objects at every object access, and accuracy is not needed.
+ * To access a global var is faster than calling time(NULL) */
+ server.unixtime = time(NULL);
+ /* We have just 21 bits per object for LRU information.
+ * So we use an (eventually wrapping) LRU clock with minutes resolution.
+ *
+ * When we need to select what object to swap, we compute the minimum
+ * time distance between the current lruclock and the object last access
+ * lruclock info. Even if clocks will wrap on overflow, there is
+ * the interesting property that we are sure that at least
+ * ABS(A-B) minutes passed between current time and timestamp B.
+ *
+ * This is not precise but we don't need at all precision, but just
+ * something statistically reasonable.
+ */
+ server.lruclock = (time(NULL)/60)&((1<<21)-1);
+
+ /* We received a SIGTERM, shutting down here in a safe way, as it is
+ * not ok doing so inside the signal handler. */
+ if (server.shutdown_asap) {
+ if (prepareForShutdown() == REDIS_OK) exit(0);
+ redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
+ }
+
+ /* Show some info about non-empty databases */
+ for (j = 0; j < server.dbnum; j++) {
+ long long size, used, vkeys;
+
+ size = dictSlots(server.db[j].dict);
+ used = dictSize(server.db[j].dict);
+ vkeys = dictSize(server.db[j].expires);
+ if (!(loops % 50) && (used || vkeys)) {
+ redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
+ /* dictPrintStats(server.dict); */
+ }
+ }
+
+ /* We don't want to resize the hash tables while a bacground saving
+ * is in progress: the saving child is created using fork() that is
+ * implemented with a copy-on-write semantic in most modern systems, so
+ * if we resize the HT while there is the saving child at work actually
+ * a lot of memory movements in the parent will cause a lot of pages
+ * copied. */
+ if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {
+ if (!(loops % 10)) tryResizeHashTables();
+ if (server.activerehashing) incrementallyRehash();
+ }
+
+ /* Show information about connected clients */
+ if (!(loops % 50)) {
+ redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
+ listLength(server.clients)-listLength(server.slaves),
+ listLength(server.slaves),
+ zmalloc_used_memory());
+ }
+
+ /* Close connections of timedout clients */
+ if ((server.maxidletime && !(loops % 100)) || server.blpop_blocked_clients)
+ closeTimedoutClients();
+
+ /* Check if a background saving or AOF rewrite in progress terminated */
+ if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
+ int statloc;
+ pid_t pid;
+
+ if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
+ if (pid == server.bgsavechildpid) {
+ backgroundSaveDoneHandler(statloc);
+ } else {
+ backgroundRewriteDoneHandler(statloc);
+ }
+ updateDictResizePolicy();
+ }
+ } else {
+ /* If there is not a background saving in progress check if
+ * we have to save now */
+ time_t now = time(NULL);
+ for (j = 0; j < server.saveparamslen; j++) {
+ struct saveparam *sp = server.saveparams+j;
+
+ if (server.dirty >= sp->changes &&
+ now-server.lastsave > sp->seconds) {
+ redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
+ sp->changes, sp->seconds);
+ rdbSaveBackground(server.dbfilename);
+ break;
+ }
+ }
+ }
+
+ /* Try to expire a few timed out keys. The algorithm used is adaptive and
+ * will use few CPU cycles if there are few expiring keys, otherwise
+ * it will get more aggressive to avoid that too much memory is used by
+ * keys that can be removed from the keyspace. */
+ for (j = 0; j < server.dbnum; j++) {
+ int expired;
+ redisDb *db = server.db+j;
+
+ /* Continue to expire if at the end of the cycle more than 25%
+ * of the keys were expired. */
+ do {
+ long num = dictSize(db->expires);
+ time_t now = time(NULL);
+
+ expired = 0;
+ if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
+ num = REDIS_EXPIRELOOKUPS_PER_CRON;
+ while (num--) {
+ dictEntry *de;
+ time_t t;
+
+ if ((de = dictGetRandomKey(db->expires)) == NULL) break;
+ t = (time_t) dictGetEntryVal(de);
+ if (now > t) {
+ sds key = dictGetEntryKey(de);
+ robj *keyobj = createStringObject(key,sdslen(key));
+
+ dbDelete(db,keyobj);
+ decrRefCount(keyobj);
+ expired++;
+ server.stat_expiredkeys++;
+ }
+ }
+ } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
+ }
+
+ /* Swap a few keys on disk if we are over the memory limit and VM
+ * is enbled. Try to free objects from the free list first. */
+ if (vmCanSwapOut()) {
+ while (server.vm_enabled && zmalloc_used_memory() >
+ server.vm_max_memory)
+ {
+ int retval;
+
+ if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
+ retval = (server.vm_max_threads == 0) ?
+ vmSwapOneObjectBlocking() :
+ vmSwapOneObjectThreaded();
+ if (retval == REDIS_ERR && !(loops % 300) &&
+ zmalloc_used_memory() >
+ (server.vm_max_memory+server.vm_max_memory/10))
+ {
+ redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
+ }
+ /* Note that when using threade I/O we free just one object,
+ * because anyway when the I/O thread in charge to swap this
+ * object out will finish, the handler of completed jobs
+ * will try to swap more objects if we are still out of memory. */
+ if (retval == REDIS_ERR || server.vm_max_threads > 0) break;
+ }
+ }
+
+ /* Check if we should connect to a MASTER */
+ if (server.replstate == REDIS_REPL_CONNECT && !(loops % 10)) {
+ redisLog(REDIS_NOTICE,"Connecting to MASTER...");
+ if (syncWithMaster() == REDIS_OK) {
+ redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
+ if (server.appendonly) rewriteAppendOnlyFileBackground();
+ }
+ }
+ return 100;
+}
+
+/* This function gets called every time Redis is entering the
+ * main loop of the event driven library, that is, before to sleep
+ * for ready file descriptors. */
+void beforeSleep(struct aeEventLoop *eventLoop) {
+ REDIS_NOTUSED(eventLoop);
+
+ /* Awake clients that got all the swapped keys they requested */
+ if (server.vm_enabled && listLength(server.io_ready_clients)) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.io_ready_clients,&li);
+ while((ln = listNext(&li))) {
+ redisClient *c = ln->value;
+ struct redisCommand *cmd;
+
+ /* Resume the client. */
+ listDelNode(server.io_ready_clients,ln);
+ c->flags &= (~REDIS_IO_WAIT);
+ server.vm_blocked_clients--;
+ aeCreateFileEvent(server.el, c->fd, AE_READABLE,
+ readQueryFromClient, c);
+ cmd = lookupCommand(c->argv[0]->ptr);
+ redisAssert(cmd != NULL);
+ call(c,cmd);
+ resetClient(c);
+ /* There may be more data to process in the input buffer. */
+ if (c->querybuf && sdslen(c->querybuf) > 0)
+ processInputBuffer(c);
+ }
+ }
+ /* Write the AOF buffer on disk */
+ flushAppendOnlyFile();
+}
+
+/* =========================== Server initialization ======================== */
+
+void createSharedObjects(void) {
+ int j;
+
+ shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
+ shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
+ shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
+ shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
+ shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
+ shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
+ shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
+ shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
+ shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
+ shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
+ shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
+ shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
+ shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
+ "-ERR Operation against a key holding the wrong kind of value\r\n"));
+ shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
+ "-ERR no such key\r\n"));
+ shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
+ "-ERR syntax error\r\n"));
+ shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
+ "-ERR source and destination objects are the same\r\n"));
+ shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
+ "-ERR index out of range\r\n"));
+ shared.space = createObject(REDIS_STRING,sdsnew(" "));
+ shared.colon = createObject(REDIS_STRING,sdsnew(":"));
+ shared.plus = createObject(REDIS_STRING,sdsnew("+"));
+ shared.select0 = createStringObject("select 0\r\n",10);
+ shared.select1 = createStringObject("select 1\r\n",10);
+ shared.select2 = createStringObject("select 2\r\n",10);
+ shared.select3 = createStringObject("select 3\r\n",10);
+ shared.select4 = createStringObject("select 4\r\n",10);
+ shared.select5 = createStringObject("select 5\r\n",10);
+ shared.select6 = createStringObject("select 6\r\n",10);
+ shared.select7 = createStringObject("select 7\r\n",10);
+ shared.select8 = createStringObject("select 8\r\n",10);
+ shared.select9 = createStringObject("select 9\r\n",10);
+ shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
+ shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
+ shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
+ shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
+ shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
+ shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
+ shared.mbulk3 = createStringObject("*3\r\n",4);
+ shared.mbulk4 = createStringObject("*4\r\n",4);
+ for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
+ shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
+ shared.integers[j]->encoding = REDIS_ENCODING_INT;
+ }
+}
+
+void initServerConfig() {
+ server.dbnum = REDIS_DEFAULT_DBNUM;
+ server.port = REDIS_SERVERPORT;
+ server.verbosity = REDIS_VERBOSE;
+ server.maxidletime = REDIS_MAXIDLETIME;
+ server.saveparams = NULL;
+ server.logfile = NULL; /* NULL = log on standard output */
+ server.bindaddr = NULL;
+ server.glueoutputbuf = 1;
+ server.daemonize = 0;
+ server.appendonly = 0;
+ server.appendfsync = APPENDFSYNC_EVERYSEC;
+ server.no_appendfsync_on_rewrite = 0;
+ server.lastfsync = time(NULL);
+ server.appendfd = -1;
+ server.appendseldb = -1; /* Make sure the first time will not match */
+ server.pidfile = zstrdup("/var/run/redis.pid");
+ server.dbfilename = zstrdup("dump.rdb");
+ server.appendfilename = zstrdup("appendonly.aof");
+ server.requirepass = NULL;
+ server.rdbcompression = 1;
+ server.activerehashing = 1;
+ server.maxclients = 0;
+ server.blpop_blocked_clients = 0;
+ server.maxmemory = 0;
+ server.vm_enabled = 0;
+ server.vm_swap_file = zstrdup("/tmp/redis-%p.vm");
+ server.vm_page_size = 256; /* 256 bytes per page */
+ server.vm_pages = 1024*1024*100; /* 104 millions of pages */
+ server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
+ server.vm_max_threads = 4;
+ server.vm_blocked_clients = 0;
+ server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES;
+ server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE;
+ server.list_max_ziplist_entries = REDIS_LIST_MAX_ZIPLIST_ENTRIES;
+ server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE;
+ server.shutdown_asap = 0;
+
+ resetServerSaveParams();
+
+ appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
+ appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
+ appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
+ /* Replication related */
+ server.isslave = 0;
+ server.masterauth = NULL;
+ server.masterhost = NULL;
+ server.masterport = 6379;
+ server.master = NULL;
+ server.replstate = REDIS_REPL_NONE;
+
+ /* Double constants initialization */
+ R_Zero = 0.0;
+ R_PosInf = 1.0/R_Zero;
+ R_NegInf = -1.0/R_Zero;
+ R_Nan = R_Zero/R_Zero;
+}
+
+void initServer() {
+ int j;
+
+ signal(SIGHUP, SIG_IGN);
+ signal(SIGPIPE, SIG_IGN);
+ setupSigSegvAction();
+
+ server.devnull = fopen("/dev/null","w");
+ if (server.devnull == NULL) {
+ redisLog(REDIS_WARNING, "Can't open /dev/null: %s", server.neterr);
+ exit(1);
+ }
+ server.clients = listCreate();
+ server.slaves = listCreate();
+ server.monitors = listCreate();
+ server.objfreelist = listCreate();
+ createSharedObjects();
+ server.el = aeCreateEventLoop();
+ server.db = zmalloc(sizeof(redisDb)*server.dbnum);
+ server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
+ if (server.fd == -1) {
+ redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
+ exit(1);
+ }
+ for (j = 0; j < server.dbnum; j++) {
+ server.db[j].dict = dictCreate(&dbDictType,NULL);
+ server.db[j].expires = dictCreate(&keyptrDictType,NULL);
+ server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
+ server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
+ if (server.vm_enabled)
+ server.db[j].io_keys = dictCreate(&keylistDictType,NULL);
+ server.db[j].id = j;
+ }
+ server.pubsub_channels = dictCreate(&keylistDictType,NULL);
+ server.pubsub_patterns = listCreate();
+ listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
+ listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
+ server.cronloops = 0;
+ server.bgsavechildpid = -1;
+ server.bgrewritechildpid = -1;
+ server.bgrewritebuf = sdsempty();
+ server.aofbuf = sdsempty();
+ server.lastsave = time(NULL);
+ server.dirty = 0;
+ server.stat_numcommands = 0;
+ server.stat_numconnections = 0;
+ server.stat_expiredkeys = 0;
+ server.stat_starttime = time(NULL);
+ server.unixtime = time(NULL);
+ aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
+ if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
+ acceptHandler, NULL) == AE_ERR) oom("creating file event");
+
+ if (server.appendonly) {
+ server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
+ if (server.appendfd == -1) {
+ redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
+ strerror(errno));
+ exit(1);
+ }
+ }
+
+ if (server.vm_enabled) vmInit();
+}
+
+int qsortRedisCommands(const void *r1, const void *r2) {
+ return strcasecmp(
+ ((struct redisCommand*)r1)->name,
+ ((struct redisCommand*)r2)->name);
+}
+
+void sortCommandTable() {
+ /* Copy and sort the read-only version of the command table */
+ commandTable = (struct redisCommand*)malloc(sizeof(readonlyCommandTable));
+ memcpy(commandTable,readonlyCommandTable,sizeof(readonlyCommandTable));
+ qsort(commandTable,
+ sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
+ sizeof(struct redisCommand),qsortRedisCommands);
+}
+
+/* ====================== Commands lookup and execution ===================== */
+
+struct redisCommand *lookupCommand(char *name) {
+ struct redisCommand tmp = {name,NULL,0,0,NULL,0,0,0};
+ return bsearch(
+ &tmp,
+ commandTable,
+ sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
+ sizeof(struct redisCommand),
+ qsortRedisCommands);
+}
+
+/* Call() is the core of Redis execution of a command */
+void call(redisClient *c, struct redisCommand *cmd) {
+ long long dirty;
+
+ dirty = server.dirty;
+ cmd->proc(c);
+ dirty = server.dirty-dirty;
+
+ if (server.appendonly && dirty)
+ feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
+ if ((dirty || cmd->flags & REDIS_CMD_FORCE_REPLICATION) &&
+ listLength(server.slaves))
+ replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc);
+ if (listLength(server.monitors))
+ replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc);
+ server.stat_numcommands++;
+}
+
+/* If this function gets called we already read a whole
+ * command, argments are in the client argv/argc fields.
+ * processCommand() execute the command or prepare the
+ * server for a bulk read from the client.
+ *
+ * If 1 is returned the client is still alive and valid and
+ * and other operations can be performed by the caller. Otherwise
+ * if 0 is returned the client was destroied (i.e. after QUIT). */
+int processCommand(redisClient *c) {
+ struct redisCommand *cmd;
+
+ /* Free some memory if needed (maxmemory setting) */
+ if (server.maxmemory) freeMemoryIfNeeded();
+
+ /* Handle the multi bulk command type. This is an alternative protocol
+ * supported by Redis in order to receive commands that are composed of
+ * multiple binary-safe "bulk" arguments. The latency of processing is
+ * a bit higher but this allows things like multi-sets, so if this
+ * protocol is used only for MSET and similar commands this is a big win. */
+ if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
+ c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
+ if (c->multibulk <= 0) {
+ resetClient(c);
+ return 1;
+ } else {
+ decrRefCount(c->argv[c->argc-1]);
+ c->argc--;
+ return 1;
+ }
+ } else if (c->multibulk) {
+ if (c->bulklen == -1) {
+ if (((char*)c->argv[0]->ptr)[0] != '$') {
+ addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
+ resetClient(c);
+ return 1;
+ } else {
+ int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
+ decrRefCount(c->argv[0]);
+ if (bulklen < 0 || bulklen > 1024*1024*1024) {
+ c->argc--;
+ addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
+ resetClient(c);
+ return 1;
+ }
+ c->argc--;
+ c->bulklen = bulklen+2; /* add two bytes for CR+LF */
+ return 1;
+ }
+ } else {
+ c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
+ c->mbargv[c->mbargc] = c->argv[0];
+ c->mbargc++;
+ c->argc--;
+ c->multibulk--;
+ if (c->multibulk == 0) {
+ robj **auxargv;
+ int auxargc;
+
+ /* Here we need to swap the multi-bulk argc/argv with the
+ * normal argc/argv of the client structure. */
+ auxargv = c->argv;
+ c->argv = c->mbargv;
+ c->mbargv = auxargv;
+
+ auxargc = c->argc;
+ c->argc = c->mbargc;
+ c->mbargc = auxargc;
+
+ /* We need to set bulklen to something different than -1
+ * in order for the code below to process the command without
+ * to try to read the last argument of a bulk command as
+ * a special argument. */
+ c->bulklen = 0;
+ /* continue below and process the command */
+ } else {
+ c->bulklen = -1;
+ return 1;
+ }
+ }
+ }
+ /* -- end of multi bulk commands processing -- */
+
+ /* The QUIT command is handled as a special case. Normal command
+ * procs are unable to close the client connection safely */
+ if (!strcasecmp(c->argv[0]->ptr,"quit")) {
+ freeClient(c);
+ return 0;
+ }
+
+ /* Now lookup the command and check ASAP about trivial error conditions
+ * such wrong arity, bad command name and so forth. */
+ cmd = lookupCommand(c->argv[0]->ptr);
+ if (!cmd) {
+ addReplySds(c,
+ sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
+ (char*)c->argv[0]->ptr));
+ resetClient(c);
+ return 1;
+ } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
+ (c->argc < -cmd->arity)) {
+ addReplySds(c,
+ sdscatprintf(sdsempty(),
+ "-ERR wrong number of arguments for '%s' command\r\n",
+ cmd->name));
+ resetClient(c);
+ return 1;
+ } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
+ /* This is a bulk command, we have to read the last argument yet. */
+ int bulklen = atoi(c->argv[c->argc-1]->ptr);
+
+ decrRefCount(c->argv[c->argc-1]);
+ if (bulklen < 0 || bulklen > 1024*1024*1024) {
+ c->argc--;
+ addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
+ resetClient(c);
+ return 1;
+ }
+ c->argc--;
+ c->bulklen = bulklen+2; /* add two bytes for CR+LF */
+ /* It is possible that the bulk read is already in the
+ * buffer. Check this condition and handle it accordingly.
+ * This is just a fast path, alternative to call processInputBuffer().
+ * It's a good idea since the code is small and this condition
+ * happens most of the times. */
+ if ((signed)sdslen(c->querybuf) >= c->bulklen) {
+ c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
+ c->argc++;
+ c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
+ } else {
+ /* Otherwise return... there is to read the last argument
+ * from the socket. */
+ return 1;
+ }
+ }
+ /* Let's try to encode the bulk object to save space. */
+ if (cmd->flags & REDIS_CMD_BULK)
+ c->argv[c->argc-1] = tryObjectEncoding(c->argv[c->argc-1]);
+
+ /* Check if the user is authenticated */
+ if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
+ addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
+ resetClient(c);
+ return 1;
+ }
+
+ /* Handle the maxmemory directive */
+ if (server.maxmemory && (cmd->flags & REDIS_CMD_DENYOOM) &&
+ zmalloc_used_memory() > server.maxmemory)
+ {
+ addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
+ resetClient(c);
+ return 1;
+ }
+
+ /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
+ if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
+ &&
+ cmd->proc != subscribeCommand && cmd->proc != unsubscribeCommand &&
+ cmd->proc != psubscribeCommand && cmd->proc != punsubscribeCommand) {
+ addReplySds(c,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
+ resetClient(c);
+ return 1;
+ }
+
+ /* Exec the command */
+ if (c->flags & REDIS_MULTI &&
+ cmd->proc != execCommand && cmd->proc != discardCommand &&
+ cmd->proc != multiCommand && cmd->proc != watchCommand)
+ {
+ queueMultiCommand(c,cmd);
+ addReply(c,shared.queued);
+ } else {
+ if (server.vm_enabled && server.vm_max_threads > 0 &&
+ blockClientOnSwappedKeys(c,cmd)) return 1;
+ call(c,cmd);
+ }
+
+ /* Prepare the client for the next command */
+ resetClient(c);
+ return 1;
+}
+
+/*================================== Shutdown =============================== */
+
+int prepareForShutdown() {
+ redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
+ /* Kill the saving child if there is a background saving in progress.
+ We want to avoid race conditions, for instance our saving child may
+ overwrite the synchronous saving did by SHUTDOWN. */
+ if (server.bgsavechildpid != -1) {
+ redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
+ kill(server.bgsavechildpid,SIGKILL);
+ rdbRemoveTempFile(server.bgsavechildpid);
+ }
+ if (server.appendonly) {
+ /* Append only file: fsync() the AOF and exit */
+ aof_fsync(server.appendfd);
+ if (server.vm_enabled) unlink(server.vm_swap_file);
+ } else {
+ /* Snapshotting. Perform a SYNC SAVE and exit */
+ if (rdbSave(server.dbfilename) == REDIS_OK) {
+ if (server.daemonize)
+ unlink(server.pidfile);
+ redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
+ } else {
+ /* Ooops.. error saving! The best we can do is to continue
+ * operating. Note that if there was a background saving process,
+ * in the next cron() Redis will be notified that the background
+ * saving aborted, handling special stuff like slaves pending for
+ * synchronization... */
+ redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
+ return REDIS_ERR;
+ }
+ }
+ redisLog(REDIS_WARNING,"Server exit now, bye bye...");
+ return REDIS_OK;
+}
+
+/*================================== Commands =============================== */
+
+void authCommand(redisClient *c) {
+ if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
+ c->authenticated = 1;
+ addReply(c,shared.ok);
+ } else {
+ c->authenticated = 0;
+ addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
+ }
+}
+
+void pingCommand(redisClient *c) {
+ addReply(c,shared.pong);
+}
+
+void echoCommand(redisClient *c) {
+ addReplyBulk(c,c->argv[1]);
+}
+
+/* Convert an amount of bytes into a human readable string in the form
+ * of 100B, 2G, 100M, 4K, and so forth. */
+void bytesToHuman(char *s, unsigned long long n) {
+ double d;
+
+ if (n < 1024) {
+ /* Bytes */
+ sprintf(s,"%lluB",n);
+ return;
+ } else if (n < (1024*1024)) {
+ d = (double)n/(1024);
+ sprintf(s,"%.2fK",d);
+ } else if (n < (1024LL*1024*1024)) {
+ d = (double)n/(1024*1024);
+ sprintf(s,"%.2fM",d);
+ } else if (n < (1024LL*1024*1024*1024)) {
+ d = (double)n/(1024LL*1024*1024);
+ sprintf(s,"%.2fG",d);
+ }
+}
+
+/* Create the string returned by the INFO command. This is decoupled
+ * by the INFO command itself as we need to report the same information
+ * on memory corruption problems. */
+sds genRedisInfoString(void) {
+ sds info;
+ time_t uptime = time(NULL)-server.stat_starttime;
+ int j;
+ char hmem[64];
+
+ bytesToHuman(hmem,zmalloc_used_memory());
+ info = sdscatprintf(sdsempty(),
+ "redis_version:%s\r\n"
+ "redis_git_sha1:%s\r\n"
+ "redis_git_dirty:%d\r\n"
+ "arch_bits:%s\r\n"
+ "multiplexing_api:%s\r\n"
+ "process_id:%ld\r\n"
+ "uptime_in_seconds:%ld\r\n"
+ "uptime_in_days:%ld\r\n"
+ "connected_clients:%d\r\n"
+ "connected_slaves:%d\r\n"
+ "blocked_clients:%d\r\n"
+ "used_memory:%zu\r\n"
+ "used_memory_human:%s\r\n"
+ "changes_since_last_save:%lld\r\n"
+ "bgsave_in_progress:%d\r\n"
+ "last_save_time:%ld\r\n"
+ "bgrewriteaof_in_progress:%d\r\n"
+ "total_connections_received:%lld\r\n"
+ "total_commands_processed:%lld\r\n"
+ "expired_keys:%lld\r\n"
+ "hash_max_zipmap_entries:%zu\r\n"
+ "hash_max_zipmap_value:%zu\r\n"
+ "pubsub_channels:%ld\r\n"
+ "pubsub_patterns:%u\r\n"
+ "vm_enabled:%d\r\n"
+ "role:%s\r\n"
+ ,REDIS_VERSION,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (sizeof(long) == 8) ? "64" : "32",
+ aeGetApiName(),
+ (long) getpid(),
+ uptime,
+ uptime/(3600*24),
+ listLength(server.clients)-listLength(server.slaves),
+ listLength(server.slaves),
+ server.blpop_blocked_clients,
+ zmalloc_used_memory(),
+ hmem,
+ server.dirty,
+ server.bgsavechildpid != -1,
+ server.lastsave,
+ server.bgrewritechildpid != -1,
+ server.stat_numconnections,
+ server.stat_numcommands,
+ server.stat_expiredkeys,
+ server.hash_max_zipmap_entries,
+ server.hash_max_zipmap_value,
+ dictSize(server.pubsub_channels),
+ listLength(server.pubsub_patterns),
+ server.vm_enabled != 0,
+ server.masterhost == NULL ? "master" : "slave"
+ );
+ if (server.masterhost) {
+ info = sdscatprintf(info,
+ "master_host:%s\r\n"
+ "master_port:%d\r\n"
+ "master_link_status:%s\r\n"
+ "master_last_io_seconds_ago:%d\r\n"
+ ,server.masterhost,
+ server.masterport,
+ (server.replstate == REDIS_REPL_CONNECTED) ?
+ "up" : "down",
+ server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
+ );
+ }
+ if (server.vm_enabled) {
+ lockThreadedIO();
+ info = sdscatprintf(info,
+ "vm_conf_max_memory:%llu\r\n"
+ "vm_conf_page_size:%llu\r\n"
+ "vm_conf_pages:%llu\r\n"
+ "vm_stats_used_pages:%llu\r\n"
+ "vm_stats_swapped_objects:%llu\r\n"
+ "vm_stats_swappin_count:%llu\r\n"
+ "vm_stats_swappout_count:%llu\r\n"
+ "vm_stats_io_newjobs_len:%lu\r\n"
+ "vm_stats_io_processing_len:%lu\r\n"
+ "vm_stats_io_processed_len:%lu\r\n"
+ "vm_stats_io_active_threads:%lu\r\n"
+ "vm_stats_blocked_clients:%lu\r\n"
+ ,(unsigned long long) server.vm_max_memory,
+ (unsigned long long) server.vm_page_size,
+ (unsigned long long) server.vm_pages,
+ (unsigned long long) server.vm_stats_used_pages,
+ (unsigned long long) server.vm_stats_swapped_objects,
+ (unsigned long long) server.vm_stats_swapins,
+ (unsigned long long) server.vm_stats_swapouts,
+ (unsigned long) listLength(server.io_newjobs),
+ (unsigned long) listLength(server.io_processing),
+ (unsigned long) listLength(server.io_processed),
+ (unsigned long) server.io_active_threads,
+ (unsigned long) server.vm_blocked_clients
+ );
+ unlockThreadedIO();
+ }
+ for (j = 0; j < server.dbnum; j++) {
+ long long keys, vkeys;
+
+ keys = dictSize(server.db[j].dict);
+ vkeys = dictSize(server.db[j].expires);
+ if (keys || vkeys) {
+ info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
+ j, keys, vkeys);
+ }
+ }
+ return info;
+}
+
+void infoCommand(redisClient *c) {
+ sds info = genRedisInfoString();
+ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
+ (unsigned long)sdslen(info)));
+ addReplySds(c,info);
+ addReply(c,shared.crlf);
+}
+
+void monitorCommand(redisClient *c) {
+ /* ignore MONITOR if aleady slave or in monitor mode */
+ if (c->flags & REDIS_SLAVE) return;
+
+ c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
+ c->slaveseldb = 0;
+ listAddNodeTail(server.monitors,c);
+ addReply(c,shared.ok);
+}
+
+/* ============================ Maxmemory directive ======================== */
+
+/* Try to free one object form the pre-allocated objects free list.
+ * This is useful under low mem conditions as by default we take 1 million
+ * free objects allocated. On success REDIS_OK is returned, otherwise
+ * REDIS_ERR. */
+int tryFreeOneObjectFromFreelist(void) {
+ robj *o;
+
+ if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
+ if (listLength(server.objfreelist)) {
+ listNode *head = listFirst(server.objfreelist);
+ o = listNodeValue(head);
+ listDelNode(server.objfreelist,head);
+ if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
+ zfree(o);
+ return REDIS_OK;
+ } else {
+ if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
+ return REDIS_ERR;
+ }
+}
+
+/* This function gets called when 'maxmemory' is set on the config file to limit
+ * the max memory used by the server, and we are out of memory.
+ * This function will try to, in order:
+ *
+ * - Free objects from the free list
+ * - Try to remove keys with an EXPIRE set
+ *
+ * It is not possible to free enough memory to reach used-memory < maxmemory
+ * the server will start refusing commands that will enlarge even more the
+ * memory usage.
+ */
+void freeMemoryIfNeeded(void) {
+ while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
+ int j, k, freed = 0;
+
+ if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
+ for (j = 0; j < server.dbnum; j++) {
+ int minttl = -1;
+ robj *minkey = NULL;
+ struct dictEntry *de;
+
+ if (dictSize(server.db[j].expires)) {
+ freed = 1;
+ /* From a sample of three keys drop the one nearest to
+ * the natural expire */
+ for (k = 0; k < 3; k++) {
+ time_t t;
+
+ de = dictGetRandomKey(server.db[j].expires);
+ t = (time_t) dictGetEntryVal(de);
+ if (minttl == -1 || t < minttl) {
+ minkey = dictGetEntryKey(de);
+ minttl = t;
+ }
+ }
+ dbDelete(server.db+j,minkey);
+ }
+ }
+ if (!freed) return; /* nothing to free... */
+ }
+}
+
+/* =================================== Main! ================================ */
+
+#ifdef __linux__
+int linuxOvercommitMemoryValue(void) {
+ FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
+ char buf[64];
+
+ if (!fp) return -1;
+ if (fgets(buf,64,fp) == NULL) {
+ fclose(fp);
+ return -1;
+ }
+ fclose(fp);
+
+ return atoi(buf);
+}
+
+void linuxOvercommitMemoryWarning(void) {
+ if (linuxOvercommitMemoryValue() == 0) {
+ redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
+ }
+}
+#endif /* __linux__ */
+
+void daemonize(void) {
+ int fd;
+ FILE *fp;
+
+ if (fork() != 0) exit(0); /* parent exits */
+ setsid(); /* create a new session */
+
+ /* Every output goes to /dev/null. If Redis is daemonized but
+ * the 'logfile' is set to 'stdout' in the configuration file
+ * it will not log at all. */
+ if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
+ dup2(fd, STDIN_FILENO);
+ dup2(fd, STDOUT_FILENO);
+ dup2(fd, STDERR_FILENO);
+ if (fd > STDERR_FILENO) close(fd);
+ }
+ /* Try to write the pid file */
+ fp = fopen(server.pidfile,"w");
+ if (fp) {
+ fprintf(fp,"%d\n",getpid());
+ fclose(fp);
+ }
+}
+
+void version() {
+ printf("Redis server version %s (%s:%d)\n", REDIS_VERSION,
+ redisGitSHA1(), atoi(redisGitDirty()) > 0);
+ exit(0);
+}
+
+void usage() {
+ fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
+ fprintf(stderr," ./redis-server - (read config from stdin)\n");
+ exit(1);
+}
+
+int main(int argc, char **argv) {
+ time_t start;
+
+ initServerConfig();
+ sortCommandTable();
+ if (argc == 2) {
+ if (strcmp(argv[1], "-v") == 0 ||
+ strcmp(argv[1], "--version") == 0) version();
+ if (strcmp(argv[1], "--help") == 0) usage();
+ resetServerSaveParams();
+ loadServerConfig(argv[1]);
+ } else if ((argc > 2)) {
+ usage();
+ } else {
+ redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
+ }
+ if (server.daemonize) daemonize();
+ initServer();
+ redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
+#ifdef __linux__
+ linuxOvercommitMemoryWarning();
+#endif
+ start = time(NULL);
+ if (server.appendonly) {
+ if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
+ redisLog(REDIS_NOTICE,"DB loaded from append only file: %ld seconds",time(NULL)-start);
+ } else {
+ if (rdbLoad(server.dbfilename) == REDIS_OK)
+ redisLog(REDIS_NOTICE,"DB loaded from disk: %ld seconds",time(NULL)-start);
+ }
+ redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
+ aeSetBeforeSleepProc(server.el,beforeSleep);
+ aeMain(server.el);
+ aeDeleteEventLoop(server.el);
+ return 0;
+}
+
+/* ============================= Backtrace support ========================= */
+
+#ifdef HAVE_BACKTRACE
+void *getMcontextEip(ucontext_t *uc) {
+#if defined(__FreeBSD__)
+ return (void*) uc->uc_mcontext.mc_eip;
+#elif defined(__dietlibc__)
+ return (void*) uc->uc_mcontext.eip;
+#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
+ #if __x86_64__
+ return (void*) uc->uc_mcontext->__ss.__rip;
+ #else
+ return (void*) uc->uc_mcontext->__ss.__eip;
+ #endif
+#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
+ #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
+ return (void*) uc->uc_mcontext->__ss.__rip;
+ #else
+ return (void*) uc->uc_mcontext->__ss.__eip;
+ #endif
+#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
+ return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
+#elif defined(__ia64__) /* Linux IA64 */
+ return (void*) uc->uc_mcontext.sc_ip;
+#else
+ return NULL;
+#endif
+}
+
+void segvHandler(int sig, siginfo_t *info, void *secret) {
+ void *trace[100];
+ char **messages = NULL;
+ int i, trace_size = 0;
+ ucontext_t *uc = (ucontext_t*) secret;
+ sds infostring;
+ REDIS_NOTUSED(info);
+
+ redisLog(REDIS_WARNING,
+ "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
+ infostring = genRedisInfoString();
+ redisLog(REDIS_WARNING, "%s",infostring);
+ /* It's not safe to sdsfree() the returned string under memory
+ * corruption conditions. Let it leak as we are going to abort */
+
+ trace_size = backtrace(trace, 100);
+ /* overwrite sigaction with caller's address */
+ if (getMcontextEip(uc) != NULL) {
+ trace[1] = getMcontextEip(uc);
+ }
+ messages = backtrace_symbols(trace, trace_size);
+
+ for (i=1; i<trace_size; ++i)
+ redisLog(REDIS_WARNING,"%s", messages[i]);
+
+ /* free(messages); Don't call free() with possibly corrupted memory. */
+ _exit(0);
+}
+
+void sigtermHandler(int sig) {
+ REDIS_NOTUSED(sig);
+
+ redisLog(REDIS_WARNING,"SIGTERM received, scheduling shutting down...");
+ server.shutdown_asap = 1;
+}
+
+void setupSigSegvAction(void) {
+ struct sigaction act;
+
+ sigemptyset (&act.sa_mask);
+ /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
+ * is used. Otherwise, sa_handler is used */
+ act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
+ act.sa_sigaction = segvHandler;
+ sigaction (SIGSEGV, &act, NULL);
+ sigaction (SIGBUS, &act, NULL);
+ sigaction (SIGFPE, &act, NULL);
+ sigaction (SIGILL, &act, NULL);
+ sigaction (SIGBUS, &act, NULL);
+
+ act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
+ act.sa_handler = sigtermHandler;
+ sigaction (SIGTERM, &act, NULL);
+ return;
+}
+
+#else /* HAVE_BACKTRACE */
+void setupSigSegvAction(void) {
+}
+#endif /* HAVE_BACKTRACE */
+
+/* The End */
diff --git a/src/redis.h b/src/redis.h
new file mode 100644
index 000000000..e54caa2a2
--- /dev/null
+++ b/src/redis.h
@@ -0,0 +1,885 @@
+#ifndef __REDIS_H
+#define __REDIS_H
+
+#include "fmacros.h"
+#include "config.h"
+
+#if defined(__sun)
+#include "solarisfixes.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "ae.h" /* Event driven programming library */
+#include "sds.h" /* Dynamic safe strings */
+#include "dict.h" /* Hash tables */
+#include "adlist.h" /* Linked lists */
+#include "zmalloc.h" /* total memory usage aware version of malloc/free */
+#include "anet.h" /* Networking the easy way */
+#include "zipmap.h" /* Compact string -> string data structure */
+#include "ziplist.h" /* Compact list data structure */
+#include "version.h"
+
+/* Error codes */
+#define REDIS_OK 0
+#define REDIS_ERR -1
+
+/* Static server configuration */
+#define REDIS_SERVERPORT 6379 /* TCP port */
+#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
+#define REDIS_IOBUF_LEN 1024
+#define REDIS_LOADBUF_LEN 1024
+#define REDIS_STATIC_ARGS 8
+#define REDIS_DEFAULT_DBNUM 16
+#define REDIS_CONFIGLINE_MAX 1024
+#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
+#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
+#define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
+#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
+#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
+#define REDIS_SHARED_INTEGERS 10000
+
+/* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */
+#define REDIS_WRITEV_THRESHOLD 3
+/* Max number of iovecs used for each writev call */
+#define REDIS_WRITEV_IOVEC_COUNT 256
+
+/* Hash table parameters */
+#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
+
+/* Command flags */
+#define REDIS_CMD_BULK 1 /* Bulk write command */
+#define REDIS_CMD_INLINE 2 /* Inline command */
+/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with
+ this flags will return an error when the 'maxmemory' option is set in the
+ config file and the server is using more than maxmemory bytes of memory.
+ In short this commands are denied on low memory conditions. */
+#define REDIS_CMD_DENYOOM 4
+#define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
+
+/* Object types */
+#define REDIS_STRING 0
+#define REDIS_LIST 1
+#define REDIS_SET 2
+#define REDIS_ZSET 3
+#define REDIS_HASH 4
+#define REDIS_VMPOINTER 8
+
+/* Objects encoding. Some kind of objects like Strings and Hashes can be
+ * internally represented in multiple ways. The 'encoding' field of the object
+ * is set to one of this fields for this object. */
+#define REDIS_ENCODING_RAW 0 /* Raw representation */
+#define REDIS_ENCODING_INT 1 /* Encoded as integer */
+#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
+#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
+#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
+#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
+
+/* Object types only used for dumping to disk */
+#define REDIS_EXPIRETIME 253
+#define REDIS_SELECTDB 254
+#define REDIS_EOF 255
+
+/* Defines related to the dump file format. To store 32 bits lengths for short
+ * keys requires a lot of space, so we check the most significant 2 bits of
+ * the first byte to interpreter the length:
+ *
+ * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
+ * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
+ * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
+ * 11|000000 this means: specially encoded object will follow. The six bits
+ * number specify the kind of object that follows.
+ * See the REDIS_RDB_ENC_* defines.
+ *
+ * Lenghts up to 63 are stored using a single byte, most DB keys, and may
+ * values, will fit inside. */
+#define REDIS_RDB_6BITLEN 0
+#define REDIS_RDB_14BITLEN 1
+#define REDIS_RDB_32BITLEN 2
+#define REDIS_RDB_ENCVAL 3
+#define REDIS_RDB_LENERR UINT_MAX
+
+/* When a length of a string object stored on disk has the first two bits
+ * set, the remaining two bits specify a special encoding for the object
+ * accordingly to the following defines: */
+#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
+#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
+#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
+#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+
+/* Virtual memory object->where field. */
+#define REDIS_VM_MEMORY 0 /* The object is on memory */
+#define REDIS_VM_SWAPPED 1 /* The object is on disk */
+#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
+#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
+
+/* Virtual memory static configuration stuff.
+ * Check vmFindContiguousPages() to know more about this magic numbers. */
+#define REDIS_VM_MAX_NEAR_PAGES 65536
+#define REDIS_VM_MAX_RANDOM_JUMP 4096
+#define REDIS_VM_MAX_THREADS 32
+#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
+/* The following is the *percentage* of completed I/O jobs to process when the
+ * handelr is called. While Virtual Memory I/O operations are performed by
+ * threads, this operations must be processed by the main thread when completed
+ * in order to take effect. */
+#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
+
+/* Client flags */
+#define REDIS_SLAVE 1 /* This client is a slave server */
+#define REDIS_MASTER 2 /* This client is a master server */
+#define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
+#define REDIS_MULTI 8 /* This client is in a MULTI context */
+#define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
+#define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
+#define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
+
+/* Slave replication state - slave side */
+#define REDIS_REPL_NONE 0 /* No active replication */
+#define REDIS_REPL_CONNECT 1 /* Must connect to master */
+#define REDIS_REPL_CONNECTED 2 /* Connected to master */
+
+/* Slave replication state - from the point of view of master
+ * Note that in SEND_BULK and ONLINE state the slave receives new updates
+ * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
+ * to start the next background saving in order to send updates to it. */
+#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
+#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
+#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
+#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
+
+/* List related stuff */
+#define REDIS_HEAD 0
+#define REDIS_TAIL 1
+
+/* Sort operations */
+#define REDIS_SORT_GET 0
+#define REDIS_SORT_ASC 1
+#define REDIS_SORT_DESC 2
+#define REDIS_SORTKEY_MAX 1024
+
+/* Log levels */
+#define REDIS_DEBUG 0
+#define REDIS_VERBOSE 1
+#define REDIS_NOTICE 2
+#define REDIS_WARNING 3
+
+/* Anti-warning macro... */
+#define REDIS_NOTUSED(V) ((void) V)
+
+#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
+#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
+
+/* Append only defines */
+#define APPENDFSYNC_NO 0
+#define APPENDFSYNC_ALWAYS 1
+#define APPENDFSYNC_EVERYSEC 2
+
+/* Zip structure related defaults */
+#define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
+#define REDIS_HASH_MAX_ZIPMAP_VALUE 512
+#define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
+#define REDIS_LIST_MAX_ZIPLIST_VALUE 32
+
+/* Sets operations codes */
+#define REDIS_OP_UNION 0
+#define REDIS_OP_DIFF 1
+#define REDIS_OP_INTER 2
+
+/* We can print the stacktrace, so our assert is defined this way: */
+#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
+void _redisAssert(char *estr, char *file, int line);
+void _redisPanic(char *msg, char *file, int line);
+
+/*-----------------------------------------------------------------------------
+ * Data types
+ *----------------------------------------------------------------------------*/
+
+/* A redis object, that is a type able to hold a string / list / set */
+
+/* The actual Redis Object */
+typedef struct redisObject {
+ unsigned type:4;
+ unsigned storage:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
+ unsigned encoding:4;
+ unsigned lru:22; /* lru time (relative to server.lruclock) */
+ int refcount;
+ void *ptr;
+ /* VM fields are only allocated if VM is active, otherwise the
+ * object allocation function will just allocate
+ * sizeof(redisObjct) minus sizeof(redisObjectVM), so using
+ * Redis without VM active will not have any overhead. */
+} robj;
+
+/* The VM pointer structure - identifies an object in the swap file.
+ *
+ * This object is stored in place of the value
+ * object in the main key->value hash table representing a database.
+ * Note that the first fields (type, storage) are the same as the redisObject
+ * structure so that vmPointer strucuters can be accessed even when casted
+ * as redisObject structures.
+ *
+ * This is useful as we don't know if a value object is or not on disk, but we
+ * are always able to read obj->storage to check this. For vmPointer
+ * structures "type" is set to REDIS_VMPOINTER (even if without this field
+ * is still possible to check the kind of object from the value of 'storage').*/
+typedef struct vmPointer {
+ unsigned type:4;
+ unsigned storage:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
+ unsigned notused:26;
+ unsigned int vtype; /* type of the object stored in the swap file */
+ off_t page; /* the page at witch the object is stored on disk */
+ off_t usedpages; /* number of pages used on disk */
+} vmpointer;
+
+/* Macro used to initalize a Redis object allocated on the stack.
+ * Note that this macro is taken near the structure definition to make sure
+ * we'll update it when the structure is changed, to avoid bugs like
+ * bug #85 introduced exactly in this way. */
+#define initStaticStringObject(_var,_ptr) do { \
+ _var.refcount = 1; \
+ _var.type = REDIS_STRING; \
+ _var.encoding = REDIS_ENCODING_RAW; \
+ _var.ptr = _ptr; \
+ _var.storage = REDIS_VM_MEMORY; \
+} while(0);
+
+typedef struct redisDb {
+ dict *dict; /* The keyspace for this DB */
+ dict *expires; /* Timeout of keys with a timeout set */
+ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
+ dict *io_keys; /* Keys with clients waiting for VM I/O */
+ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
+ int id;
+} redisDb;
+
+/* Client MULTI/EXEC state */
+typedef struct multiCmd {
+ robj **argv;
+ int argc;
+ struct redisCommand *cmd;
+} multiCmd;
+
+typedef struct multiState {
+ multiCmd *commands; /* Array of MULTI commands */
+ int count; /* Total number of MULTI commands */
+} multiState;
+
+/* With multiplexing we need to take per-clinet state.
+ * Clients are taken in a liked list. */
+typedef struct redisClient {
+ int fd;
+ redisDb *db;
+ int dictid;
+ sds querybuf;
+ robj **argv, **mbargv;
+ int argc, mbargc;
+ int bulklen; /* bulk read len. -1 if not in bulk read mode */
+ int multibulk; /* multi bulk command format active */
+ list *reply;
+ int sentlen;
+ time_t lastinteraction; /* time of the last interaction, used for timeout */
+ int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
+ int slaveseldb; /* slave selected db, if this client is a slave */
+ int authenticated; /* when requirepass is non-NULL */
+ int replstate; /* replication state if this is a slave */
+ int repldbfd; /* replication DB file descriptor */
+ long repldboff; /* replication DB file offset */
+ off_t repldbsize; /* replication DB file size */
+ multiState mstate; /* MULTI/EXEC state */
+ robj **blocking_keys; /* The key we are waiting to terminate a blocking
+ * operation such as BLPOP. Otherwise NULL. */
+ int blocking_keys_num; /* Number of blocking keys */
+ time_t blockingto; /* Blocking operation timeout. If UNIX current time
+ * is >= blockingto then the operation timed out. */
+ list *io_keys; /* Keys this client is waiting to be loaded from the
+ * swap file in order to continue. */
+ list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
+ dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
+ list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
+} redisClient;
+
+struct saveparam {
+ time_t seconds;
+ int changes;
+};
+
+struct sharedObjectsStruct {
+ robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
+ *colon, *nullbulk, *nullmultibulk, *queued,
+ *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
+ *outofrangeerr, *plus,
+ *select0, *select1, *select2, *select3, *select4,
+ *select5, *select6, *select7, *select8, *select9,
+ *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
+ *mbulk4, *psubscribebulk, *punsubscribebulk,
+ *integers[REDIS_SHARED_INTEGERS];
+};
+
+/* Global server state structure */
+struct redisServer {
+ int port;
+ int fd;
+ redisDb *db;
+ long long dirty; /* changes to DB from the last save */
+ list *clients;
+ list *slaves, *monitors;
+ char neterr[ANET_ERR_LEN];
+ aeEventLoop *el;
+ int cronloops; /* number of times the cron function run */
+ list *objfreelist; /* A list of freed objects to avoid malloc() */
+ time_t lastsave; /* Unix time of last save succeeede */
+ /* Fields used only for stats */
+ time_t stat_starttime; /* server start time */
+ long long stat_numcommands; /* number of processed commands */
+ long long stat_numconnections; /* number of connections received */
+ long long stat_expiredkeys; /* number of expired keys */
+ /* Configuration */
+ int verbosity;
+ int glueoutputbuf;
+ int maxidletime;
+ int dbnum;
+ int daemonize;
+ int appendonly;
+ int appendfsync;
+ int no_appendfsync_on_rewrite;
+ int shutdown_asap;
+ time_t lastfsync;
+ int appendfd;
+ int appendseldb;
+ char *pidfile;
+ pid_t bgsavechildpid;
+ pid_t bgrewritechildpid;
+ sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */
+ sds aofbuf; /* AOF buffer, written before entering the event loop */
+ struct saveparam *saveparams;
+ int saveparamslen;
+ char *logfile;
+ char *bindaddr;
+ char *dbfilename;
+ char *appendfilename;
+ char *requirepass;
+ int rdbcompression;
+ int activerehashing;
+ /* Replication related */
+ int isslave;
+ char *masterauth;
+ char *masterhost;
+ int masterport;
+ redisClient *master; /* client that is master for this slave */
+ int replstate;
+ unsigned int maxclients;
+ unsigned long long maxmemory;
+ unsigned int blpop_blocked_clients;
+ unsigned int vm_blocked_clients;
+ /* Sort parameters - qsort_r() is only available under BSD so we
+ * have to take this state global, in order to pass it to sortCompare() */
+ int sort_desc;
+ int sort_alpha;
+ int sort_bypattern;
+ /* Virtual memory configuration */
+ int vm_enabled;
+ char *vm_swap_file;
+ off_t vm_page_size;
+ off_t vm_pages;
+ unsigned long long vm_max_memory;
+ /* Zip structure config */
+ size_t hash_max_zipmap_entries;
+ size_t hash_max_zipmap_value;
+ size_t list_max_ziplist_entries;
+ size_t list_max_ziplist_value;
+ /* Virtual memory state */
+ FILE *vm_fp;
+ int vm_fd;
+ off_t vm_next_page; /* Next probably empty page */
+ off_t vm_near_pages; /* Number of pages allocated sequentially */
+ unsigned char *vm_bitmap; /* Bitmap of free/used pages */
+ time_t unixtime; /* Unix time sampled every second. */
+ /* Virtual memory I/O threads stuff */
+ /* An I/O thread process an element taken from the io_jobs queue and
+ * put the result of the operation in the io_done list. While the
+ * job is being processed, it's put on io_processing queue. */
+ list *io_newjobs; /* List of VM I/O jobs yet to be processed */
+ list *io_processing; /* List of VM I/O jobs being processed */
+ list *io_processed; /* List of VM I/O jobs already processed */
+ list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
+ pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
+ pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
+ pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
+ pthread_attr_t io_threads_attr; /* attributes for threads creation */
+ int io_active_threads; /* Number of running I/O threads */
+ int vm_max_threads; /* Max number of I/O threads running at the same time */
+ /* Our main thread is blocked on the event loop, locking for sockets ready
+ * to be read or written, so when a threaded I/O operation is ready to be
+ * processed by the main thread, the I/O thread will use a unix pipe to
+ * awake the main thread. The followings are the two pipe FDs. */
+ int io_ready_pipe_read;
+ int io_ready_pipe_write;
+ /* Virtual memory stats */
+ unsigned long long vm_stats_used_pages;
+ unsigned long long vm_stats_swapped_objects;
+ unsigned long long vm_stats_swapouts;
+ unsigned long long vm_stats_swapins;
+ /* Pubsub */
+ dict *pubsub_channels; /* Map channels to list of subscribed clients */
+ list *pubsub_patterns; /* A list of pubsub_patterns */
+ /* Misc */
+ FILE *devnull;
+ unsigned lruclock:22; /* clock incrementing every minute, for LRU */
+ unsigned lruclock_padding:10;
+};
+
+typedef struct pubsubPattern {
+ redisClient *client;
+ robj *pattern;
+} pubsubPattern;
+
+typedef void redisCommandProc(redisClient *c);
+typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
+struct redisCommand {
+ char *name;
+ redisCommandProc *proc;
+ int arity;
+ int flags;
+ /* Use a function to determine which keys need to be loaded
+ * in the background prior to executing this command. Takes precedence
+ * over vm_firstkey and others, ignored when NULL */
+ redisVmPreloadProc *vm_preload_proc;
+ /* What keys should be loaded in background when calling this command? */
+ int vm_firstkey; /* The first argument that's a key (0 = no keys) */
+ int vm_lastkey; /* THe last argument that's a key */
+ int vm_keystep; /* The step between first and last key */
+};
+
+struct redisFunctionSym {
+ char *name;
+ unsigned long pointer;
+};
+
+typedef struct _redisSortObject {
+ robj *obj;
+ union {
+ double score;
+ robj *cmpobj;
+ } u;
+} redisSortObject;
+
+typedef struct _redisSortOperation {
+ int type;
+ robj *pattern;
+} redisSortOperation;
+
+/* ZSETs use a specialized version of Skiplists */
+
+typedef struct zskiplistNode {
+ struct zskiplistNode **forward;
+ struct zskiplistNode *backward;
+ unsigned int *span;
+ double score;
+ robj *obj;
+} zskiplistNode;
+
+typedef struct zskiplist {
+ struct zskiplistNode *header, *tail;
+ unsigned long length;
+ int level;
+} zskiplist;
+
+typedef struct zset {
+ dict *dict;
+ zskiplist *zsl;
+} zset;
+
+/* VM threaded I/O request message */
+#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
+#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
+#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
+typedef struct iojob {
+ int type; /* Request type, REDIS_IOJOB_* */
+ redisDb *db;/* Redis database */
+ robj *key; /* This I/O request is about swapping this key */
+ robj *id; /* Unique identifier of this job:
+ this is the object to swap for REDIS_IOREQ_*_SWAP, or the
+ vmpointer objct for REDIS_IOREQ_LOAD. */
+ robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
+ * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
+ off_t page; /* Swap page where to read/write the object */
+ off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
+ int canceled; /* True if this command was canceled by blocking side of VM */
+ pthread_t thread; /* ID of the thread processing this entry */
+} iojob;
+
+/* Structure to hold list iteration abstraction. */
+typedef struct {
+ robj *subject;
+ unsigned char encoding;
+ unsigned char direction; /* Iteration direction */
+ unsigned char *zi;
+ listNode *ln;
+} listTypeIterator;
+
+/* Structure for an entry while iterating over a list. */
+typedef struct {
+ listTypeIterator *li;
+ unsigned char *zi; /* Entry in ziplist */
+ listNode *ln; /* Entry in linked list */
+} listTypeEntry;
+
+/* Structure to hold hash iteration abstration. Note that iteration over
+ * hashes involves both fields and values. Because it is possible that
+ * not both are required, store pointers in the iterator to avoid
+ * unnecessary memory allocation for fields/values. */
+typedef struct {
+ int encoding;
+ unsigned char *zi;
+ unsigned char *zk, *zv;
+ unsigned int zklen, zvlen;
+
+ dictIterator *di;
+ dictEntry *de;
+} hashTypeIterator;
+
+#define REDIS_HASH_KEY 1
+#define REDIS_HASH_VALUE 2
+
+/*-----------------------------------------------------------------------------
+ * Extern declarations
+ *----------------------------------------------------------------------------*/
+
+extern struct redisServer server;
+extern struct sharedObjectsStruct shared;
+extern dictType setDictType;
+extern dictType zsetDictType;
+extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
+dictType hashDictType;
+
+/*-----------------------------------------------------------------------------
+ * Functions prototypes
+ *----------------------------------------------------------------------------*/
+
+/* networking.c -- Networking and Client related operations */
+redisClient *createClient(int fd);
+void closeTimedoutClients(void);
+void freeClient(redisClient *c);
+void resetClient(redisClient *c);
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
+void addReply(redisClient *c, robj *obj);
+void addReplySds(redisClient *c, sds s);
+void processInputBuffer(redisClient *c);
+void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void addReplyBulk(redisClient *c, robj *obj);
+void addReplyBulkCString(redisClient *c, char *s);
+void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void addReply(redisClient *c, robj *obj);
+void addReplySds(redisClient *c, sds s);
+void addReplyDouble(redisClient *c, double d);
+void addReplyLongLong(redisClient *c, long long ll);
+void addReplyUlong(redisClient *c, unsigned long ul);
+void *dupClientReplyValue(void *o);
+
+/* List data type */
+void listTypeTryConversion(robj *subject, robj *value);
+void listTypePush(robj *subject, robj *value, int where);
+robj *listTypePop(robj *subject, int where);
+unsigned long listTypeLength(robj *subject);
+listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction);
+void listTypeReleaseIterator(listTypeIterator *li);
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
+robj *listTypeGet(listTypeEntry *entry);
+void listTypeInsert(listTypeEntry *entry, robj *value, int where);
+int listTypeEqual(listTypeEntry *entry, robj *o);
+void listTypeDelete(listTypeEntry *entry);
+void listTypeConvert(robj *subject, int enc);
+void unblockClientWaitingData(redisClient *c);
+int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
+void popGenericCommand(redisClient *c, int where);
+
+/* MULTI/EXEC/WATCH... */
+void unwatchAllKeys(redisClient *c);
+void initClientMultiState(redisClient *c);
+void freeClientMultiState(redisClient *c);
+void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
+void touchWatchedKey(redisDb *db, robj *key);
+void touchWatchedKeysOnFlush(int dbid);
+
+/* Redis object implementation */
+void decrRefCount(void *o);
+void incrRefCount(robj *o);
+void freeStringObject(robj *o);
+void freeListObject(robj *o);
+void freeSetObject(robj *o);
+void freeZsetObject(robj *o);
+void freeHashObject(robj *o);
+robj *createObject(int type, void *ptr);
+robj *createStringObject(char *ptr, size_t len);
+robj *dupStringObject(robj *o);
+robj *tryObjectEncoding(robj *o);
+robj *getDecodedObject(robj *o);
+size_t stringObjectLen(robj *o);
+int tryFreeOneObjectFromFreelist(void);
+robj *createStringObjectFromLongLong(long long value);
+robj *createListObject(void);
+robj *createZiplistObject(void);
+robj *createSetObject(void);
+robj *createHashObject(void);
+robj *createZsetObject(void);
+int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg);
+int checkType(redisClient *c, robj *o, int type);
+int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg);
+int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg);
+int getLongLongFromObject(robj *o, long long *target);
+char *strEncoding(int encoding);
+int compareStringObjects(robj *a, robj *b);
+int equalStringObjects(robj *a, robj *b);
+
+/* Replication */
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
+void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
+int syncWithMaster(void);
+void updateSlavesWaitingBgsave(int bgsaveerr);
+
+/* RDB persistence */
+int rdbLoad(char *filename);
+int rdbSaveBackground(char *filename);
+void rdbRemoveTempFile(pid_t childpid);
+int rdbSave(char *filename);
+int rdbSaveObject(FILE *fp, robj *o);
+off_t rdbSavedObjectPages(robj *o, FILE *fp);
+off_t rdbSavedObjectLen(robj *o, FILE *fp);
+robj *rdbLoadObject(int type, FILE *fp);
+void backgroundSaveDoneHandler(int statloc);
+
+/* AOF persistence */
+void flushAppendOnlyFile(void);
+void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
+void aofRemoveTempFile(pid_t childpid);
+int rewriteAppendOnlyFileBackground(void);
+int loadAppendOnlyFile(char *filename);
+void stopAppendOnly(void);
+int startAppendOnly(void);
+void backgroundRewriteDoneHandler(int statloc);
+
+/* Sorted sets data type */
+zskiplist *zslCreate(void);
+void zslFree(zskiplist *zsl);
+void zslInsert(zskiplist *zsl, double score, robj *obj);
+
+/* Core functions */
+void freeMemoryIfNeeded(void);
+int processCommand(redisClient *c);
+void setupSigSegvAction(void);
+struct redisCommand *lookupCommand(char *name);
+void call(redisClient *c, struct redisCommand *cmd);
+int prepareForShutdown();
+void redisLog(int level, const char *fmt, ...);
+void usage();
+void updateDictResizePolicy(void);
+int htNeedsResize(dict *dict);
+void oom(const char *msg);
+
+/* Virtual Memory */
+void vmInit(void);
+void vmMarkPagesFree(off_t page, off_t count);
+robj *vmLoadObject(robj *o);
+robj *vmPreviewObject(robj *o);
+int vmSwapOneObjectBlocking(void);
+int vmSwapOneObjectThreaded(void);
+int vmCanSwapOut(void);
+void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
+void vmCancelThreadedIOJob(robj *o);
+void lockThreadedIO(void);
+void unlockThreadedIO(void);
+int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
+void freeIOJob(iojob *j);
+void queueIOJob(iojob *j);
+int vmWriteObjectOnSwap(robj *o, off_t page);
+robj *vmReadObjectFromSwap(off_t page, int type);
+void waitEmptyIOJobsQueue(void);
+void vmReopenSwapFile(void);
+int vmFreePage(off_t page);
+void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
+void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
+int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
+int dontWaitForSwappedKey(redisClient *c, robj *key);
+void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
+vmpointer *vmSwapObjectBlocking(robj *val);
+
+/* Hash data type */
+void convertToRealHash(robj *o);
+void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
+void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
+robj *hashTypeGet(robj *o, robj *key);
+int hashTypeExists(robj *o, robj *key);
+int hashTypeSet(robj *o, robj *key, robj *value);
+int hashTypeDelete(robj *o, robj *key);
+unsigned long hashTypeLength(robj *o);
+hashTypeIterator *hashTypeInitIterator(robj *subject);
+void hashTypeReleaseIterator(hashTypeIterator *hi);
+int hashTypeNext(hashTypeIterator *hi);
+robj *hashTypeCurrent(hashTypeIterator *hi, int what);
+robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
+
+/* Pub / Sub */
+int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
+int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
+void freePubsubPattern(void *p);
+int listMatchPubsubPattern(void *a, void *b);
+
+/* Utility functions */
+int stringmatchlen(const char *pattern, int patternLen,
+ const char *string, int stringLen, int nocase);
+int stringmatch(const char *pattern, const char *string, int nocase);
+long long memtoll(const char *p, int *err);
+int ll2string(char *s, size_t len, long long value);
+int isStringRepresentableAsLong(sds s, long *longval);
+
+/* Configuration */
+void loadServerConfig(char *filename);
+void appendServerSaveParams(time_t seconds, int changes);
+void resetServerSaveParams();
+
+/* db.c -- Keyspace access API */
+int removeExpire(redisDb *db, robj *key);
+int expireIfNeeded(redisDb *db, robj *key);
+int deleteIfVolatile(redisDb *db, robj *key);
+time_t getExpire(redisDb *db, robj *key);
+int setExpire(redisDb *db, robj *key, time_t when);
+robj *lookupKey(redisDb *db, robj *key);
+robj *lookupKeyRead(redisDb *db, robj *key);
+robj *lookupKeyWrite(redisDb *db, robj *key);
+robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply);
+robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply);
+int dbAdd(redisDb *db, robj *key, robj *val);
+int dbReplace(redisDb *db, robj *key, robj *val);
+int dbExists(redisDb *db, robj *key);
+robj *dbRandomKey(redisDb *db);
+int dbDelete(redisDb *db, robj *key);
+long long emptyDb();
+int selectDb(redisClient *c, int id);
+
+/* Git SHA1 */
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+
+/* Commands prototypes */
+void authCommand(redisClient *c);
+void pingCommand(redisClient *c);
+void echoCommand(redisClient *c);
+void setCommand(redisClient *c);
+void setnxCommand(redisClient *c);
+void setexCommand(redisClient *c);
+void getCommand(redisClient *c);
+void delCommand(redisClient *c);
+void existsCommand(redisClient *c);
+void incrCommand(redisClient *c);
+void decrCommand(redisClient *c);
+void incrbyCommand(redisClient *c);
+void decrbyCommand(redisClient *c);
+void selectCommand(redisClient *c);
+void randomkeyCommand(redisClient *c);
+void keysCommand(redisClient *c);
+void dbsizeCommand(redisClient *c);
+void lastsaveCommand(redisClient *c);
+void saveCommand(redisClient *c);
+void bgsaveCommand(redisClient *c);
+void bgrewriteaofCommand(redisClient *c);
+void shutdownCommand(redisClient *c);
+void moveCommand(redisClient *c);
+void renameCommand(redisClient *c);
+void renamenxCommand(redisClient *c);
+void lpushCommand(redisClient *c);
+void rpushCommand(redisClient *c);
+void lpushxCommand(redisClient *c);
+void rpushxCommand(redisClient *c);
+void linsertCommand(redisClient *c);
+void lpopCommand(redisClient *c);
+void rpopCommand(redisClient *c);
+void llenCommand(redisClient *c);
+void lindexCommand(redisClient *c);
+void lrangeCommand(redisClient *c);
+void ltrimCommand(redisClient *c);
+void typeCommand(redisClient *c);
+void lsetCommand(redisClient *c);
+void saddCommand(redisClient *c);
+void sremCommand(redisClient *c);
+void smoveCommand(redisClient *c);
+void sismemberCommand(redisClient *c);
+void scardCommand(redisClient *c);
+void spopCommand(redisClient *c);
+void srandmemberCommand(redisClient *c);
+void sinterCommand(redisClient *c);
+void sinterstoreCommand(redisClient *c);
+void sunionCommand(redisClient *c);
+void sunionstoreCommand(redisClient *c);
+void sdiffCommand(redisClient *c);
+void sdiffstoreCommand(redisClient *c);
+void syncCommand(redisClient *c);
+void flushdbCommand(redisClient *c);
+void flushallCommand(redisClient *c);
+void sortCommand(redisClient *c);
+void lremCommand(redisClient *c);
+void rpoplpushcommand(redisClient *c);
+void infoCommand(redisClient *c);
+void mgetCommand(redisClient *c);
+void monitorCommand(redisClient *c);
+void expireCommand(redisClient *c);
+void expireatCommand(redisClient *c);
+void getsetCommand(redisClient *c);
+void ttlCommand(redisClient *c);
+void slaveofCommand(redisClient *c);
+void debugCommand(redisClient *c);
+void msetCommand(redisClient *c);
+void msetnxCommand(redisClient *c);
+void zaddCommand(redisClient *c);
+void zincrbyCommand(redisClient *c);
+void zrangeCommand(redisClient *c);
+void zrangebyscoreCommand(redisClient *c);
+void zcountCommand(redisClient *c);
+void zrevrangeCommand(redisClient *c);
+void zcardCommand(redisClient *c);
+void zremCommand(redisClient *c);
+void zscoreCommand(redisClient *c);
+void zremrangebyscoreCommand(redisClient *c);
+void multiCommand(redisClient *c);
+void execCommand(redisClient *c);
+void discardCommand(redisClient *c);
+void blpopCommand(redisClient *c);
+void brpopCommand(redisClient *c);
+void appendCommand(redisClient *c);
+void substrCommand(redisClient *c);
+void zrankCommand(redisClient *c);
+void zrevrankCommand(redisClient *c);
+void hsetCommand(redisClient *c);
+void hsetnxCommand(redisClient *c);
+void hgetCommand(redisClient *c);
+void hmsetCommand(redisClient *c);
+void hmgetCommand(redisClient *c);
+void hdelCommand(redisClient *c);
+void hlenCommand(redisClient *c);
+void zremrangebyrankCommand(redisClient *c);
+void zunionstoreCommand(redisClient *c);
+void zinterstoreCommand(redisClient *c);
+void hkeysCommand(redisClient *c);
+void hvalsCommand(redisClient *c);
+void hgetallCommand(redisClient *c);
+void hexistsCommand(redisClient *c);
+void configCommand(redisClient *c);
+void hincrbyCommand(redisClient *c);
+void subscribeCommand(redisClient *c);
+void unsubscribeCommand(redisClient *c);
+void psubscribeCommand(redisClient *c);
+void punsubscribeCommand(redisClient *c);
+void publishCommand(redisClient *c);
+void watchCommand(redisClient *c);
+void unwatchCommand(redisClient *c);
+
+#endif
diff --git a/release.c b/src/release.c
index 64186ec4e..64186ec4e 100644
--- a/release.c
+++ b/src/release.c
diff --git a/src/replication.c b/src/replication.c
new file mode 100644
index 000000000..ecb04ce1a
--- /dev/null
+++ b/src/replication.c
@@ -0,0 +1,475 @@
+#include "redis.h"
+
+#include <sys/time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
+ listNode *ln;
+ listIter li;
+ int outc = 0, j;
+ robj **outv;
+ /* We need 1+(ARGS*3) objects since commands are using the new protocol
+ * and we one 1 object for the first "*<count>\r\n" multibulk count, then
+ * for every additional object we have "$<count>\r\n" + object + "\r\n". */
+ robj *static_outv[REDIS_STATIC_ARGS*3+1];
+ robj *lenobj;
+
+ if (argc <= REDIS_STATIC_ARGS) {
+ outv = static_outv;
+ } else {
+ outv = zmalloc(sizeof(robj*)*(argc*3+1));
+ }
+
+ lenobj = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(), "*%d\r\n", argc));
+ lenobj->refcount = 0;
+ outv[outc++] = lenobj;
+ for (j = 0; j < argc; j++) {
+ lenobj = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"$%lu\r\n",
+ (unsigned long) stringObjectLen(argv[j])));
+ lenobj->refcount = 0;
+ outv[outc++] = lenobj;
+ outv[outc++] = argv[j];
+ outv[outc++] = shared.crlf;
+ }
+
+ /* Increment all the refcounts at start and decrement at end in order to
+ * be sure to free objects if there is no slave in a replication state
+ * able to be feed with commands */
+ for (j = 0; j < outc; j++) incrRefCount(outv[j]);
+ listRewind(slaves,&li);
+ while((ln = listNext(&li))) {
+ redisClient *slave = ln->value;
+
+ /* Don't feed slaves that are still waiting for BGSAVE to start */
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
+
+ /* Feed all the other slaves, MONITORs and so on */
+ if (slave->slaveseldb != dictid) {
+ robj *selectcmd;
+
+ switch(dictid) {
+ case 0: selectcmd = shared.select0; break;
+ case 1: selectcmd = shared.select1; break;
+ case 2: selectcmd = shared.select2; break;
+ case 3: selectcmd = shared.select3; break;
+ case 4: selectcmd = shared.select4; break;
+ case 5: selectcmd = shared.select5; break;
+ case 6: selectcmd = shared.select6; break;
+ case 7: selectcmd = shared.select7; break;
+ case 8: selectcmd = shared.select8; break;
+ case 9: selectcmd = shared.select9; break;
+ default:
+ selectcmd = createObject(REDIS_STRING,
+ sdscatprintf(sdsempty(),"select %d\r\n",dictid));
+ selectcmd->refcount = 0;
+ break;
+ }
+ addReply(slave,selectcmd);
+ slave->slaveseldb = dictid;
+ }
+ for (j = 0; j < outc; j++) addReply(slave,outv[j]);
+ }
+ for (j = 0; j < outc; j++) decrRefCount(outv[j]);
+ if (outv != static_outv) zfree(outv);
+}
+
+void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) {
+ listNode *ln;
+ listIter li;
+ int j;
+ sds cmdrepr = sdsnew("+");
+ robj *cmdobj;
+ struct timeval tv;
+
+ gettimeofday(&tv,NULL);
+ cmdrepr = sdscatprintf(cmdrepr,"%ld.%ld ",(long)tv.tv_sec,(long)tv.tv_usec);
+ if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid);
+
+ for (j = 0; j < argc; j++) {
+ if (argv[j]->encoding == REDIS_ENCODING_INT) {
+ cmdrepr = sdscatprintf(cmdrepr, "%ld", (long)argv[j]->ptr);
+ } else {
+ cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
+ sdslen(argv[j]->ptr));
+ }
+ if (j != argc-1)
+ cmdrepr = sdscatlen(cmdrepr," ",1);
+ }
+ cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
+ cmdobj = createObject(REDIS_STRING,cmdrepr);
+
+ listRewind(monitors,&li);
+ while((ln = listNext(&li))) {
+ redisClient *monitor = ln->value;
+ addReply(monitor,cmdobj);
+ }
+ decrRefCount(cmdobj);
+}
+
+int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nwritten, ret = size;
+ time_t start = time(NULL);
+
+ timeout++;
+ while(size) {
+ if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
+ nwritten = write(fd,ptr,size);
+ if (nwritten == -1) return -1;
+ ptr += nwritten;
+ size -= nwritten;
+ }
+ if ((time(NULL)-start) > timeout) {
+ errno = ETIMEDOUT;
+ return -1;
+ }
+ }
+ return ret;
+}
+
+int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nread, totread = 0;
+ time_t start = time(NULL);
+
+ timeout++;
+ while(size) {
+ if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
+ nread = read(fd,ptr,size);
+ if (nread == -1) return -1;
+ ptr += nread;
+ size -= nread;
+ totread += nread;
+ }
+ if ((time(NULL)-start) > timeout) {
+ errno = ETIMEDOUT;
+ return -1;
+ }
+ }
+ return totread;
+}
+
+int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
+ ssize_t nread = 0;
+
+ size--;
+ while(size) {
+ char c;
+
+ if (syncRead(fd,&c,1,timeout) == -1) return -1;
+ if (c == '\n') {
+ *ptr = '\0';
+ if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
+ return nread;
+ } else {
+ *ptr++ = c;
+ *ptr = '\0';
+ nread++;
+ }
+ }
+ return nread;
+}
+
+void syncCommand(redisClient *c) {
+ /* ignore SYNC if aleady slave or in monitor mode */
+ if (c->flags & REDIS_SLAVE) return;
+
+ /* SYNC can't be issued when the server has pending data to send to
+ * the client about already issued commands. We need a fresh reply
+ * buffer registering the differences between the BGSAVE and the current
+ * dataset, so that we can copy to other slaves if needed. */
+ if (listLength(c->reply) != 0) {
+ addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
+ return;
+ }
+
+ redisLog(REDIS_NOTICE,"Slave ask for synchronization");
+ /* Here we need to check if there is a background saving operation
+ * in progress, or if it is required to start one */
+ if (server.bgsavechildpid != -1) {
+ /* Ok a background save is in progress. Let's check if it is a good
+ * one for replication, i.e. if there is another slave that is
+ * registering differences since the server forked to save */
+ redisClient *slave;
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ slave = ln->value;
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
+ }
+ if (ln) {
+ /* Perfect, the server is already registering differences for
+ * another slave. Set the right state, and copy the buffer. */
+ listRelease(c->reply);
+ c->reply = listDup(slave->reply);
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
+ } else {
+ /* No way, we need to wait for the next BGSAVE in order to
+ * register differences */
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
+ }
+ } else {
+ /* Ok we don't have a BGSAVE in progress, let's start one */
+ redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
+ if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
+ redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
+ addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
+ return;
+ }
+ c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ }
+ c->repldbfd = -1;
+ c->flags |= REDIS_SLAVE;
+ c->slaveseldb = 0;
+ listAddNodeTail(server.slaves,c);
+ return;
+}
+
+void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
+ redisClient *slave = privdata;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+ char buf[REDIS_IOBUF_LEN];
+ ssize_t nwritten, buflen;
+
+ if (slave->repldboff == 0) {
+ /* Write the bulk write count before to transfer the DB. In theory here
+ * we don't know how much room there is in the output buffer of the
+ * socket, but in pratice SO_SNDLOWAT (the minimum count for output
+ * operations) will never be smaller than the few bytes we need. */
+ sds bulkcount;
+
+ bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
+ slave->repldbsize);
+ if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
+ {
+ sdsfree(bulkcount);
+ freeClient(slave);
+ return;
+ }
+ sdsfree(bulkcount);
+ }
+ lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
+ buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
+ if (buflen <= 0) {
+ redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
+ (buflen == 0) ? "premature EOF" : strerror(errno));
+ freeClient(slave);
+ return;
+ }
+ if ((nwritten = write(fd,buf,buflen)) == -1) {
+ redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
+ strerror(errno));
+ freeClient(slave);
+ return;
+ }
+ slave->repldboff += nwritten;
+ if (slave->repldboff == slave->repldbsize) {
+ close(slave->repldbfd);
+ slave->repldbfd = -1;
+ aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ slave->replstate = REDIS_REPL_ONLINE;
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
+ sendReplyToClient, slave) == AE_ERR) {
+ freeClient(slave);
+ return;
+ }
+ addReplySds(slave,sdsempty());
+ redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
+ }
+}
+
+/* This function is called at the end of every backgrond saving.
+ * The argument bgsaveerr is REDIS_OK if the background saving succeeded
+ * otherwise REDIS_ERR is passed to the function.
+ *
+ * The goal of this function is to handle slaves waiting for a successful
+ * background saving in order to perform non-blocking synchronization. */
+void updateSlavesWaitingBgsave(int bgsaveerr) {
+ listNode *ln;
+ int startbgsave = 0;
+ listIter li;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ redisClient *slave = ln->value;
+
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ startbgsave = 1;
+ slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
+ } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
+ struct redis_stat buf;
+
+ if (bgsaveerr != REDIS_OK) {
+ freeClient(slave);
+ redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
+ continue;
+ }
+ if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
+ redis_fstat(slave->repldbfd,&buf) == -1) {
+ freeClient(slave);
+ redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
+ continue;
+ }
+ slave->repldboff = 0;
+ slave->repldbsize = buf.st_size;
+ slave->replstate = REDIS_REPL_SEND_BULK;
+ aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
+ freeClient(slave);
+ continue;
+ }
+ }
+ }
+ if (startbgsave) {
+ if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
+ listIter li;
+
+ listRewind(server.slaves,&li);
+ redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
+ while((ln = listNext(&li))) {
+ redisClient *slave = ln->value;
+
+ if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
+ freeClient(slave);
+ }
+ }
+ }
+}
+
+int syncWithMaster(void) {
+ char buf[1024], tmpfile[256], authcmd[1024];
+ long dumpsize;
+ int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
+ int dfd, maxtries = 5;
+
+ if (fd == -1) {
+ redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+
+ /* AUTH with the master if required. */
+ if(server.masterauth) {
+ snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
+ if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ /* Read the AUTH result. */
+ if (syncReadLine(fd,buf,1024,3600) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ if (buf[0] != '+') {
+ close(fd);
+ redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
+ return REDIS_ERR;
+ }
+ }
+
+ /* Issue the SYNC command */
+ if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ /* Read the bulk write count */
+ if (syncReadLine(fd,buf,1024,3600) == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ if (buf[0] != '$') {
+ close(fd);
+ redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
+ return REDIS_ERR;
+ }
+ dumpsize = strtol(buf+1,NULL,10);
+ redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
+ /* Read the bulk write data on a temp file */
+ while(maxtries--) {
+ snprintf(tmpfile,256,
+ "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
+ dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
+ if (dfd != -1) break;
+ sleep(1);
+ }
+ if (dfd == -1) {
+ close(fd);
+ redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
+ return REDIS_ERR;
+ }
+ while(dumpsize) {
+ int nread, nwritten;
+
+ nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
+ if (nread == -1) {
+ redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
+ strerror(errno));
+ close(fd);
+ close(dfd);
+ return REDIS_ERR;
+ }
+ nwritten = write(dfd,buf,nread);
+ if (nwritten == -1) {
+ redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
+ close(fd);
+ close(dfd);
+ return REDIS_ERR;
+ }
+ dumpsize -= nread;
+ }
+ close(dfd);
+ if (rename(tmpfile,server.dbfilename) == -1) {
+ redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ unlink(tmpfile);
+ close(fd);
+ return REDIS_ERR;
+ }
+ emptyDb();
+ if (rdbLoad(server.dbfilename) != REDIS_OK) {
+ redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
+ close(fd);
+ return REDIS_ERR;
+ }
+ server.master = createClient(fd);
+ server.master->flags |= REDIS_MASTER;
+ server.master->authenticated = 1;
+ server.replstate = REDIS_REPL_CONNECTED;
+ return REDIS_OK;
+}
+
+void slaveofCommand(redisClient *c) {
+ if (!strcasecmp(c->argv[1]->ptr,"no") &&
+ !strcasecmp(c->argv[2]->ptr,"one")) {
+ if (server.masterhost) {
+ sdsfree(server.masterhost);
+ server.masterhost = NULL;
+ if (server.master) freeClient(server.master);
+ server.replstate = REDIS_REPL_NONE;
+ redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
+ }
+ } else {
+ sdsfree(server.masterhost);
+ server.masterhost = sdsdup(c->argv[1]->ptr);
+ server.masterport = atoi(c->argv[2]->ptr);
+ if (server.master) freeClient(server.master);
+ server.replstate = REDIS_REPL_CONNECT;
+ redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
+ server.masterhost, server.masterport);
+ }
+ addReply(c,shared.ok);
+}
diff --git a/sds.c b/src/sds.c
index feb1a6212..5e67f0443 100644
--- a/sds.c
+++ b/src/sds.c
@@ -201,7 +201,7 @@ sds sdstrim(sds s, const char *cset) {
return s;
}
-sds sdsrange(sds s, long start, long end) {
+sds sdsrange(sds s, int start, int end) {
struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
@@ -357,3 +357,28 @@ sds sdsfromlonglong(long long value) {
p++;
return sdsnewlen(p,32-(p-buf));
}
+
+sds sdscatrepr(sds s, char *p, size_t len) {
+ s = sdscatlen(s,"\"",1);
+ while(len--) {
+ switch(*p) {
+ case '\\':
+ case '"':
+ s = sdscatprintf(s,"\\%c",*p);
+ break;
+ case '\n': s = sdscatlen(s,"\\n",1); break;
+ case '\r': s = sdscatlen(s,"\\r",1); break;
+ case '\t': s = sdscatlen(s,"\\t",1); break;
+ case '\a': s = sdscatlen(s,"\\a",1); break;
+ case '\b': s = sdscatlen(s,"\\b",1); break;
+ default:
+ if (isprint(*p))
+ s = sdscatprintf(s,"%c",*p);
+ else
+ s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
+ break;
+ }
+ p++;
+ }
+ return sdscatlen(s,"\"",1);
+}
diff --git a/sds.h b/src/sds.h
index 8b632ff92..ef3a418f2 100644
--- a/sds.h
+++ b/src/sds.h
@@ -36,8 +36,8 @@
typedef char *sds;
struct sdshdr {
- long len;
- long free;
+ int len;
+ int free;
char buf[];
};
@@ -61,7 +61,7 @@ sds sdscatprintf(sds s, const char *fmt, ...);
#endif
sds sdstrim(sds s, const char *cset);
-sds sdsrange(sds s, long start, long end);
+sds sdsrange(sds s, int start, int end);
void sdsupdatelen(sds s);
int sdscmp(sds s1, sds s2);
sds *sdssplitlen(char *s, int len, char *sep, int seplen, int *count);
@@ -69,5 +69,6 @@ void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
sds sdsfromlonglong(long long value);
+sds sdscatrepr(sds s, char *p, size_t len);
#endif
diff --git a/sha1.c b/src/sha1.c
index 2c50433e8..2c50433e8 100644
--- a/sha1.c
+++ b/src/sha1.c
diff --git a/sha1.h b/src/sha1.h
index 9d6f12965..9d6f12965 100644
--- a/sha1.h
+++ b/src/sha1.h
diff --git a/solarisfixes.h b/src/solarisfixes.h
index ce8e7b6fd..ce8e7b6fd 100644
--- a/solarisfixes.h
+++ b/src/solarisfixes.h
diff --git a/src/sort.c b/src/sort.c
new file mode 100644
index 000000000..0bc86b474
--- /dev/null
+++ b/src/sort.c
@@ -0,0 +1,383 @@
+#include "redis.h"
+#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
+
+redisSortOperation *createSortOperation(int type, robj *pattern) {
+ redisSortOperation *so = zmalloc(sizeof(*so));
+ so->type = type;
+ so->pattern = pattern;
+ return so;
+}
+
+/* Return the value associated to the key with a name obtained
+ * substituting the first occurence of '*' in 'pattern' with 'subst'.
+ * The returned object will always have its refcount increased by 1
+ * when it is non-NULL. */
+robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
+ char *p, *f;
+ sds spat, ssub;
+ robj keyobj, fieldobj, *o;
+ int prefixlen, sublen, postfixlen, fieldlen;
+ /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
+ struct {
+ int len;
+ int free;
+ char buf[REDIS_SORTKEY_MAX+1];
+ } keyname, fieldname;
+
+ /* If the pattern is "#" return the substitution object itself in order
+ * to implement the "SORT ... GET #" feature. */
+ spat = pattern->ptr;
+ if (spat[0] == '#' && spat[1] == '\0') {
+ incrRefCount(subst);
+ return subst;
+ }
+
+ /* The substitution object may be specially encoded. If so we create
+ * a decoded object on the fly. Otherwise getDecodedObject will just
+ * increment the ref count, that we'll decrement later. */
+ subst = getDecodedObject(subst);
+
+ ssub = subst->ptr;
+ if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
+ p = strchr(spat,'*');
+ if (!p) {
+ decrRefCount(subst);
+ return NULL;
+ }
+
+ /* Find out if we're dealing with a hash dereference. */
+ if ((f = strstr(p+1, "->")) != NULL) {
+ fieldlen = sdslen(spat)-(f-spat);
+ /* this also copies \0 character */
+ memcpy(fieldname.buf,f+2,fieldlen-1);
+ fieldname.len = fieldlen-2;
+ } else {
+ fieldlen = 0;
+ }
+
+ prefixlen = p-spat;
+ sublen = sdslen(ssub);
+ postfixlen = sdslen(spat)-(prefixlen+1)-fieldlen;
+ memcpy(keyname.buf,spat,prefixlen);
+ memcpy(keyname.buf+prefixlen,ssub,sublen);
+ memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
+ keyname.buf[prefixlen+sublen+postfixlen] = '\0';
+ keyname.len = prefixlen+sublen+postfixlen;
+ decrRefCount(subst);
+
+ /* Lookup substituted key */
+ initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(struct sdshdr)));
+ o = lookupKeyRead(db,&keyobj);
+ if (o == NULL) return NULL;
+
+ if (fieldlen > 0) {
+ if (o->type != REDIS_HASH || fieldname.len < 1) return NULL;
+
+ /* Retrieve value from hash by the field name. This operation
+ * already increases the refcount of the returned object. */
+ initStaticStringObject(fieldobj,((char*)&fieldname)+(sizeof(struct sdshdr)));
+ o = hashTypeGet(o, &fieldobj);
+ } else {
+ if (o->type != REDIS_STRING) return NULL;
+
+ /* Every object that this function returns needs to have its refcount
+ * increased. sortCommand decreases it again. */
+ incrRefCount(o);
+ }
+
+ return o;
+}
+
+/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
+ * the additional parameter is not standard but a BSD-specific we have to
+ * pass sorting parameters via the global 'server' structure */
+int sortCompare(const void *s1, const void *s2) {
+ const redisSortObject *so1 = s1, *so2 = s2;
+ int cmp;
+
+ if (!server.sort_alpha) {
+ /* Numeric sorting. Here it's trivial as we precomputed scores */
+ if (so1->u.score > so2->u.score) {
+ cmp = 1;
+ } else if (so1->u.score < so2->u.score) {
+ cmp = -1;
+ } else {
+ cmp = 0;
+ }
+ } else {
+ /* Alphanumeric sorting */
+ if (server.sort_bypattern) {
+ if (!so1->u.cmpobj || !so2->u.cmpobj) {
+ /* At least one compare object is NULL */
+ if (so1->u.cmpobj == so2->u.cmpobj)
+ cmp = 0;
+ else if (so1->u.cmpobj == NULL)
+ cmp = -1;
+ else
+ cmp = 1;
+ } else {
+ /* We have both the objects, use strcoll */
+ cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
+ }
+ } else {
+ /* Compare elements directly. */
+ cmp = compareStringObjects(so1->obj,so2->obj);
+ }
+ }
+ return server.sort_desc ? -cmp : cmp;
+}
+
+/* The SORT command is the most complex command in Redis. Warning: this code
+ * is optimized for speed and a bit less for readability */
+void sortCommand(redisClient *c) {
+ list *operations;
+ unsigned int outputlen = 0;
+ int desc = 0, alpha = 0;
+ int limit_start = 0, limit_count = -1, start, end;
+ int j, dontsort = 0, vectorlen;
+ int getop = 0; /* GET operation counter */
+ robj *sortval, *sortby = NULL, *storekey = NULL;
+ redisSortObject *vector; /* Resulting vector to sort */
+
+ /* Lookup the key to sort. It must be of the right types */
+ sortval = lookupKeyRead(c->db,c->argv[1]);
+ if (sortval == NULL) {
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+ if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
+ sortval->type != REDIS_ZSET)
+ {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+
+ /* Create a list of operations to perform for every sorted element.
+ * Operations can be GET/DEL/INCR/DECR */
+ operations = listCreate();
+ listSetFreeMethod(operations,zfree);
+ j = 2;
+
+ /* Now we need to protect sortval incrementing its count, in the future
+ * SORT may have options able to overwrite/delete keys during the sorting
+ * and the sorted key itself may get destroied */
+ incrRefCount(sortval);
+
+ /* The SORT command has an SQL-alike syntax, parse it */
+ while(j < c->argc) {
+ int leftargs = c->argc-j-1;
+ if (!strcasecmp(c->argv[j]->ptr,"asc")) {
+ desc = 0;
+ } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
+ desc = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
+ alpha = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
+ limit_start = atoi(c->argv[j+1]->ptr);
+ limit_count = atoi(c->argv[j+2]->ptr);
+ j+=2;
+ } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
+ storekey = c->argv[j+1];
+ j++;
+ } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
+ sortby = c->argv[j+1];
+ /* If the BY pattern does not contain '*', i.e. it is constant,
+ * we don't need to sort nor to lookup the weight keys. */
+ if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
+ j++;
+ } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
+ listAddNodeTail(operations,createSortOperation(
+ REDIS_SORT_GET,c->argv[j+1]));
+ getop++;
+ j++;
+ } else {
+ decrRefCount(sortval);
+ listRelease(operations);
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ j++;
+ }
+
+ /* Load the sorting vector with all the objects to sort */
+ switch(sortval->type) {
+ case REDIS_LIST: vectorlen = listTypeLength(sortval); break;
+ case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
+ case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
+ default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
+ }
+ vector = zmalloc(sizeof(redisSortObject)*vectorlen);
+ j = 0;
+
+ if (sortval->type == REDIS_LIST) {
+ listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL);
+ listTypeEntry entry;
+ while(listTypeNext(li,&entry)) {
+ vector[j].obj = listTypeGet(&entry);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ listTypeReleaseIterator(li);
+ } else {
+ dict *set;
+ dictIterator *di;
+ dictEntry *setele;
+
+ if (sortval->type == REDIS_SET) {
+ set = sortval->ptr;
+ } else {
+ zset *zs = sortval->ptr;
+ set = zs->dict;
+ }
+
+ di = dictGetIterator(set);
+ while((setele = dictNext(di)) != NULL) {
+ vector[j].obj = dictGetEntryKey(setele);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ dictReleaseIterator(di);
+ }
+ redisAssert(j == vectorlen);
+
+ /* Now it's time to load the right scores in the sorting vector */
+ if (dontsort == 0) {
+ for (j = 0; j < vectorlen; j++) {
+ robj *byval;
+ if (sortby) {
+ /* lookup value to sort by */
+ byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
+ if (!byval) continue;
+ } else {
+ /* use object itself to sort by */
+ byval = vector[j].obj;
+ }
+
+ if (alpha) {
+ if (sortby) vector[j].u.cmpobj = getDecodedObject(byval);
+ } else {
+ if (byval->encoding == REDIS_ENCODING_RAW) {
+ vector[j].u.score = strtod(byval->ptr,NULL);
+ } else if (byval->encoding == REDIS_ENCODING_INT) {
+ /* Don't need to decode the object if it's
+ * integer-encoded (the only encoding supported) so
+ * far. We can just cast it */
+ vector[j].u.score = (long)byval->ptr;
+ } else {
+ redisAssert(1 != 1);
+ }
+ }
+
+ /* when the object was retrieved using lookupKeyByPattern,
+ * its refcount needs to be decreased. */
+ if (sortby) {
+ decrRefCount(byval);
+ }
+ }
+ }
+
+ /* We are ready to sort the vector... perform a bit of sanity check
+ * on the LIMIT option too. We'll use a partial version of quicksort. */
+ start = (limit_start < 0) ? 0 : limit_start;
+ end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
+ if (start >= vectorlen) {
+ start = vectorlen-1;
+ end = vectorlen-2;
+ }
+ if (end >= vectorlen) end = vectorlen-1;
+
+ if (dontsort == 0) {
+ server.sort_desc = desc;
+ server.sort_alpha = alpha;
+ server.sort_bypattern = sortby ? 1 : 0;
+ if (sortby && (start != 0 || end != vectorlen-1))
+ pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
+ else
+ qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
+ }
+
+ /* Send command output to the output buffer, performing the specified
+ * GET/DEL/INCR/DECR operations if any. */
+ outputlen = getop ? getop*(end-start+1) : end-start+1;
+ if (storekey == NULL) {
+ /* STORE option not specified, sent the sorting result to client */
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
+ for (j = start; j <= end; j++) {
+ listNode *ln;
+ listIter li;
+
+ if (!getop) addReplyBulk(c,vector[j].obj);
+ listRewind(operations,&li);
+ while((ln = listNext(&li))) {
+ redisSortOperation *sop = ln->value;
+ robj *val = lookupKeyByPattern(c->db,sop->pattern,
+ vector[j].obj);
+
+ if (sop->type == REDIS_SORT_GET) {
+ if (!val) {
+ addReply(c,shared.nullbulk);
+ } else {
+ addReplyBulk(c,val);
+ decrRefCount(val);
+ }
+ } else {
+ redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
+ }
+ }
+ }
+ } else {
+ robj *sobj = createZiplistObject();
+
+ /* STORE option specified, set the sorting result as a List object */
+ for (j = start; j <= end; j++) {
+ listNode *ln;
+ listIter li;
+
+ if (!getop) {
+ listTypePush(sobj,vector[j].obj,REDIS_TAIL);
+ } else {
+ listRewind(operations,&li);
+ while((ln = listNext(&li))) {
+ redisSortOperation *sop = ln->value;
+ robj *val = lookupKeyByPattern(c->db,sop->pattern,
+ vector[j].obj);
+
+ if (sop->type == REDIS_SORT_GET) {
+ if (!val) val = createStringObject("",0);
+
+ /* listTypePush does an incrRefCount, so we should take care
+ * care of the incremented refcount caused by either
+ * lookupKeyByPattern or createStringObject("",0) */
+ listTypePush(sobj,val,REDIS_TAIL);
+ decrRefCount(val);
+ } else {
+ /* always fails */
+ redisAssert(sop->type == REDIS_SORT_GET);
+ }
+ }
+ }
+ }
+ dbReplace(c->db,storekey,sobj);
+ /* Note: we add 1 because the DB is dirty anyway since even if the
+ * SORT result is empty a new key is set and maybe the old content
+ * replaced. */
+ server.dirty += 1+outputlen;
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
+ }
+
+ /* Cleanup */
+ if (sortval->type == REDIS_LIST)
+ for (j = 0; j < vectorlen; j++)
+ decrRefCount(vector[j].obj);
+ decrRefCount(sortval);
+ listRelease(operations);
+ for (j = 0; j < vectorlen; j++) {
+ if (alpha && vector[j].u.cmpobj)
+ decrRefCount(vector[j].u.cmpobj);
+ }
+ zfree(vector);
+}
+
+
diff --git a/src/t_hash.c b/src/t_hash.c
new file mode 100644
index 000000000..3f5fd6e16
--- /dev/null
+++ b/src/t_hash.c
@@ -0,0 +1,397 @@
+#include "redis.h"
+
+#include <math.h>
+
+/*-----------------------------------------------------------------------------
+ * Hash type API
+ *----------------------------------------------------------------------------*/
+
+/* Check the length of a number of objects to see if we need to convert a
+ * zipmap to a real hash. Note that we only check string encoded objects
+ * as their string length can be queried in constant time. */
+void hashTypeTryConversion(robj *subject, robj **argv, int start, int end) {
+ int i;
+ if (subject->encoding != REDIS_ENCODING_ZIPMAP) return;
+
+ for (i = start; i <= end; i++) {
+ if (argv[i]->encoding == REDIS_ENCODING_RAW &&
+ sdslen(argv[i]->ptr) > server.hash_max_zipmap_value)
+ {
+ convertToRealHash(subject);
+ return;
+ }
+ }
+}
+
+/* Encode given objects in-place when the hash uses a dict. */
+void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) {
+ if (subject->encoding == REDIS_ENCODING_HT) {
+ if (o1) *o1 = tryObjectEncoding(*o1);
+ if (o2) *o2 = tryObjectEncoding(*o2);
+ }
+}
+
+/* Get the value from a hash identified by key. Returns either a string
+ * object or NULL if the value cannot be found. The refcount of the object
+ * is always increased by 1 when the value was found. */
+robj *hashTypeGet(robj *o, robj *key) {
+ robj *value = NULL;
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *v;
+ unsigned int vlen;
+ key = getDecodedObject(key);
+ if (zipmapGet(o->ptr,key->ptr,sdslen(key->ptr),&v,&vlen)) {
+ value = createStringObject((char*)v,vlen);
+ }
+ decrRefCount(key);
+ } else {
+ dictEntry *de = dictFind(o->ptr,key);
+ if (de != NULL) {
+ value = dictGetEntryVal(de);
+ incrRefCount(value);
+ }
+ }
+ return value;
+}
+
+/* Test if the key exists in the given hash. Returns 1 if the key
+ * exists and 0 when it doesn't. */
+int hashTypeExists(robj *o, robj *key) {
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ key = getDecodedObject(key);
+ if (zipmapExists(o->ptr,key->ptr,sdslen(key->ptr))) {
+ decrRefCount(key);
+ return 1;
+ }
+ decrRefCount(key);
+ } else {
+ if (dictFind(o->ptr,key) != NULL) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Add an element, discard the old if the key already exists.
+ * Return 0 on insert and 1 on update. */
+int hashTypeSet(robj *o, robj *key, robj *value) {
+ int update = 0;
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ key = getDecodedObject(key);
+ value = getDecodedObject(value);
+ o->ptr = zipmapSet(o->ptr,
+ key->ptr,sdslen(key->ptr),
+ value->ptr,sdslen(value->ptr), &update);
+ decrRefCount(key);
+ decrRefCount(value);
+
+ /* Check if the zipmap needs to be upgraded to a real hash table */
+ if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries)
+ convertToRealHash(o);
+ } else {
+ if (dictReplace(o->ptr,key,value)) {
+ /* Insert */
+ incrRefCount(key);
+ } else {
+ /* Update */
+ update = 1;
+ }
+ incrRefCount(value);
+ }
+ return update;
+}
+
+/* Delete an element from a hash.
+ * Return 1 on deleted and 0 on not found. */
+int hashTypeDelete(robj *o, robj *key) {
+ int deleted = 0;
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ key = getDecodedObject(key);
+ o->ptr = zipmapDel(o->ptr,key->ptr,sdslen(key->ptr), &deleted);
+ decrRefCount(key);
+ } else {
+ deleted = dictDelete((dict*)o->ptr,key) == DICT_OK;
+ /* Always check if the dictionary needs a resize after a delete. */
+ if (deleted && htNeedsResize(o->ptr)) dictResize(o->ptr);
+ }
+ return deleted;
+}
+
+/* Return the number of elements in a hash. */
+unsigned long hashTypeLength(robj *o) {
+ return (o->encoding == REDIS_ENCODING_ZIPMAP) ?
+ zipmapLen((unsigned char*)o->ptr) : dictSize((dict*)o->ptr);
+}
+
+hashTypeIterator *hashTypeInitIterator(robj *subject) {
+ hashTypeIterator *hi = zmalloc(sizeof(hashTypeIterator));
+ hi->encoding = subject->encoding;
+ if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
+ hi->zi = zipmapRewind(subject->ptr);
+ } else if (hi->encoding == REDIS_ENCODING_HT) {
+ hi->di = dictGetIterator(subject->ptr);
+ } else {
+ redisAssert(NULL);
+ }
+ return hi;
+}
+
+void hashTypeReleaseIterator(hashTypeIterator *hi) {
+ if (hi->encoding == REDIS_ENCODING_HT) {
+ dictReleaseIterator(hi->di);
+ }
+ zfree(hi);
+}
+
+/* Move to the next entry in the hash. Return REDIS_OK when the next entry
+ * could be found and REDIS_ERR when the iterator reaches the end. */
+int hashTypeNext(hashTypeIterator *hi) {
+ if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
+ if ((hi->zi = zipmapNext(hi->zi, &hi->zk, &hi->zklen,
+ &hi->zv, &hi->zvlen)) == NULL) return REDIS_ERR;
+ } else {
+ if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR;
+ }
+ return REDIS_OK;
+}
+
+/* Get key or value object at current iteration position.
+ * This increases the refcount of the field object by 1. */
+robj *hashTypeCurrent(hashTypeIterator *hi, int what) {
+ robj *o;
+ if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
+ if (what & REDIS_HASH_KEY) {
+ o = createStringObject((char*)hi->zk,hi->zklen);
+ } else {
+ o = createStringObject((char*)hi->zv,hi->zvlen);
+ }
+ } else {
+ if (what & REDIS_HASH_KEY) {
+ o = dictGetEntryKey(hi->de);
+ } else {
+ o = dictGetEntryVal(hi->de);
+ }
+ incrRefCount(o);
+ }
+ return o;
+}
+
+robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
+ robj *o = lookupKeyWrite(c->db,key);
+ if (o == NULL) {
+ o = createHashObject();
+ dbAdd(c->db,key,o);
+ } else {
+ if (o->type != REDIS_HASH) {
+ addReply(c,shared.wrongtypeerr);
+ return NULL;
+ }
+ }
+ return o;
+}
+
+void convertToRealHash(robj *o) {
+ unsigned char *key, *val, *p, *zm = o->ptr;
+ unsigned int klen, vlen;
+ dict *dict = dictCreate(&hashDictType,NULL);
+
+ redisAssert(o->type == REDIS_HASH && o->encoding != REDIS_ENCODING_HT);
+ p = zipmapRewind(zm);
+ while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
+ robj *keyobj, *valobj;
+
+ keyobj = createStringObject((char*)key,klen);
+ valobj = createStringObject((char*)val,vlen);
+ keyobj = tryObjectEncoding(keyobj);
+ valobj = tryObjectEncoding(valobj);
+ dictAdd(dict,keyobj,valobj);
+ }
+ o->encoding = REDIS_ENCODING_HT;
+ o->ptr = dict;
+ zfree(zm);
+}
+
+/*-----------------------------------------------------------------------------
+ * Hash type commands
+ *----------------------------------------------------------------------------*/
+
+void hsetCommand(redisClient *c) {
+ int update;
+ robj *o;
+
+ if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+ hashTypeTryConversion(o,c->argv,2,3);
+ hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
+ update = hashTypeSet(o,c->argv[2],c->argv[3]);
+ addReply(c, update ? shared.czero : shared.cone);
+ server.dirty++;
+}
+
+void hsetnxCommand(redisClient *c) {
+ robj *o;
+ if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+ hashTypeTryConversion(o,c->argv,2,3);
+
+ if (hashTypeExists(o, c->argv[2])) {
+ addReply(c, shared.czero);
+ } else {
+ hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
+ hashTypeSet(o,c->argv[2],c->argv[3]);
+ addReply(c, shared.cone);
+ server.dirty++;
+ }
+}
+
+void hmsetCommand(redisClient *c) {
+ int i;
+ robj *o;
+
+ if ((c->argc % 2) == 1) {
+ addReplySds(c,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
+ return;
+ }
+
+ if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+ hashTypeTryConversion(o,c->argv,2,c->argc-1);
+ for (i = 2; i < c->argc; i += 2) {
+ hashTypeTryObjectEncoding(o,&c->argv[i], &c->argv[i+1]);
+ hashTypeSet(o,c->argv[i],c->argv[i+1]);
+ }
+ addReply(c, shared.ok);
+ server.dirty++;
+}
+
+void hincrbyCommand(redisClient *c) {
+ long long value, incr;
+ robj *o, *current, *new;
+
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
+ if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+ if ((current = hashTypeGet(o,c->argv[2])) != NULL) {
+ if (getLongLongFromObjectOrReply(c,current,&value,
+ "hash value is not an integer") != REDIS_OK) {
+ decrRefCount(current);
+ return;
+ }
+ decrRefCount(current);
+ } else {
+ value = 0;
+ }
+
+ value += incr;
+ new = createStringObjectFromLongLong(value);
+ hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
+ hashTypeSet(o,c->argv[2],new);
+ decrRefCount(new);
+ addReplyLongLong(c,value);
+ server.dirty++;
+}
+
+void hgetCommand(redisClient *c) {
+ robj *o, *value;
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,o,REDIS_HASH)) return;
+
+ if ((value = hashTypeGet(o,c->argv[2])) != NULL) {
+ addReplyBulk(c,value);
+ decrRefCount(value);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+}
+
+void hmgetCommand(redisClient *c) {
+ int i;
+ robj *o, *value;
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o != NULL && o->type != REDIS_HASH) {
+ addReply(c,shared.wrongtypeerr);
+ }
+
+ /* Note the check for o != NULL happens inside the loop. This is
+ * done because objects that cannot be found are considered to be
+ * an empty hash. The reply should then be a series of NULLs. */
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-2));
+ for (i = 2; i < c->argc; i++) {
+ if (o != NULL && (value = hashTypeGet(o,c->argv[i])) != NULL) {
+ addReplyBulk(c,value);
+ decrRefCount(value);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ }
+}
+
+void hdelCommand(redisClient *c) {
+ robj *o;
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,REDIS_HASH)) return;
+
+ if (hashTypeDelete(o,c->argv[2])) {
+ if (hashTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
+ addReply(c,shared.cone);
+ server.dirty++;
+ } else {
+ addReply(c,shared.czero);
+ }
+}
+
+void hlenCommand(redisClient *c) {
+ robj *o;
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,REDIS_HASH)) return;
+
+ addReplyUlong(c,hashTypeLength(o));
+}
+
+void genericHgetallCommand(redisClient *c, int flags) {
+ robj *o, *lenobj, *obj;
+ unsigned long count = 0;
+ hashTypeIterator *hi;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
+ || checkType(c,o,REDIS_HASH)) return;
+
+ lenobj = createObject(REDIS_STRING,NULL);
+ addReply(c,lenobj);
+ decrRefCount(lenobj);
+
+ hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != REDIS_ERR) {
+ if (flags & REDIS_HASH_KEY) {
+ obj = hashTypeCurrent(hi,REDIS_HASH_KEY);
+ addReplyBulk(c,obj);
+ decrRefCount(obj);
+ count++;
+ }
+ if (flags & REDIS_HASH_VALUE) {
+ obj = hashTypeCurrent(hi,REDIS_HASH_VALUE);
+ addReplyBulk(c,obj);
+ decrRefCount(obj);
+ count++;
+ }
+ }
+ hashTypeReleaseIterator(hi);
+
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",count);
+}
+
+void hkeysCommand(redisClient *c) {
+ genericHgetallCommand(c,REDIS_HASH_KEY);
+}
+
+void hvalsCommand(redisClient *c) {
+ genericHgetallCommand(c,REDIS_HASH_VALUE);
+}
+
+void hgetallCommand(redisClient *c) {
+ genericHgetallCommand(c,REDIS_HASH_KEY|REDIS_HASH_VALUE);
+}
+
+void hexistsCommand(redisClient *c) {
+ robj *o;
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,REDIS_HASH)) return;
+
+ addReply(c, hashTypeExists(o,c->argv[2]) ? shared.cone : shared.czero);
+}
diff --git a/src/t_list.c b/src/t_list.c
new file mode 100644
index 000000000..ec8b30c3f
--- /dev/null
+++ b/src/t_list.c
@@ -0,0 +1,829 @@
+#include "redis.h"
+
+/*-----------------------------------------------------------------------------
+ * List API
+ *----------------------------------------------------------------------------*/
+
+/* Check the argument length to see if it requires us to convert the ziplist
+ * to a real list. Only check raw-encoded objects because integer encoded
+ * objects are never too long. */
+void listTypeTryConversion(robj *subject, robj *value) {
+ if (subject->encoding != REDIS_ENCODING_ZIPLIST) return;
+ if (value->encoding == REDIS_ENCODING_RAW &&
+ sdslen(value->ptr) > server.list_max_ziplist_value)
+ listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
+}
+
+void listTypePush(robj *subject, robj *value, int where) {
+ /* Check if we need to convert the ziplist */
+ listTypeTryConversion(subject,value);
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
+ ziplistLen(subject->ptr) >= server.list_max_ziplist_entries)
+ listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
+
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
+ int pos = (where == REDIS_HEAD) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ value = getDecodedObject(value);
+ subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),pos);
+ decrRefCount(value);
+ } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
+ if (where == REDIS_HEAD) {
+ listAddNodeHead(subject->ptr,value);
+ } else {
+ listAddNodeTail(subject->ptr,value);
+ }
+ incrRefCount(value);
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+robj *listTypePop(robj *subject, int where) {
+ robj *value = NULL;
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *p;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ int pos = (where == REDIS_HEAD) ? 0 : -1;
+ p = ziplistIndex(subject->ptr,pos);
+ if (ziplistGet(p,&vstr,&vlen,&vlong)) {
+ if (vstr) {
+ value = createStringObject((char*)vstr,vlen);
+ } else {
+ value = createStringObjectFromLongLong(vlong);
+ }
+ /* We only need to delete an element when it exists */
+ subject->ptr = ziplistDelete(subject->ptr,&p);
+ }
+ } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
+ list *list = subject->ptr;
+ listNode *ln;
+ if (where == REDIS_HEAD) {
+ ln = listFirst(list);
+ } else {
+ ln = listLast(list);
+ }
+ if (ln != NULL) {
+ value = listNodeValue(ln);
+ incrRefCount(value);
+ listDelNode(list,ln);
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ return value;
+}
+
+unsigned long listTypeLength(robj *subject) {
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST) {
+ return ziplistLen(subject->ptr);
+ } else if (subject->encoding == REDIS_ENCODING_LINKEDLIST) {
+ return listLength((list*)subject->ptr);
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+/* Initialize an iterator at the specified index. */
+listTypeIterator *listTypeInitIterator(robj *subject, int index, unsigned char direction) {
+ listTypeIterator *li = zmalloc(sizeof(listTypeIterator));
+ li->subject = subject;
+ li->encoding = subject->encoding;
+ li->direction = direction;
+ if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ li->zi = ziplistIndex(subject->ptr,index);
+ } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ li->ln = listIndex(subject->ptr,index);
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ return li;
+}
+
+/* Clean up the iterator. */
+void listTypeReleaseIterator(listTypeIterator *li) {
+ zfree(li);
+}
+
+/* Stores pointer to current the entry in the provided entry structure
+ * and advances the position of the iterator. Returns 1 when the current
+ * entry is in fact an entry, 0 otherwise. */
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
+ /* Protect from converting when iterating */
+ redisAssert(li->subject->encoding == li->encoding);
+
+ entry->li = li;
+ if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ entry->zi = li->zi;
+ if (entry->zi != NULL) {
+ if (li->direction == REDIS_TAIL)
+ li->zi = ziplistNext(li->subject->ptr,li->zi);
+ else
+ li->zi = ziplistPrev(li->subject->ptr,li->zi);
+ return 1;
+ }
+ } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ entry->ln = li->ln;
+ if (entry->ln != NULL) {
+ if (li->direction == REDIS_TAIL)
+ li->ln = li->ln->next;
+ else
+ li->ln = li->ln->prev;
+ return 1;
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ return 0;
+}
+
+/* Return entry or NULL at the current position of the iterator. */
+robj *listTypeGet(listTypeEntry *entry) {
+ listTypeIterator *li = entry->li;
+ robj *value = NULL;
+ if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ redisAssert(entry->zi != NULL);
+ if (ziplistGet(entry->zi,&vstr,&vlen,&vlong)) {
+ if (vstr) {
+ value = createStringObject((char*)vstr,vlen);
+ } else {
+ value = createStringObjectFromLongLong(vlong);
+ }
+ }
+ } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ redisAssert(entry->ln != NULL);
+ value = listNodeValue(entry->ln);
+ incrRefCount(value);
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ return value;
+}
+
+void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
+ robj *subject = entry->li->subject;
+ if (entry->li->encoding == REDIS_ENCODING_ZIPLIST) {
+ value = getDecodedObject(value);
+ if (where == REDIS_TAIL) {
+ unsigned char *next = ziplistNext(subject->ptr,entry->zi);
+
+ /* When we insert after the current element, but the current element
+ * is the tail of the list, we need to do a push. */
+ if (next == NULL) {
+ subject->ptr = ziplistPush(subject->ptr,value->ptr,sdslen(value->ptr),REDIS_TAIL);
+ } else {
+ subject->ptr = ziplistInsert(subject->ptr,next,value->ptr,sdslen(value->ptr));
+ }
+ } else {
+ subject->ptr = ziplistInsert(subject->ptr,entry->zi,value->ptr,sdslen(value->ptr));
+ }
+ decrRefCount(value);
+ } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ if (where == REDIS_TAIL) {
+ listInsertNode(subject->ptr,entry->ln,value,AL_START_TAIL);
+ } else {
+ listInsertNode(subject->ptr,entry->ln,value,AL_START_HEAD);
+ }
+ incrRefCount(value);
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+/* Compare the given object with the entry at the current position. */
+int listTypeEqual(listTypeEntry *entry, robj *o) {
+ listTypeIterator *li = entry->li;
+ if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ redisAssert(o->encoding == REDIS_ENCODING_RAW);
+ return ziplistCompare(entry->zi,o->ptr,sdslen(o->ptr));
+ } else if (li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ return equalStringObjects(o,listNodeValue(entry->ln));
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+/* Delete the element pointed to. */
+void listTypeDelete(listTypeEntry *entry) {
+ listTypeIterator *li = entry->li;
+ if (li->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *p = entry->zi;
+ li->subject->ptr = ziplistDelete(li->subject->ptr,&p);
+
+ /* Update position of the iterator depending on the direction */
+ if (li->direction == REDIS_TAIL)
+ li->zi = p;
+ else
+ li->zi = ziplistPrev(li->subject->ptr,p);
+ } else if (entry->li->encoding == REDIS_ENCODING_LINKEDLIST) {
+ listNode *next;
+ if (li->direction == REDIS_TAIL)
+ next = entry->ln->next;
+ else
+ next = entry->ln->prev;
+ listDelNode(li->subject->ptr,entry->ln);
+ li->ln = next;
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+void listTypeConvert(robj *subject, int enc) {
+ listTypeIterator *li;
+ listTypeEntry entry;
+ redisAssert(subject->type == REDIS_LIST);
+
+ if (enc == REDIS_ENCODING_LINKEDLIST) {
+ list *l = listCreate();
+ listSetFreeMethod(l,decrRefCount);
+
+ /* listTypeGet returns a robj with incremented refcount */
+ li = listTypeInitIterator(subject,0,REDIS_TAIL);
+ while (listTypeNext(li,&entry)) listAddNodeTail(l,listTypeGet(&entry));
+ listTypeReleaseIterator(li);
+
+ subject->encoding = REDIS_ENCODING_LINKEDLIST;
+ zfree(subject->ptr);
+ subject->ptr = l;
+ } else {
+ redisPanic("Unsupported list conversion");
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * List Commands
+ *----------------------------------------------------------------------------*/
+
+void pushGenericCommand(redisClient *c, int where) {
+ robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
+ if (lobj == NULL) {
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.cone);
+ return;
+ }
+ lobj = createZiplistObject();
+ dbAdd(c->db,c->argv[1],lobj);
+ } else {
+ if (lobj->type != REDIS_LIST) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
+ addReply(c,shared.cone);
+ return;
+ }
+ }
+ listTypePush(lobj,c->argv[2],where);
+ addReplyLongLong(c,listTypeLength(lobj));
+ server.dirty++;
+}
+
+void lpushCommand(redisClient *c) {
+ pushGenericCommand(c,REDIS_HEAD);
+}
+
+void rpushCommand(redisClient *c) {
+ pushGenericCommand(c,REDIS_TAIL);
+}
+
+void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
+ robj *subject;
+ listTypeIterator *iter;
+ listTypeEntry entry;
+ int inserted = 0;
+
+ if ((subject = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,subject,REDIS_LIST)) return;
+
+ if (refval != NULL) {
+ /* Note: we expect refval to be string-encoded because it is *not* the
+ * last argument of the multi-bulk LINSERT. */
+ redisAssert(refval->encoding == REDIS_ENCODING_RAW);
+
+ /* We're not sure if this value can be inserted yet, but we cannot
+ * convert the list inside the iterator. We don't want to loop over
+ * the list twice (once to see if the value can be inserted and once
+ * to do the actual insert), so we assume this value can be inserted
+ * and convert the ziplist to a regular list if necessary. */
+ listTypeTryConversion(subject,val);
+
+ /* Seek refval from head to tail */
+ iter = listTypeInitIterator(subject,0,REDIS_TAIL);
+ while (listTypeNext(iter,&entry)) {
+ if (listTypeEqual(&entry,refval)) {
+ listTypeInsert(&entry,val,where);
+ inserted = 1;
+ break;
+ }
+ }
+ listTypeReleaseIterator(iter);
+
+ if (inserted) {
+ /* Check if the length exceeds the ziplist length threshold. */
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST &&
+ ziplistLen(subject->ptr) > server.list_max_ziplist_entries)
+ listTypeConvert(subject,REDIS_ENCODING_LINKEDLIST);
+ server.dirty++;
+ } else {
+ /* Notify client of a failed insert */
+ addReply(c,shared.cnegone);
+ return;
+ }
+ } else {
+ listTypePush(subject,val,where);
+ server.dirty++;
+ }
+
+ addReplyUlong(c,listTypeLength(subject));
+}
+
+void lpushxCommand(redisClient *c) {
+ pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
+}
+
+void rpushxCommand(redisClient *c) {
+ pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
+}
+
+void linsertCommand(redisClient *c) {
+ if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
+ pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
+ } else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
+ pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
+ } else {
+ addReply(c,shared.syntaxerr);
+ }
+}
+
+void llenCommand(redisClient *c) {
+ robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
+ if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ addReplyUlong(c,listTypeLength(o));
+}
+
+void lindexCommand(redisClient *c) {
+ robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
+ if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ int index = atoi(c->argv[2]->ptr);
+ robj *value = NULL;
+
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *p;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+ p = ziplistIndex(o->ptr,index);
+ if (ziplistGet(p,&vstr,&vlen,&vlong)) {
+ if (vstr) {
+ value = createStringObject((char*)vstr,vlen);
+ } else {
+ value = createStringObjectFromLongLong(vlong);
+ }
+ addReplyBulk(c,value);
+ decrRefCount(value);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
+ listNode *ln = listIndex(o->ptr,index);
+ if (ln != NULL) {
+ value = listNodeValue(ln);
+ addReplyBulk(c,value);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+void lsetCommand(redisClient *c) {
+ robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
+ if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ int index = atoi(c->argv[2]->ptr);
+ robj *value = c->argv[3];
+
+ listTypeTryConversion(o,value);
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *p, *zl = o->ptr;
+ p = ziplistIndex(zl,index);
+ if (p == NULL) {
+ addReply(c,shared.outofrangeerr);
+ } else {
+ o->ptr = ziplistDelete(o->ptr,&p);
+ value = getDecodedObject(value);
+ o->ptr = ziplistInsert(o->ptr,p,value->ptr,sdslen(value->ptr));
+ decrRefCount(value);
+ addReply(c,shared.ok);
+ server.dirty++;
+ }
+ } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
+ listNode *ln = listIndex(o->ptr,index);
+ if (ln == NULL) {
+ addReply(c,shared.outofrangeerr);
+ } else {
+ decrRefCount((robj*)listNodeValue(ln));
+ listNodeValue(ln) = value;
+ incrRefCount(value);
+ addReply(c,shared.ok);
+ server.dirty++;
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+}
+
+void popGenericCommand(redisClient *c, int where) {
+ robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
+ if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+
+ robj *value = listTypePop(o,where);
+ if (value == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ addReplyBulk(c,value);
+ decrRefCount(value);
+ if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ }
+}
+
+void lpopCommand(redisClient *c) {
+ popGenericCommand(c,REDIS_HEAD);
+}
+
+void rpopCommand(redisClient *c) {
+ popGenericCommand(c,REDIS_TAIL);
+}
+
+void lrangeCommand(redisClient *c) {
+ robj *o, *value;
+ int start = atoi(c->argv[2]->ptr);
+ int end = atoi(c->argv[3]->ptr);
+ int llen;
+ int rangelen, j;
+ listTypeEntry entry;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
+ || checkType(c,o,REDIS_LIST)) return;
+ llen = listTypeLength(o);
+
+ /* convert negative indexes */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || start >= llen) {
+ /* Out of range start or start > end result in empty list */
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+ if (end >= llen) end = llen-1;
+ rangelen = (end-start)+1;
+
+ /* Return the result in form of a multi-bulk reply */
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
+ listTypeIterator *li = listTypeInitIterator(o,start,REDIS_TAIL);
+ for (j = 0; j < rangelen; j++) {
+ redisAssert(listTypeNext(li,&entry));
+ value = listTypeGet(&entry);
+ addReplyBulk(c,value);
+ decrRefCount(value);
+ }
+ listTypeReleaseIterator(li);
+}
+
+void ltrimCommand(redisClient *c) {
+ robj *o;
+ int start = atoi(c->argv[2]->ptr);
+ int end = atoi(c->argv[3]->ptr);
+ int llen;
+ int j, ltrim, rtrim;
+ list *list;
+ listNode *ln;
+
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
+ checkType(c,o,REDIS_LIST)) return;
+ llen = listTypeLength(o);
+
+ /* convert negative indexes */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || start >= llen) {
+ /* Out of range start or start > end result in empty list */
+ ltrim = llen;
+ rtrim = 0;
+ } else {
+ if (end >= llen) end = llen-1;
+ ltrim = start;
+ rtrim = llen-end-1;
+ }
+
+ /* Remove list elements to perform the trim */
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ o->ptr = ziplistDeleteRange(o->ptr,0,ltrim);
+ o->ptr = ziplistDeleteRange(o->ptr,-rtrim,rtrim);
+ } else if (o->encoding == REDIS_ENCODING_LINKEDLIST) {
+ list = o->ptr;
+ for (j = 0; j < ltrim; j++) {
+ ln = listFirst(list);
+ listDelNode(list,ln);
+ }
+ for (j = 0; j < rtrim; j++) {
+ ln = listLast(list);
+ listDelNode(list,ln);
+ }
+ } else {
+ redisPanic("Unknown list encoding");
+ }
+ if (listTypeLength(o) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ addReply(c,shared.ok);
+}
+
+void lremCommand(redisClient *c) {
+ robj *subject, *obj = c->argv[3];
+ int toremove = atoi(c->argv[2]->ptr);
+ int removed = 0;
+ listTypeEntry entry;
+
+ subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
+ if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
+
+ /* Make sure obj is raw when we're dealing with a ziplist */
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST)
+ obj = getDecodedObject(obj);
+
+ listTypeIterator *li;
+ if (toremove < 0) {
+ toremove = -toremove;
+ li = listTypeInitIterator(subject,-1,REDIS_HEAD);
+ } else {
+ li = listTypeInitIterator(subject,0,REDIS_TAIL);
+ }
+
+ while (listTypeNext(li,&entry)) {
+ if (listTypeEqual(&entry,obj)) {
+ listTypeDelete(&entry);
+ server.dirty++;
+ removed++;
+ if (toremove && removed == toremove) break;
+ }
+ }
+ listTypeReleaseIterator(li);
+
+ /* Clean up raw encoded object */
+ if (subject->encoding == REDIS_ENCODING_ZIPLIST)
+ decrRefCount(obj);
+
+ if (listTypeLength(subject) == 0) dbDelete(c->db,c->argv[1]);
+ addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
+}
+
+/* This is the semantic of this command:
+ * RPOPLPUSH srclist dstlist:
+ * IF LLEN(srclist) > 0
+ * element = RPOP srclist
+ * LPUSH dstlist element
+ * RETURN element
+ * ELSE
+ * RETURN nil
+ * END
+ * END
+ *
+ * The idea is to be able to get an element from a list in a reliable way
+ * since the element is not just returned but pushed against another list
+ * as well. This command was originally proposed by Ezra Zygmuntowicz.
+ */
+void rpoplpushcommand(redisClient *c) {
+ robj *sobj, *value;
+ if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,sobj,REDIS_LIST)) return;
+
+ if (listTypeLength(sobj) == 0) {
+ addReply(c,shared.nullbulk);
+ } else {
+ robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
+ if (dobj && checkType(c,dobj,REDIS_LIST)) return;
+ value = listTypePop(sobj,REDIS_TAIL);
+
+ /* Add the element to the target list (unless it's directly
+ * passed to some BLPOP-ing client */
+ if (!handleClientsWaitingListPush(c,c->argv[2],value)) {
+ /* Create the list if the key does not exist */
+ if (!dobj) {
+ dobj = createZiplistObject();
+ dbAdd(c->db,c->argv[2],dobj);
+ }
+ listTypePush(dobj,value,REDIS_HEAD);
+ }
+
+ /* Send the element to the client as reply as well */
+ addReplyBulk(c,value);
+
+ /* listTypePop returns an object with its refcount incremented */
+ decrRefCount(value);
+
+ /* Delete the source list when it is empty */
+ if (listTypeLength(sobj) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * Blocking POP operations
+ *----------------------------------------------------------------------------*/
+
+/* Currently Redis blocking operations support is limited to list POP ops,
+ * so the current implementation is not fully generic, but it is also not
+ * completely specific so it will not require a rewrite to support new
+ * kind of blocking operations in the future.
+ *
+ * Still it's important to note that list blocking operations can be already
+ * used as a notification mechanism in order to implement other blocking
+ * operations at application level, so there must be a very strong evidence
+ * of usefulness and generality before new blocking operations are implemented.
+ *
+ * This is how the current blocking POP works, we use BLPOP as example:
+ * - If the user calls BLPOP and the key exists and contains a non empty list
+ * then LPOP is called instead. So BLPOP is semantically the same as LPOP
+ * if there is not to block.
+ * - If instead BLPOP is called and the key does not exists or the list is
+ * empty we need to block. In order to do so we remove the notification for
+ * new data to read in the client socket (so that we'll not serve new
+ * requests if the blocking request is not served). Also we put the client
+ * in a dictionary (db->blocking_keys) mapping keys to a list of clients
+ * blocking for this keys.
+ * - If a PUSH operation against a key with blocked clients waiting is
+ * performed, we serve the first in the list: basically instead to push
+ * the new element inside the list we return it to the (first / oldest)
+ * blocking client, unblock the client, and remove it form the list.
+ *
+ * The above comment and the source code should be enough in order to understand
+ * the implementation and modify / fix it later.
+ */
+
+/* Set a client in blocking mode for the specified key, with the specified
+ * timeout */
+void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
+ dictEntry *de;
+ list *l;
+ int j;
+
+ c->blocking_keys = zmalloc(sizeof(robj*)*numkeys);
+ c->blocking_keys_num = numkeys;
+ c->blockingto = timeout;
+ for (j = 0; j < numkeys; j++) {
+ /* Add the key in the client structure, to map clients -> keys */
+ c->blocking_keys[j] = keys[j];
+ incrRefCount(keys[j]);
+
+ /* And in the other "side", to map keys -> clients */
+ de = dictFind(c->db->blocking_keys,keys[j]);
+ if (de == NULL) {
+ int retval;
+
+ /* For every key we take a list of clients blocked for it */
+ l = listCreate();
+ retval = dictAdd(c->db->blocking_keys,keys[j],l);
+ incrRefCount(keys[j]);
+ redisAssert(retval == DICT_OK);
+ } else {
+ l = dictGetEntryVal(de);
+ }
+ listAddNodeTail(l,c);
+ }
+ /* Mark the client as a blocked client */
+ c->flags |= REDIS_BLOCKED;
+ server.blpop_blocked_clients++;
+}
+
+/* Unblock a client that's waiting in a blocking operation such as BLPOP */
+void unblockClientWaitingData(redisClient *c) {
+ dictEntry *de;
+ list *l;
+ int j;
+
+ redisAssert(c->blocking_keys != NULL);
+ /* The client may wait for multiple keys, so unblock it for every key. */
+ for (j = 0; j < c->blocking_keys_num; j++) {
+ /* Remove this client from the list of clients waiting for this key. */
+ de = dictFind(c->db->blocking_keys,c->blocking_keys[j]);
+ redisAssert(de != NULL);
+ l = dictGetEntryVal(de);
+ listDelNode(l,listSearchKey(l,c));
+ /* If the list is empty we need to remove it to avoid wasting memory */
+ if (listLength(l) == 0)
+ dictDelete(c->db->blocking_keys,c->blocking_keys[j]);
+ decrRefCount(c->blocking_keys[j]);
+ }
+ /* Cleanup the client structure */
+ zfree(c->blocking_keys);
+ c->blocking_keys = NULL;
+ c->flags &= (~REDIS_BLOCKED);
+ server.blpop_blocked_clients--;
+ /* We want to process data if there is some command waiting
+ * in the input buffer. Note that this is safe even if
+ * unblockClientWaitingData() gets called from freeClient() because
+ * freeClient() will be smart enough to call this function
+ * *after* c->querybuf was set to NULL. */
+ if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
+}
+
+/* This should be called from any function PUSHing into lists.
+ * 'c' is the "pushing client", 'key' is the key it is pushing data against,
+ * 'ele' is the element pushed.
+ *
+ * If the function returns 0 there was no client waiting for a list push
+ * against this key.
+ *
+ * If the function returns 1 there was a client waiting for a list push
+ * against this key, the element was passed to this client thus it's not
+ * needed to actually add it to the list and the caller should return asap. */
+int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
+ struct dictEntry *de;
+ redisClient *receiver;
+ list *l;
+ listNode *ln;
+
+ de = dictFind(c->db->blocking_keys,key);
+ if (de == NULL) return 0;
+ l = dictGetEntryVal(de);
+ ln = listFirst(l);
+ redisAssert(ln != NULL);
+ receiver = ln->value;
+
+ addReplySds(receiver,sdsnew("*2\r\n"));
+ addReplyBulk(receiver,key);
+ addReplyBulk(receiver,ele);
+ unblockClientWaitingData(receiver);
+ return 1;
+}
+
+/* Blocking RPOP/LPOP */
+void blockingPopGenericCommand(redisClient *c, int where) {
+ robj *o;
+ time_t timeout;
+ int j;
+
+ for (j = 1; j < c->argc-1; j++) {
+ o = lookupKeyWrite(c->db,c->argv[j]);
+ if (o != NULL) {
+ if (o->type != REDIS_LIST) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ } else {
+ if (listTypeLength(o) != 0) {
+ /* If the list contains elements fall back to the usual
+ * non-blocking POP operation */
+ robj *argv[2], **orig_argv;
+ int orig_argc;
+
+ /* We need to alter the command arguments before to call
+ * popGenericCommand() as the command takes a single key. */
+ orig_argv = c->argv;
+ orig_argc = c->argc;
+ argv[1] = c->argv[j];
+ c->argv = argv;
+ c->argc = 2;
+
+ /* Also the return value is different, we need to output
+ * the multi bulk reply header and the key name. The
+ * "real" command will add the last element (the value)
+ * for us. If this souds like an hack to you it's just
+ * because it is... */
+ addReplySds(c,sdsnew("*2\r\n"));
+ addReplyBulk(c,argv[1]);
+ popGenericCommand(c,where);
+
+ /* Fix the client structure with the original stuff */
+ c->argv = orig_argv;
+ c->argc = orig_argc;
+ return;
+ }
+ }
+ }
+ }
+ /* If the list is empty or the key does not exists we must block */
+ timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
+ if (timeout > 0) timeout += time(NULL);
+ blockForKeys(c,c->argv+1,c->argc-2,timeout);
+}
+
+void blpopCommand(redisClient *c) {
+ blockingPopGenericCommand(c,REDIS_HEAD);
+}
+
+void brpopCommand(redisClient *c) {
+ blockingPopGenericCommand(c,REDIS_TAIL);
+}
diff --git a/src/t_set.c b/src/t_set.c
new file mode 100644
index 000000000..808ef268e
--- /dev/null
+++ b/src/t_set.c
@@ -0,0 +1,349 @@
+#include "redis.h"
+
+/*-----------------------------------------------------------------------------
+ * Set Commands
+ *----------------------------------------------------------------------------*/
+
+void saddCommand(redisClient *c) {
+ robj *set;
+
+ set = lookupKeyWrite(c->db,c->argv[1]);
+ if (set == NULL) {
+ set = createSetObject();
+ dbAdd(c->db,c->argv[1],set);
+ } else {
+ if (set->type != REDIS_SET) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ }
+ if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
+ incrRefCount(c->argv[2]);
+ server.dirty++;
+ addReply(c,shared.cone);
+ } else {
+ addReply(c,shared.czero);
+ }
+}
+
+void sremCommand(redisClient *c) {
+ robj *set;
+
+ if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,set,REDIS_SET)) return;
+
+ if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
+ server.dirty++;
+ if (htNeedsResize(set->ptr)) dictResize(set->ptr);
+ if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
+ addReply(c,shared.cone);
+ } else {
+ addReply(c,shared.czero);
+ }
+}
+
+void smoveCommand(redisClient *c) {
+ robj *srcset, *dstset;
+
+ srcset = lookupKeyWrite(c->db,c->argv[1]);
+ dstset = lookupKeyWrite(c->db,c->argv[2]);
+
+ /* If the source key does not exist return 0, if it's of the wrong type
+ * raise an error */
+ if (srcset == NULL || srcset->type != REDIS_SET) {
+ addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
+ return;
+ }
+ /* Error if the destination key is not a set as well */
+ if (dstset && dstset->type != REDIS_SET) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ /* Remove the element from the source set */
+ if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
+ /* Key not found in the src set! return zero */
+ addReply(c,shared.czero);
+ return;
+ }
+ if (dictSize((dict*)srcset->ptr) == 0 && srcset != dstset)
+ dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ /* Add the element to the destination set */
+ if (!dstset) {
+ dstset = createSetObject();
+ dbAdd(c->db,c->argv[2],dstset);
+ }
+ if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
+ incrRefCount(c->argv[3]);
+ addReply(c,shared.cone);
+}
+
+void sismemberCommand(redisClient *c) {
+ robj *set;
+
+ if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,set,REDIS_SET)) return;
+
+ if (dictFind(set->ptr,c->argv[2]))
+ addReply(c,shared.cone);
+ else
+ addReply(c,shared.czero);
+}
+
+void scardCommand(redisClient *c) {
+ robj *o;
+ dict *s;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,REDIS_SET)) return;
+
+ s = o->ptr;
+ addReplyUlong(c,dictSize(s));
+}
+
+void spopCommand(redisClient *c) {
+ robj *set;
+ dictEntry *de;
+
+ if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,set,REDIS_SET)) return;
+
+ de = dictGetRandomKey(set->ptr);
+ if (de == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ robj *ele = dictGetEntryKey(de);
+
+ addReplyBulk(c,ele);
+ dictDelete(set->ptr,ele);
+ if (htNeedsResize(set->ptr)) dictResize(set->ptr);
+ if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ }
+}
+
+void srandmemberCommand(redisClient *c) {
+ robj *set;
+ dictEntry *de;
+
+ if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,set,REDIS_SET)) return;
+
+ de = dictGetRandomKey(set->ptr);
+ if (de == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ robj *ele = dictGetEntryKey(de);
+
+ addReplyBulk(c,ele);
+ }
+}
+
+int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
+ dict **d1 = (void*) s1, **d2 = (void*) s2;
+
+ return dictSize(*d1)-dictSize(*d2);
+}
+
+void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
+ dict **dv = zmalloc(sizeof(dict*)*setsnum);
+ dictIterator *di;
+ dictEntry *de;
+ robj *lenobj = NULL, *dstset = NULL;
+ unsigned long j, cardinality = 0;
+
+ for (j = 0; j < setsnum; j++) {
+ robj *setobj;
+
+ setobj = dstkey ?
+ lookupKeyWrite(c->db,setskeys[j]) :
+ lookupKeyRead(c->db,setskeys[j]);
+ if (!setobj) {
+ zfree(dv);
+ if (dstkey) {
+ if (dbDelete(c->db,dstkey))
+ server.dirty++;
+ addReply(c,shared.czero);
+ } else {
+ addReply(c,shared.emptymultibulk);
+ }
+ return;
+ }
+ if (setobj->type != REDIS_SET) {
+ zfree(dv);
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ dv[j] = setobj->ptr;
+ }
+ /* Sort sets from the smallest to largest, this will improve our
+ * algorithm's performace */
+ qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
+
+ /* The first thing we should output is the total number of elements...
+ * since this is a multi-bulk write, but at this stage we don't know
+ * the intersection set size, so we use a trick, append an empty object
+ * to the output list and save the pointer to later modify it with the
+ * right length */
+ if (!dstkey) {
+ lenobj = createObject(REDIS_STRING,NULL);
+ addReply(c,lenobj);
+ decrRefCount(lenobj);
+ } else {
+ /* If we have a target key where to store the resulting set
+ * create this key with an empty set inside */
+ dstset = createSetObject();
+ }
+
+ /* Iterate all the elements of the first (smallest) set, and test
+ * the element against all the other sets, if at least one set does
+ * not include the element it is discarded */
+ di = dictGetIterator(dv[0]);
+
+ while((de = dictNext(di)) != NULL) {
+ robj *ele;
+
+ for (j = 1; j < setsnum; j++)
+ if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
+ if (j != setsnum)
+ continue; /* at least one set does not contain the member */
+ ele = dictGetEntryKey(de);
+ if (!dstkey) {
+ addReplyBulk(c,ele);
+ cardinality++;
+ } else {
+ dictAdd(dstset->ptr,ele,NULL);
+ incrRefCount(ele);
+ }
+ }
+ dictReleaseIterator(di);
+
+ if (dstkey) {
+ /* Store the resulting set into the target, if the intersection
+ * is not an empty set. */
+ dbDelete(c->db,dstkey);
+ if (dictSize((dict*)dstset->ptr) > 0) {
+ dbAdd(c->db,dstkey,dstset);
+ addReplyLongLong(c,dictSize((dict*)dstset->ptr));
+ } else {
+ decrRefCount(dstset);
+ addReply(c,shared.czero);
+ }
+ server.dirty++;
+ } else {
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
+ }
+ zfree(dv);
+}
+
+void sinterCommand(redisClient *c) {
+ sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
+}
+
+void sinterstoreCommand(redisClient *c) {
+ sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
+}
+
+void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
+ dict **dv = zmalloc(sizeof(dict*)*setsnum);
+ dictIterator *di;
+ dictEntry *de;
+ robj *dstset = NULL;
+ int j, cardinality = 0;
+
+ for (j = 0; j < setsnum; j++) {
+ robj *setobj;
+
+ setobj = dstkey ?
+ lookupKeyWrite(c->db,setskeys[j]) :
+ lookupKeyRead(c->db,setskeys[j]);
+ if (!setobj) {
+ dv[j] = NULL;
+ continue;
+ }
+ if (setobj->type != REDIS_SET) {
+ zfree(dv);
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ dv[j] = setobj->ptr;
+ }
+
+ /* We need a temp set object to store our union. If the dstkey
+ * is not NULL (that is, we are inside an SUNIONSTORE operation) then
+ * this set object will be the resulting object to set into the target key*/
+ dstset = createSetObject();
+
+ /* Iterate all the elements of all the sets, add every element a single
+ * time to the result set */
+ for (j = 0; j < setsnum; j++) {
+ if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
+ if (!dv[j]) continue; /* non existing keys are like empty sets */
+
+ di = dictGetIterator(dv[j]);
+
+ while((de = dictNext(di)) != NULL) {
+ robj *ele;
+
+ /* dictAdd will not add the same element multiple times */
+ ele = dictGetEntryKey(de);
+ if (op == REDIS_OP_UNION || j == 0) {
+ if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
+ incrRefCount(ele);
+ cardinality++;
+ }
+ } else if (op == REDIS_OP_DIFF) {
+ if (dictDelete(dstset->ptr,ele) == DICT_OK) {
+ cardinality--;
+ }
+ }
+ }
+ dictReleaseIterator(di);
+
+ /* result set is empty? Exit asap. */
+ if (op == REDIS_OP_DIFF && cardinality == 0) break;
+ }
+
+ /* Output the content of the resulting set, if not in STORE mode */
+ if (!dstkey) {
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
+ di = dictGetIterator(dstset->ptr);
+ while((de = dictNext(di)) != NULL) {
+ robj *ele;
+
+ ele = dictGetEntryKey(de);
+ addReplyBulk(c,ele);
+ }
+ dictReleaseIterator(di);
+ decrRefCount(dstset);
+ } else {
+ /* If we have a target key where to store the resulting set
+ * create this key with the result set inside */
+ dbDelete(c->db,dstkey);
+ if (dictSize((dict*)dstset->ptr) > 0) {
+ dbAdd(c->db,dstkey,dstset);
+ addReplyLongLong(c,dictSize((dict*)dstset->ptr));
+ } else {
+ decrRefCount(dstset);
+ addReply(c,shared.czero);
+ }
+ server.dirty++;
+ }
+ zfree(dv);
+}
+
+void sunionCommand(redisClient *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
+}
+
+void sunionstoreCommand(redisClient *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
+}
+
+void sdiffCommand(redisClient *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
+}
+
+void sdiffstoreCommand(redisClient *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
+}
diff --git a/src/t_string.c b/src/t_string.c
new file mode 100644
index 000000000..eaaec05be
--- /dev/null
+++ b/src/t_string.c
@@ -0,0 +1,251 @@
+#include "redis.h"
+
+/*-----------------------------------------------------------------------------
+ * String Commands
+ *----------------------------------------------------------------------------*/
+
+void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expire) {
+ int retval;
+ long seconds = 0; /* initialized to avoid an harmness warning */
+
+ if (expire) {
+ if (getLongFromObjectOrReply(c, expire, &seconds, NULL) != REDIS_OK)
+ return;
+ if (seconds <= 0) {
+ addReplySds(c,sdsnew("-ERR invalid expire time in SETEX\r\n"));
+ return;
+ }
+ }
+
+ touchWatchedKey(c->db,key);
+ if (nx) deleteIfVolatile(c->db,key);
+ retval = dbAdd(c->db,key,val);
+ if (retval == REDIS_ERR) {
+ if (!nx) {
+ dbReplace(c->db,key,val);
+ incrRefCount(val);
+ } else {
+ addReply(c,shared.czero);
+ return;
+ }
+ } else {
+ incrRefCount(val);
+ }
+ server.dirty++;
+ removeExpire(c->db,key);
+ if (expire) setExpire(c->db,key,time(NULL)+seconds);
+ addReply(c, nx ? shared.cone : shared.ok);
+}
+
+void setCommand(redisClient *c) {
+ setGenericCommand(c,0,c->argv[1],c->argv[2],NULL);
+}
+
+void setnxCommand(redisClient *c) {
+ setGenericCommand(c,1,c->argv[1],c->argv[2],NULL);
+}
+
+void setexCommand(redisClient *c) {
+ setGenericCommand(c,0,c->argv[1],c->argv[3],c->argv[2]);
+}
+
+int getGenericCommand(redisClient *c) {
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
+ return REDIS_OK;
+
+ if (o->type != REDIS_STRING) {
+ addReply(c,shared.wrongtypeerr);
+ return REDIS_ERR;
+ } else {
+ addReplyBulk(c,o);
+ return REDIS_OK;
+ }
+}
+
+void getCommand(redisClient *c) {
+ getGenericCommand(c);
+}
+
+void getsetCommand(redisClient *c) {
+ if (getGenericCommand(c) == REDIS_ERR) return;
+ dbReplace(c->db,c->argv[1],c->argv[2]);
+ incrRefCount(c->argv[2]);
+ server.dirty++;
+ removeExpire(c->db,c->argv[1]);
+}
+
+void mgetCommand(redisClient *c) {
+ int j;
+
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
+ for (j = 1; j < c->argc; j++) {
+ robj *o = lookupKeyRead(c->db,c->argv[j]);
+ if (o == NULL) {
+ addReply(c,shared.nullbulk);
+ } else {
+ if (o->type != REDIS_STRING) {
+ addReply(c,shared.nullbulk);
+ } else {
+ addReplyBulk(c,o);
+ }
+ }
+ }
+}
+
+void msetGenericCommand(redisClient *c, int nx) {
+ int j, busykeys = 0;
+
+ if ((c->argc % 2) == 0) {
+ addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
+ return;
+ }
+ /* Handle the NX flag. The MSETNX semantic is to return zero and don't
+ * set nothing at all if at least one already key exists. */
+ if (nx) {
+ for (j = 1; j < c->argc; j += 2) {
+ if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
+ busykeys++;
+ }
+ }
+ }
+ if (busykeys) {
+ addReply(c, shared.czero);
+ return;
+ }
+
+ for (j = 1; j < c->argc; j += 2) {
+ c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
+ dbReplace(c->db,c->argv[j],c->argv[j+1]);
+ incrRefCount(c->argv[j+1]);
+ removeExpire(c->db,c->argv[j]);
+ }
+ server.dirty += (c->argc-1)/2;
+ addReply(c, nx ? shared.cone : shared.ok);
+}
+
+void msetCommand(redisClient *c) {
+ msetGenericCommand(c,0);
+}
+
+void msetnxCommand(redisClient *c) {
+ msetGenericCommand(c,1);
+}
+
+void incrDecrCommand(redisClient *c, long long incr) {
+ long long value;
+ robj *o;
+
+ o = lookupKeyWrite(c->db,c->argv[1]);
+ if (o != NULL && checkType(c,o,REDIS_STRING)) return;
+ if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;
+
+ value += incr;
+ o = createStringObjectFromLongLong(value);
+ dbReplace(c->db,c->argv[1],o);
+ server.dirty++;
+ addReply(c,shared.colon);
+ addReply(c,o);
+ addReply(c,shared.crlf);
+}
+
+void incrCommand(redisClient *c) {
+ incrDecrCommand(c,1);
+}
+
+void decrCommand(redisClient *c) {
+ incrDecrCommand(c,-1);
+}
+
+void incrbyCommand(redisClient *c) {
+ long long incr;
+
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ incrDecrCommand(c,incr);
+}
+
+void decrbyCommand(redisClient *c) {
+ long long incr;
+
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ incrDecrCommand(c,-incr);
+}
+
+void appendCommand(redisClient *c) {
+ int retval;
+ size_t totlen;
+ robj *o;
+
+ o = lookupKeyWrite(c->db,c->argv[1]);
+ if (o == NULL) {
+ /* Create the key */
+ retval = dbAdd(c->db,c->argv[1],c->argv[2]);
+ incrRefCount(c->argv[2]);
+ totlen = stringObjectLen(c->argv[2]);
+ } else {
+ if (o->type != REDIS_STRING) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ /* If the object is specially encoded or shared we have to make
+ * a copy */
+ if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
+ robj *decoded = getDecodedObject(o);
+
+ o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
+ decrRefCount(decoded);
+ dbReplace(c->db,c->argv[1],o);
+ }
+ /* APPEND! */
+ if (c->argv[2]->encoding == REDIS_ENCODING_RAW) {
+ o->ptr = sdscatlen(o->ptr,
+ c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
+ } else {
+ o->ptr = sdscatprintf(o->ptr, "%ld",
+ (unsigned long) c->argv[2]->ptr);
+ }
+ totlen = sdslen(o->ptr);
+ }
+ server.dirty++;
+ addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen));
+}
+
+void substrCommand(redisClient *c) {
+ robj *o;
+ long start = atoi(c->argv[2]->ptr);
+ long end = atoi(c->argv[3]->ptr);
+ size_t rangelen, strlen;
+ sds range;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,o,REDIS_STRING)) return;
+
+ o = getDecodedObject(o);
+ strlen = sdslen(o->ptr);
+
+ /* convert negative indexes */
+ if (start < 0) start = strlen+start;
+ if (end < 0) end = strlen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || (size_t)start >= strlen) {
+ /* Out of range start or start > end result in null reply */
+ addReply(c,shared.nullbulk);
+ decrRefCount(o);
+ return;
+ }
+ if ((size_t)end >= strlen) end = strlen-1;
+ rangelen = (end-start)+1;
+
+ /* Return the result */
+ addReplySds(c,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen));
+ range = sdsnewlen((char*)o->ptr+start,rangelen);
+ addReplySds(c,range);
+ addReply(c,shared.crlf);
+ decrRefCount(o);
+}
+
+
diff --git a/src/t_zset.c b/src/t_zset.c
new file mode 100644
index 000000000..de32a8eed
--- /dev/null
+++ b/src/t_zset.c
@@ -0,0 +1,985 @@
+#include "redis.h"
+
+#include <math.h>
+
+/*-----------------------------------------------------------------------------
+ * Sorted set API
+ *----------------------------------------------------------------------------*/
+
+/* ZSETs are ordered sets using two data structures to hold the same elements
+ * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
+ * data structure.
+ *
+ * The elements are added to an hash table mapping Redis objects to scores.
+ * At the same time the elements are added to a skip list mapping scores
+ * to Redis objects (so objects are sorted by scores in this "view"). */
+
+/* This skiplist implementation is almost a C translation of the original
+ * algorithm described by William Pugh in "Skip Lists: A Probabilistic
+ * Alternative to Balanced Trees", modified in three ways:
+ * a) this implementation allows for repeated values.
+ * b) the comparison is not just by key (our 'score') but by satellite data.
+ * c) there is a back pointer, so it's a doubly linked list with the back
+ * pointers being only at "level 1". This allows to traverse the list
+ * from tail to head, useful for ZREVRANGE. */
+
+zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
+ zskiplistNode *zn = zmalloc(sizeof(*zn));
+
+ zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
+ if (level > 1)
+ zn->span = zmalloc(sizeof(unsigned int) * (level - 1));
+ else
+ zn->span = NULL;
+ zn->score = score;
+ zn->obj = obj;
+ return zn;
+}
+
+zskiplist *zslCreate(void) {
+ int j;
+ zskiplist *zsl;
+
+ zsl = zmalloc(sizeof(*zsl));
+ zsl->level = 1;
+ zsl->length = 0;
+ zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
+ for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
+ zsl->header->forward[j] = NULL;
+
+ /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
+ if (j < ZSKIPLIST_MAXLEVEL-1)
+ zsl->header->span[j] = 0;
+ }
+ zsl->header->backward = NULL;
+ zsl->tail = NULL;
+ return zsl;
+}
+
+void zslFreeNode(zskiplistNode *node) {
+ decrRefCount(node->obj);
+ zfree(node->forward);
+ zfree(node->span);
+ zfree(node);
+}
+
+void zslFree(zskiplist *zsl) {
+ zskiplistNode *node = zsl->header->forward[0], *next;
+
+ zfree(zsl->header->forward);
+ zfree(zsl->header->span);
+ zfree(zsl->header);
+ while(node) {
+ next = node->forward[0];
+ zslFreeNode(node);
+ node = next;
+ }
+ zfree(zsl);
+}
+
+int zslRandomLevel(void) {
+ int level = 1;
+ while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
+ level += 1;
+ return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
+}
+
+void zslInsert(zskiplist *zsl, double score, robj *obj) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ unsigned int rank[ZSKIPLIST_MAXLEVEL];
+ int i, level;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ /* store rank that is crossed to reach the insert position */
+ rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
+
+ while (x->forward[i] &&
+ (x->forward[i]->score < score ||
+ (x->forward[i]->score == score &&
+ compareStringObjects(x->forward[i]->obj,obj) < 0))) {
+ rank[i] += i > 0 ? x->span[i-1] : 1;
+ x = x->forward[i];
+ }
+ update[i] = x;
+ }
+ /* we assume the key is not already inside, since we allow duplicated
+ * scores, and the re-insertion of score and redis object should never
+ * happpen since the caller of zslInsert() should test in the hash table
+ * if the element is already inside or not. */
+ level = zslRandomLevel();
+ if (level > zsl->level) {
+ for (i = zsl->level; i < level; i++) {
+ rank[i] = 0;
+ update[i] = zsl->header;
+ update[i]->span[i-1] = zsl->length;
+ }
+ zsl->level = level;
+ }
+ x = zslCreateNode(level,score,obj);
+ for (i = 0; i < level; i++) {
+ x->forward[i] = update[i]->forward[i];
+ update[i]->forward[i] = x;
+
+ /* update span covered by update[i] as x is inserted here */
+ if (i > 0) {
+ x->span[i-1] = update[i]->span[i-1] - (rank[0] - rank[i]);
+ update[i]->span[i-1] = (rank[0] - rank[i]) + 1;
+ }
+ }
+
+ /* increment span for untouched levels */
+ for (i = level; i < zsl->level; i++) {
+ update[i]->span[i-1]++;
+ }
+
+ x->backward = (update[0] == zsl->header) ? NULL : update[0];
+ if (x->forward[0])
+ x->forward[0]->backward = x;
+ else
+ zsl->tail = x;
+ zsl->length++;
+}
+
+/* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
+void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
+ int i;
+ for (i = 0; i < zsl->level; i++) {
+ if (update[i]->forward[i] == x) {
+ if (i > 0) {
+ update[i]->span[i-1] += x->span[i-1] - 1;
+ }
+ update[i]->forward[i] = x->forward[i];
+ } else {
+ /* invariant: i > 0, because update[0]->forward[0]
+ * is always equal to x */
+ update[i]->span[i-1] -= 1;
+ }
+ }
+ if (x->forward[0]) {
+ x->forward[0]->backward = x->backward;
+ } else {
+ zsl->tail = x->backward;
+ }
+ while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
+ zsl->level--;
+ zsl->length--;
+}
+
+/* Delete an element with matching score/object from the skiplist. */
+int zslDelete(zskiplist *zsl, double score, robj *obj) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] &&
+ (x->forward[i]->score < score ||
+ (x->forward[i]->score == score &&
+ compareStringObjects(x->forward[i]->obj,obj) < 0)))
+ x = x->forward[i];
+ update[i] = x;
+ }
+ /* We may have multiple elements with the same score, what we need
+ * is to find the element with both the right score and object. */
+ x = x->forward[0];
+ if (x && score == x->score && equalStringObjects(x->obj,obj)) {
+ zslDeleteNode(zsl, x, update);
+ zslFreeNode(x);
+ return 1;
+ } else {
+ return 0; /* not found */
+ }
+ return 0; /* not found */
+}
+
+/* Delete all the elements with score between min and max from the skiplist.
+ * Min and mx are inclusive, so a score >= min || score <= max is deleted.
+ * Note that this function takes the reference to the hash table view of the
+ * sorted set, in order to remove the elements from the hash table too. */
+unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict *dict) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ unsigned long removed = 0;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] && x->forward[i]->score < min)
+ x = x->forward[i];
+ update[i] = x;
+ }
+ /* We may have multiple elements with the same score, what we need
+ * is to find the element with both the right score and object. */
+ x = x->forward[0];
+ while (x && x->score <= max) {
+ zskiplistNode *next = x->forward[0];
+ zslDeleteNode(zsl, x, update);
+ dictDelete(dict,x->obj);
+ zslFreeNode(x);
+ removed++;
+ x = next;
+ }
+ return removed; /* not found */
+}
+
+/* Delete all the elements with rank between start and end from the skiplist.
+ * Start and end are inclusive. Note that start and end need to be 1-based */
+unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ unsigned long traversed = 0, removed = 0;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] && (traversed + (i > 0 ? x->span[i-1] : 1)) < start) {
+ traversed += i > 0 ? x->span[i-1] : 1;
+ x = x->forward[i];
+ }
+ update[i] = x;
+ }
+
+ traversed++;
+ x = x->forward[0];
+ while (x && traversed <= end) {
+ zskiplistNode *next = x->forward[0];
+ zslDeleteNode(zsl, x, update);
+ dictDelete(dict,x->obj);
+ zslFreeNode(x);
+ removed++;
+ traversed++;
+ x = next;
+ }
+ return removed;
+}
+
+/* Find the first node having a score equal or greater than the specified one.
+ * Returns NULL if there is no match. */
+zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
+ zskiplistNode *x;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] && x->forward[i]->score < score)
+ x = x->forward[i];
+ }
+ /* We may have multiple elements with the same score, what we need
+ * is to find the element with both the right score and object. */
+ return x->forward[0];
+}
+
+/* Find the rank for an element by both score and key.
+ * Returns 0 when the element cannot be found, rank otherwise.
+ * Note that the rank is 1-based due to the span of zsl->header to the
+ * first element. */
+unsigned long zslistTypeGetRank(zskiplist *zsl, double score, robj *o) {
+ zskiplistNode *x;
+ unsigned long rank = 0;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] &&
+ (x->forward[i]->score < score ||
+ (x->forward[i]->score == score &&
+ compareStringObjects(x->forward[i]->obj,o) <= 0))) {
+ rank += i > 0 ? x->span[i-1] : 1;
+ x = x->forward[i];
+ }
+
+ /* x might be equal to zsl->header, so test if obj is non-NULL */
+ if (x->obj && equalStringObjects(x->obj,o)) {
+ return rank;
+ }
+ }
+ return 0;
+}
+
+/* Finds an element by its rank. The rank argument needs to be 1-based. */
+zskiplistNode* zslistTypeGetElementByRank(zskiplist *zsl, unsigned long rank) {
+ zskiplistNode *x;
+ unsigned long traversed = 0;
+ int i;
+
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->forward[i] && (traversed + (i>0 ? x->span[i-1] : 1)) <= rank)
+ {
+ traversed += i > 0 ? x->span[i-1] : 1;
+ x = x->forward[i];
+ }
+ if (traversed == rank) {
+ return x;
+ }
+ }
+ return NULL;
+}
+
+/*-----------------------------------------------------------------------------
+ * Sorted set commands
+ *----------------------------------------------------------------------------*/
+
+/* This generic command implements both ZADD and ZINCRBY.
+ * scoreval is the score if the operation is a ZADD (doincrement == 0) or
+ * the increment if the operation is a ZINCRBY (doincrement == 1). */
+void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
+ robj *zsetobj;
+ zset *zs;
+ double *score;
+
+ if (isnan(scoreval)) {
+ addReplySds(c,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
+ return;
+ }
+
+ zsetobj = lookupKeyWrite(c->db,key);
+ if (zsetobj == NULL) {
+ zsetobj = createZsetObject();
+ dbAdd(c->db,key,zsetobj);
+ } else {
+ if (zsetobj->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ }
+ zs = zsetobj->ptr;
+
+ /* Ok now since we implement both ZADD and ZINCRBY here the code
+ * needs to handle the two different conditions. It's all about setting
+ * '*score', that is, the new score to set, to the right value. */
+ score = zmalloc(sizeof(double));
+ if (doincrement) {
+ dictEntry *de;
+
+ /* Read the old score. If the element was not present starts from 0 */
+ de = dictFind(zs->dict,ele);
+ if (de) {
+ double *oldscore = dictGetEntryVal(de);
+ *score = *oldscore + scoreval;
+ } else {
+ *score = scoreval;
+ }
+ if (isnan(*score)) {
+ addReplySds(c,
+ sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
+ zfree(score);
+ /* Note that we don't need to check if the zset may be empty and
+ * should be removed here, as we can only obtain Nan as score if
+ * there was already an element in the sorted set. */
+ return;
+ }
+ } else {
+ *score = scoreval;
+ }
+
+ /* What follows is a simple remove and re-insert operation that is common
+ * to both ZADD and ZINCRBY... */
+ if (dictAdd(zs->dict,ele,score) == DICT_OK) {
+ /* case 1: New element */
+ incrRefCount(ele); /* added to hash */
+ zslInsert(zs->zsl,*score,ele);
+ incrRefCount(ele); /* added to skiplist */
+ server.dirty++;
+ if (doincrement)
+ addReplyDouble(c,*score);
+ else
+ addReply(c,shared.cone);
+ } else {
+ dictEntry *de;
+ double *oldscore;
+
+ /* case 2: Score update operation */
+ de = dictFind(zs->dict,ele);
+ redisAssert(de != NULL);
+ oldscore = dictGetEntryVal(de);
+ if (*score != *oldscore) {
+ int deleted;
+
+ /* Remove and insert the element in the skip list with new score */
+ deleted = zslDelete(zs->zsl,*oldscore,ele);
+ redisAssert(deleted != 0);
+ zslInsert(zs->zsl,*score,ele);
+ incrRefCount(ele);
+ /* Update the score in the hash table */
+ dictReplace(zs->dict,ele,score);
+ server.dirty++;
+ } else {
+ zfree(score);
+ }
+ if (doincrement)
+ addReplyDouble(c,*score);
+ else
+ addReply(c,shared.czero);
+ }
+}
+
+void zaddCommand(redisClient *c) {
+ double scoreval;
+
+ if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
+ zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
+}
+
+void zincrbyCommand(redisClient *c) {
+ double scoreval;
+
+ if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
+ zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
+}
+
+void zremCommand(redisClient *c) {
+ robj *zsetobj;
+ zset *zs;
+ dictEntry *de;
+ double *oldscore;
+ int deleted;
+
+ if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,zsetobj,REDIS_ZSET)) return;
+
+ zs = zsetobj->ptr;
+ de = dictFind(zs->dict,c->argv[2]);
+ if (de == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+ /* Delete from the skiplist */
+ oldscore = dictGetEntryVal(de);
+ deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
+ redisAssert(deleted != 0);
+
+ /* Delete from the hash table */
+ dictDelete(zs->dict,c->argv[2]);
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty++;
+ addReply(c,shared.cone);
+}
+
+void zremrangebyscoreCommand(redisClient *c) {
+ double min;
+ double max;
+ long deleted;
+ robj *zsetobj;
+ zset *zs;
+
+ if ((getDoubleFromObjectOrReply(c, c->argv[2], &min, NULL) != REDIS_OK) ||
+ (getDoubleFromObjectOrReply(c, c->argv[3], &max, NULL) != REDIS_OK)) return;
+
+ if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,zsetobj,REDIS_ZSET)) return;
+
+ zs = zsetobj->ptr;
+ deleted = zslDeleteRangeByScore(zs->zsl,min,max,zs->dict);
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty += deleted;
+ addReplyLongLong(c,deleted);
+}
+
+void zremrangebyrankCommand(redisClient *c) {
+ long start;
+ long end;
+ int llen;
+ long deleted;
+ robj *zsetobj;
+ zset *zs;
+
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+
+ if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,zsetobj,REDIS_ZSET)) return;
+ zs = zsetobj->ptr;
+ llen = zs->zsl->length;
+
+ /* convert negative indexes */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || start >= llen) {
+ addReply(c,shared.czero);
+ return;
+ }
+ if (end >= llen) end = llen-1;
+
+ /* increment start and end because zsl*Rank functions
+ * use 1-based rank */
+ deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
+ server.dirty += deleted;
+ addReplyLongLong(c, deleted);
+}
+
+typedef struct {
+ dict *dict;
+ double weight;
+} zsetopsrc;
+
+int qsortCompareZsetopsrcByCardinality(const void *s1, const void *s2) {
+ zsetopsrc *d1 = (void*) s1, *d2 = (void*) s2;
+ unsigned long size1, size2;
+ size1 = d1->dict ? dictSize(d1->dict) : 0;
+ size2 = d2->dict ? dictSize(d2->dict) : 0;
+ return size1 - size2;
+}
+
+#define REDIS_AGGR_SUM 1
+#define REDIS_AGGR_MIN 2
+#define REDIS_AGGR_MAX 3
+#define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
+
+inline static void zunionInterAggregate(double *target, double val, int aggregate) {
+ if (aggregate == REDIS_AGGR_SUM) {
+ *target = *target + val;
+ } else if (aggregate == REDIS_AGGR_MIN) {
+ *target = val < *target ? val : *target;
+ } else if (aggregate == REDIS_AGGR_MAX) {
+ *target = val > *target ? val : *target;
+ } else {
+ /* safety net */
+ redisPanic("Unknown ZUNION/INTER aggregate type");
+ }
+}
+
+void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
+ int i, j, setnum;
+ int aggregate = REDIS_AGGR_SUM;
+ zsetopsrc *src;
+ robj *dstobj;
+ zset *dstzset;
+ dictIterator *di;
+ dictEntry *de;
+
+ /* expect setnum input keys to be given */
+ setnum = atoi(c->argv[2]->ptr);
+ if (setnum < 1) {
+ addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
+ return;
+ }
+
+ /* test if the expected number of keys would overflow */
+ if (3+setnum > c->argc) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* read keys to be used for input */
+ src = zmalloc(sizeof(zsetopsrc) * setnum);
+ for (i = 0, j = 3; i < setnum; i++, j++) {
+ robj *obj = lookupKeyWrite(c->db,c->argv[j]);
+ if (!obj) {
+ src[i].dict = NULL;
+ } else {
+ if (obj->type == REDIS_ZSET) {
+ src[i].dict = ((zset*)obj->ptr)->dict;
+ } else if (obj->type == REDIS_SET) {
+ src[i].dict = (obj->ptr);
+ } else {
+ zfree(src);
+ addReply(c,shared.wrongtypeerr);
+ return;
+ }
+ }
+
+ /* default all weights to 1 */
+ src[i].weight = 1.0;
+ }
+
+ /* parse optional extra arguments */
+ if (j < c->argc) {
+ int remaining = c->argc - j;
+
+ while (remaining) {
+ if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) {
+ j++; remaining--;
+ for (i = 0; i < setnum; i++, j++, remaining--) {
+ if (getDoubleFromObjectOrReply(c, c->argv[j], &src[i].weight, NULL) != REDIS_OK)
+ return;
+ }
+ } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) {
+ j++; remaining--;
+ if (!strcasecmp(c->argv[j]->ptr,"sum")) {
+ aggregate = REDIS_AGGR_SUM;
+ } else if (!strcasecmp(c->argv[j]->ptr,"min")) {
+ aggregate = REDIS_AGGR_MIN;
+ } else if (!strcasecmp(c->argv[j]->ptr,"max")) {
+ aggregate = REDIS_AGGR_MAX;
+ } else {
+ zfree(src);
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ j++; remaining--;
+ } else {
+ zfree(src);
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* sort sets from the smallest to largest, this will improve our
+ * algorithm's performance */
+ qsort(src,setnum,sizeof(zsetopsrc),qsortCompareZsetopsrcByCardinality);
+
+ dstobj = createZsetObject();
+ dstzset = dstobj->ptr;
+
+ if (op == REDIS_OP_INTER) {
+ /* skip going over all entries if the smallest zset is NULL or empty */
+ if (src[0].dict && dictSize(src[0].dict) > 0) {
+ /* precondition: as src[0].dict is non-empty and the zsets are ordered
+ * from small to large, all src[i > 0].dict are non-empty too */
+ di = dictGetIterator(src[0].dict);
+ while((de = dictNext(di)) != NULL) {
+ double *score = zmalloc(sizeof(double)), value;
+ *score = src[0].weight * zunionInterDictValue(de);
+
+ for (j = 1; j < setnum; j++) {
+ dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
+ if (other) {
+ value = src[j].weight * zunionInterDictValue(other);
+ zunionInterAggregate(score, value, aggregate);
+ } else {
+ break;
+ }
+ }
+
+ /* skip entry when not present in every source dict */
+ if (j != setnum) {
+ zfree(score);
+ } else {
+ robj *o = dictGetEntryKey(de);
+ dictAdd(dstzset->dict,o,score);
+ incrRefCount(o); /* added to dictionary */
+ zslInsert(dstzset->zsl,*score,o);
+ incrRefCount(o); /* added to skiplist */
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ } else if (op == REDIS_OP_UNION) {
+ for (i = 0; i < setnum; i++) {
+ if (!src[i].dict) continue;
+
+ di = dictGetIterator(src[i].dict);
+ while((de = dictNext(di)) != NULL) {
+ /* skip key when already processed */
+ if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue;
+
+ double *score = zmalloc(sizeof(double)), value;
+ *score = src[i].weight * zunionInterDictValue(de);
+
+ /* because the zsets are sorted by size, its only possible
+ * for sets at larger indices to hold this entry */
+ for (j = (i+1); j < setnum; j++) {
+ dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
+ if (other) {
+ value = src[j].weight * zunionInterDictValue(other);
+ zunionInterAggregate(score, value, aggregate);
+ }
+ }
+
+ robj *o = dictGetEntryKey(de);
+ dictAdd(dstzset->dict,o,score);
+ incrRefCount(o); /* added to dictionary */
+ zslInsert(dstzset->zsl,*score,o);
+ incrRefCount(o); /* added to skiplist */
+ }
+ dictReleaseIterator(di);
+ }
+ } else {
+ /* unknown operator */
+ redisAssert(op == REDIS_OP_INTER || op == REDIS_OP_UNION);
+ }
+
+ dbDelete(c->db,dstkey);
+ if (dstzset->zsl->length) {
+ dbAdd(c->db,dstkey,dstobj);
+ addReplyLongLong(c, dstzset->zsl->length);
+ server.dirty++;
+ } else {
+ decrRefCount(dstobj);
+ addReply(c, shared.czero);
+ }
+ zfree(src);
+}
+
+void zunionstoreCommand(redisClient *c) {
+ zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
+}
+
+void zinterstoreCommand(redisClient *c) {
+ zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
+}
+
+void zrangeGenericCommand(redisClient *c, int reverse) {
+ robj *o;
+ long start;
+ long end;
+ int withscores = 0;
+ int llen;
+ int rangelen, j;
+ zset *zsetobj;
+ zskiplist *zsl;
+ zskiplistNode *ln;
+ robj *ele;
+
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+
+ if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
+ withscores = 1;
+ } else if (c->argc >= 5) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
+ || checkType(c,o,REDIS_ZSET)) return;
+ zsetobj = o->ptr;
+ zsl = zsetobj->zsl;
+ llen = zsl->length;
+
+ /* convert negative indexes */
+ if (start < 0) start = llen+start;
+ if (end < 0) end = llen+end;
+ if (start < 0) start = 0;
+ if (end < 0) end = 0;
+
+ /* indexes sanity checks */
+ if (start > end || start >= llen) {
+ /* Out of range start or start > end result in empty list */
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+ if (end >= llen) end = llen-1;
+ rangelen = (end-start)+1;
+
+ /* check if starting point is trivial, before searching
+ * the element in log(N) time */
+ if (reverse) {
+ ln = start == 0 ? zsl->tail : zslistTypeGetElementByRank(zsl, llen-start);
+ } else {
+ ln = start == 0 ?
+ zsl->header->forward[0] : zslistTypeGetElementByRank(zsl, start+1);
+ }
+
+ /* Return the result in form of a multi-bulk reply */
+ addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
+ withscores ? (rangelen*2) : rangelen));
+ for (j = 0; j < rangelen; j++) {
+ ele = ln->obj;
+ addReplyBulk(c,ele);
+ if (withscores)
+ addReplyDouble(c,ln->score);
+ ln = reverse ? ln->backward : ln->forward[0];
+ }
+}
+
+void zrangeCommand(redisClient *c) {
+ zrangeGenericCommand(c,0);
+}
+
+void zrevrangeCommand(redisClient *c) {
+ zrangeGenericCommand(c,1);
+}
+
+/* This command implements both ZRANGEBYSCORE and ZCOUNT.
+ * If justcount is non-zero, just the count is returned. */
+void genericZrangebyscoreCommand(redisClient *c, int justcount) {
+ robj *o;
+ double min, max;
+ int minex = 0, maxex = 0; /* are min or max exclusive? */
+ int offset = 0, limit = -1;
+ int withscores = 0;
+ int badsyntax = 0;
+
+ /* Parse the min-max interval. If one of the values is prefixed
+ * by the "(" character, it's considered "open". For instance
+ * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
+ * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
+ if (((char*)c->argv[2]->ptr)[0] == '(') {
+ min = strtod((char*)c->argv[2]->ptr+1,NULL);
+ minex = 1;
+ } else {
+ min = strtod(c->argv[2]->ptr,NULL);
+ }
+ if (((char*)c->argv[3]->ptr)[0] == '(') {
+ max = strtod((char*)c->argv[3]->ptr+1,NULL);
+ maxex = 1;
+ } else {
+ max = strtod(c->argv[3]->ptr,NULL);
+ }
+
+ /* Parse "WITHSCORES": note that if the command was called with
+ * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
+ * enter the following paths to parse WITHSCORES and LIMIT. */
+ if (c->argc == 5 || c->argc == 8) {
+ if (strcasecmp(c->argv[c->argc-1]->ptr,"withscores") == 0)
+ withscores = 1;
+ else
+ badsyntax = 1;
+ }
+ if (c->argc != (4 + withscores) && c->argc != (7 + withscores))
+ badsyntax = 1;
+ if (badsyntax) {
+ addReplySds(c,
+ sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
+ return;
+ }
+
+ /* Parse "LIMIT" */
+ if (c->argc == (7 + withscores) && strcasecmp(c->argv[4]->ptr,"limit")) {
+ addReply(c,shared.syntaxerr);
+ return;
+ } else if (c->argc == (7 + withscores)) {
+ offset = atoi(c->argv[5]->ptr);
+ limit = atoi(c->argv[6]->ptr);
+ if (offset < 0) offset = 0;
+ }
+
+ /* Ok, lookup the key and get the range */
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o == NULL) {
+ addReply(c,justcount ? shared.czero : shared.emptymultibulk);
+ } else {
+ if (o->type != REDIS_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ } else {
+ zset *zsetobj = o->ptr;
+ zskiplist *zsl = zsetobj->zsl;
+ zskiplistNode *ln;
+ robj *ele, *lenobj = NULL;
+ unsigned long rangelen = 0;
+
+ /* Get the first node with the score >= min, or with
+ * score > min if 'minex' is true. */
+ ln = zslFirstWithScore(zsl,min);
+ while (minex && ln && ln->score == min) ln = ln->forward[0];
+
+ if (ln == NULL) {
+ /* No element matching the speciifed interval */
+ addReply(c,justcount ? shared.czero : shared.emptymultibulk);
+ return;
+ }
+
+ /* We don't know in advance how many matching elements there
+ * are in the list, so we push this object that will represent
+ * the multi-bulk length in the output buffer, and will "fix"
+ * it later */
+ if (!justcount) {
+ lenobj = createObject(REDIS_STRING,NULL);
+ addReply(c,lenobj);
+ decrRefCount(lenobj);
+ }
+
+ while(ln && (maxex ? (ln->score < max) : (ln->score <= max))) {
+ if (offset) {
+ offset--;
+ ln = ln->forward[0];
+ continue;
+ }
+ if (limit == 0) break;
+ if (!justcount) {
+ ele = ln->obj;
+ addReplyBulk(c,ele);
+ if (withscores)
+ addReplyDouble(c,ln->score);
+ }
+ ln = ln->forward[0];
+ rangelen++;
+ if (limit > 0) limit--;
+ }
+ if (justcount) {
+ addReplyLongLong(c,(long)rangelen);
+ } else {
+ lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",
+ withscores ? (rangelen*2) : rangelen);
+ }
+ }
+ }
+}
+
+void zrangebyscoreCommand(redisClient *c) {
+ genericZrangebyscoreCommand(c,0);
+}
+
+void zcountCommand(redisClient *c) {
+ genericZrangebyscoreCommand(c,1);
+}
+
+void zcardCommand(redisClient *c) {
+ robj *o;
+ zset *zs;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,REDIS_ZSET)) return;
+
+ zs = o->ptr;
+ addReplyUlong(c,zs->zsl->length);
+}
+
+void zscoreCommand(redisClient *c) {
+ robj *o;
+ zset *zs;
+ dictEntry *de;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,o,REDIS_ZSET)) return;
+
+ zs = o->ptr;
+ de = dictFind(zs->dict,c->argv[2]);
+ if (!de) {
+ addReply(c,shared.nullbulk);
+ } else {
+ double *score = dictGetEntryVal(de);
+
+ addReplyDouble(c,*score);
+ }
+}
+
+void zrankGenericCommand(redisClient *c, int reverse) {
+ robj *o;
+ zset *zs;
+ zskiplist *zsl;
+ dictEntry *de;
+ unsigned long rank;
+ double *score;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
+ checkType(c,o,REDIS_ZSET)) return;
+
+ zs = o->ptr;
+ zsl = zs->zsl;
+ de = dictFind(zs->dict,c->argv[2]);
+ if (!de) {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+
+ score = dictGetEntryVal(de);
+ rank = zslistTypeGetRank(zsl, *score, c->argv[2]);
+ if (rank) {
+ if (reverse) {
+ addReplyLongLong(c, zsl->length - rank);
+ } else {
+ addReplyLongLong(c, rank-1);
+ }
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+}
+
+void zrankCommand(redisClient *c) {
+ zrankGenericCommand(c, 0);
+}
+
+void zrevrankCommand(redisClient *c) {
+ zrankGenericCommand(c, 1);
+}
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 000000000..cc2794f6a
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,223 @@
+#include "redis.h"
+#include <ctype.h>
+#include <limits.h>
+
+/* Glob-style pattern matching. */
+int stringmatchlen(const char *pattern, int patternLen,
+ const char *string, int stringLen, int nocase)
+{
+ while(patternLen) {
+ switch(pattern[0]) {
+ case '*':
+ while (pattern[1] == '*') {
+ pattern++;
+ patternLen--;
+ }
+ if (patternLen == 1)
+ return 1; /* match */
+ while(stringLen) {
+ if (stringmatchlen(pattern+1, patternLen-1,
+ string, stringLen, nocase))
+ return 1; /* match */
+ string++;
+ stringLen--;
+ }
+ return 0; /* no match */
+ break;
+ case '?':
+ if (stringLen == 0)
+ return 0; /* no match */
+ string++;
+ stringLen--;
+ break;
+ case '[':
+ {
+ int not, match;
+
+ pattern++;
+ patternLen--;
+ not = pattern[0] == '^';
+ if (not) {
+ pattern++;
+ patternLen--;
+ }
+ match = 0;
+ while(1) {
+ if (pattern[0] == '\\') {
+ pattern++;
+ patternLen--;
+ if (pattern[0] == string[0])
+ match = 1;
+ } else if (pattern[0] == ']') {
+ break;
+ } else if (patternLen == 0) {
+ pattern--;
+ patternLen++;
+ break;
+ } else if (pattern[1] == '-' && patternLen >= 3) {
+ int start = pattern[0];
+ int end = pattern[2];
+ int c = string[0];
+ if (start > end) {
+ int t = start;
+ start = end;
+ end = t;
+ }
+ if (nocase) {
+ start = tolower(start);
+ end = tolower(end);
+ c = tolower(c);
+ }
+ pattern += 2;
+ patternLen -= 2;
+ if (c >= start && c <= end)
+ match = 1;
+ } else {
+ if (!nocase) {
+ if (pattern[0] == string[0])
+ match = 1;
+ } else {
+ if (tolower((int)pattern[0]) == tolower((int)string[0]))
+ match = 1;
+ }
+ }
+ pattern++;
+ patternLen--;
+ }
+ if (not)
+ match = !match;
+ if (!match)
+ return 0; /* no match */
+ string++;
+ stringLen--;
+ break;
+ }
+ case '\\':
+ if (patternLen >= 2) {
+ pattern++;
+ patternLen--;
+ }
+ /* fall through */
+ default:
+ if (!nocase) {
+ if (pattern[0] != string[0])
+ return 0; /* no match */
+ } else {
+ if (tolower((int)pattern[0]) != tolower((int)string[0]))
+ return 0; /* no match */
+ }
+ string++;
+ stringLen--;
+ break;
+ }
+ pattern++;
+ patternLen--;
+ if (stringLen == 0) {
+ while(*pattern == '*') {
+ pattern++;
+ patternLen--;
+ }
+ break;
+ }
+ }
+ if (patternLen == 0 && stringLen == 0)
+ return 1;
+ return 0;
+}
+
+int stringmatch(const char *pattern, const char *string, int nocase) {
+ return stringmatchlen(pattern,strlen(pattern),string,strlen(string),nocase);
+}
+
+/* Convert a string representing an amount of memory into the number of
+ * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
+ * (1024*1024*1024).
+ *
+ * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
+ * set to 0 */
+long long memtoll(const char *p, int *err) {
+ const char *u;
+ char buf[128];
+ long mul; /* unit multiplier */
+ long long val;
+ unsigned int digits;
+
+ if (err) *err = 0;
+ /* Search the first non digit character. */
+ u = p;
+ if (*u == '-') u++;
+ while(*u && isdigit(*u)) u++;
+ if (*u == '\0' || !strcasecmp(u,"b")) {
+ mul = 1;
+ } else if (!strcasecmp(u,"k")) {
+ mul = 1000;
+ } else if (!strcasecmp(u,"kb")) {
+ mul = 1024;
+ } else if (!strcasecmp(u,"m")) {
+ mul = 1000*1000;
+ } else if (!strcasecmp(u,"mb")) {
+ mul = 1024*1024;
+ } else if (!strcasecmp(u,"g")) {
+ mul = 1000L*1000*1000;
+ } else if (!strcasecmp(u,"gb")) {
+ mul = 1024L*1024*1024;
+ } else {
+ if (err) *err = 1;
+ mul = 1;
+ }
+ digits = u-p;
+ if (digits >= sizeof(buf)) {
+ if (err) *err = 1;
+ return LLONG_MAX;
+ }
+ memcpy(buf,p,digits);
+ buf[digits] = '\0';
+ val = strtoll(buf,NULL,10);
+ return val*mul;
+}
+
+/* Convert a long long into a string. Returns the number of
+ * characters needed to represent the number, that can be shorter if passed
+ * buffer length is not enough to store the whole number. */
+int ll2string(char *s, size_t len, long long value) {
+ char buf[32], *p;
+ unsigned long long v;
+ size_t l;
+
+ if (len == 0) return 0;
+ v = (value < 0) ? -value : value;
+ p = buf+31; /* point to the last character */
+ do {
+ *p-- = '0'+(v%10);
+ v /= 10;
+ } while(v);
+ if (value < 0) *p-- = '-';
+ p++;
+ l = 32-(p-buf);
+ if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
+ memcpy(s,p,l);
+ s[l] = '\0';
+ return l;
+}
+
+/* Check if the nul-terminated string 's' can be represented by a long
+ * (that is, is a number that fits into long without any other space or
+ * character before or after the digits).
+ *
+ * If so, the function returns REDIS_OK and *longval is set to the value
+ * of the number. Otherwise REDIS_ERR is returned */
+int isStringRepresentableAsLong(sds s, long *longval) {
+ char buf[32], *endptr;
+ long value;
+ int slen;
+
+ value = strtol(s, &endptr, 10);
+ if (endptr[0] != '\0') return REDIS_ERR;
+ slen = ll2string(buf,32,value);
+
+ /* If the number converted back into a string is not identical
+ * then it's not possible to encode the string as integer */
+ if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
+ if (longval) *longval = value;
+ return REDIS_OK;
+}
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 000000000..86d422474
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1 @@
+#define REDIS_VERSION "2.1.1"
diff --git a/src/vm.c b/src/vm.c
new file mode 100644
index 000000000..1aaa57eb5
--- /dev/null
+++ b/src/vm.c
@@ -0,0 +1,1126 @@
+#include "redis.h"
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <math.h>
+#include <signal.h>
+
+/* Virtual Memory is composed mainly of two subsystems:
+ * - Blocking Virutal Memory
+ * - Threaded Virtual Memory I/O
+ * The two parts are not fully decoupled, but functions are split among two
+ * different sections of the source code (delimited by comments) in order to
+ * make more clear what functionality is about the blocking VM and what about
+ * the threaded (not blocking) VM.
+ *
+ * Redis VM design:
+ *
+ * Redis VM is a blocking VM (one that blocks reading swapped values from
+ * disk into memory when a value swapped out is needed in memory) that is made
+ * unblocking by trying to examine the command argument vector in order to
+ * load in background values that will likely be needed in order to exec
+ * the command. The command is executed only once all the relevant keys
+ * are loaded into memory.
+ *
+ * This basically is almost as simple of a blocking VM, but almost as parallel
+ * as a fully non-blocking VM.
+ */
+
+/* =================== Virtual Memory - Blocking Side ====================== */
+
+/* Create a VM pointer object. This kind of objects are used in place of
+ * values in the key -> value hash table, for swapped out objects. */
+vmpointer *createVmPointer(int vtype) {
+ vmpointer *vp = zmalloc(sizeof(vmpointer));
+
+ vp->type = REDIS_VMPOINTER;
+ vp->storage = REDIS_VM_SWAPPED;
+ vp->vtype = vtype;
+ return vp;
+}
+
+void vmInit(void) {
+ off_t totsize;
+ int pipefds[2];
+ size_t stacksize;
+ struct flock fl;
+
+ if (server.vm_max_threads != 0)
+ zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
+
+ redisLog(REDIS_NOTICE,"Using '%s' as swap file",server.vm_swap_file);
+ /* Try to open the old swap file, otherwise create it */
+ if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) {
+ server.vm_fp = fopen(server.vm_swap_file,"w+b");
+ }
+ if (server.vm_fp == NULL) {
+ redisLog(REDIS_WARNING,
+ "Can't open the swap file: %s. Exiting.",
+ strerror(errno));
+ exit(1);
+ }
+ server.vm_fd = fileno(server.vm_fp);
+ /* Lock the swap file for writing, this is useful in order to avoid
+ * another instance to use the same swap file for a config error. */
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = fl.l_len = 0;
+ if (fcntl(server.vm_fd,F_SETLK,&fl) == -1) {
+ redisLog(REDIS_WARNING,
+ "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server.vm_swap_file, strerror(errno));
+ exit(1);
+ }
+ /* Initialize */
+ server.vm_next_page = 0;
+ server.vm_near_pages = 0;
+ server.vm_stats_used_pages = 0;
+ server.vm_stats_swapped_objects = 0;
+ server.vm_stats_swapouts = 0;
+ server.vm_stats_swapins = 0;
+ totsize = server.vm_pages*server.vm_page_size;
+ redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
+ if (ftruncate(server.vm_fd,totsize) == -1) {
+ redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
+ strerror(errno));
+ exit(1);
+ } else {
+ redisLog(REDIS_NOTICE,"Swap file allocated with success");
+ }
+ server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
+ redisLog(REDIS_VERBOSE,"Allocated %lld bytes page table for %lld pages",
+ (long long) (server.vm_pages+7)/8, server.vm_pages);
+ memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
+
+ /* Initialize threaded I/O (used by Virtual Memory) */
+ server.io_newjobs = listCreate();
+ server.io_processing = listCreate();
+ server.io_processed = listCreate();
+ server.io_ready_clients = listCreate();
+ pthread_mutex_init(&server.io_mutex,NULL);
+ pthread_mutex_init(&server.obj_freelist_mutex,NULL);
+ pthread_mutex_init(&server.io_swapfile_mutex,NULL);
+ server.io_active_threads = 0;
+ if (pipe(pipefds) == -1) {
+ redisLog(REDIS_WARNING,"Unable to intialized VM: pipe(2): %s. Exiting."
+ ,strerror(errno));
+ exit(1);
+ }
+ server.io_ready_pipe_read = pipefds[0];
+ server.io_ready_pipe_write = pipefds[1];
+ redisAssert(anetNonBlock(NULL,server.io_ready_pipe_read) != ANET_ERR);
+ /* LZF requires a lot of stack */
+ pthread_attr_init(&server.io_threads_attr);
+ pthread_attr_getstacksize(&server.io_threads_attr, &stacksize);
+ while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
+ pthread_attr_setstacksize(&server.io_threads_attr, stacksize);
+ /* Listen for events in the threaded I/O pipe */
+ if (aeCreateFileEvent(server.el, server.io_ready_pipe_read, AE_READABLE,
+ vmThreadedIOCompletedJob, NULL) == AE_ERR)
+ oom("creating file event");
+}
+
+/* Mark the page as used */
+void vmMarkPageUsed(off_t page) {
+ off_t byte = page/8;
+ int bit = page&7;
+ redisAssert(vmFreePage(page) == 1);
+ server.vm_bitmap[byte] |= 1<<bit;
+}
+
+/* Mark N contiguous pages as used, with 'page' being the first. */
+void vmMarkPagesUsed(off_t page, off_t count) {
+ off_t j;
+
+ for (j = 0; j < count; j++)
+ vmMarkPageUsed(page+j);
+ server.vm_stats_used_pages += count;
+ redisLog(REDIS_DEBUG,"Mark USED pages: %lld pages at %lld\n",
+ (long long)count, (long long)page);
+}
+
+/* Mark the page as free */
+void vmMarkPageFree(off_t page) {
+ off_t byte = page/8;
+ int bit = page&7;
+ redisAssert(vmFreePage(page) == 0);
+ server.vm_bitmap[byte] &= ~(1<<bit);
+}
+
+/* Mark N contiguous pages as free, with 'page' being the first. */
+void vmMarkPagesFree(off_t page, off_t count) {
+ off_t j;
+
+ for (j = 0; j < count; j++)
+ vmMarkPageFree(page+j);
+ server.vm_stats_used_pages -= count;
+ redisLog(REDIS_DEBUG,"Mark FREE pages: %lld pages at %lld\n",
+ (long long)count, (long long)page);
+}
+
+/* Test if the page is free */
+int vmFreePage(off_t page) {
+ off_t byte = page/8;
+ int bit = page&7;
+ return (server.vm_bitmap[byte] & (1<<bit)) == 0;
+}
+
+/* Find N contiguous free pages storing the first page of the cluster in *first.
+ * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
+ * REDIS_ERR is returned.
+ *
+ * This function uses a simple algorithm: we try to allocate
+ * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
+ * again from the start of the swap file searching for free spaces.
+ *
+ * If it looks pretty clear that there are no free pages near our offset
+ * we try to find less populated places doing a forward jump of
+ * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
+ * without hurry, and then we jump again and so forth...
+ *
+ * This function can be improved using a free list to avoid to guess
+ * too much, since we could collect data about freed pages.
+ *
+ * note: I implemented this function just after watching an episode of
+ * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
+ */
+int vmFindContiguousPages(off_t *first, off_t n) {
+ off_t base, offset = 0, since_jump = 0, numfree = 0;
+
+ if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
+ server.vm_near_pages = 0;
+ server.vm_next_page = 0;
+ }
+ server.vm_near_pages++; /* Yet another try for pages near to the old ones */
+ base = server.vm_next_page;
+
+ while(offset < server.vm_pages) {
+ off_t this = base+offset;
+
+ /* If we overflow, restart from page zero */
+ if (this >= server.vm_pages) {
+ this -= server.vm_pages;
+ if (this == 0) {
+ /* Just overflowed, what we found on tail is no longer
+ * interesting, as it's no longer contiguous. */
+ numfree = 0;
+ }
+ }
+ if (vmFreePage(this)) {
+ /* This is a free page */
+ numfree++;
+ /* Already got N free pages? Return to the caller, with success */
+ if (numfree == n) {
+ *first = this-(n-1);
+ server.vm_next_page = this+1;
+ redisLog(REDIS_DEBUG, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n, (long long) *first);
+ return REDIS_OK;
+ }
+ } else {
+ /* The current one is not a free page */
+ numfree = 0;
+ }
+
+ /* Fast-forward if the current page is not free and we already
+ * searched enough near this place. */
+ since_jump++;
+ if (!numfree && since_jump >= REDIS_VM_MAX_RANDOM_JUMP/4) {
+ offset += random() % REDIS_VM_MAX_RANDOM_JUMP;
+ since_jump = 0;
+ /* Note that even if we rewind after the jump, we are don't need
+ * to make sure numfree is set to zero as we only jump *if* it
+ * is set to zero. */
+ } else {
+ /* Otherwise just check the next page */
+ offset++;
+ }
+ }
+ return REDIS_ERR;
+}
+
+/* Write the specified object at the specified page of the swap file */
+int vmWriteObjectOnSwap(robj *o, off_t page) {
+ if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
+ if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
+ if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
+ redisLog(REDIS_WARNING,
+ "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
+ strerror(errno));
+ return REDIS_ERR;
+ }
+ rdbSaveObject(server.vm_fp,o);
+ fflush(server.vm_fp);
+ if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
+ return REDIS_OK;
+}
+
+/* Transfers the 'val' object to disk. Store all the information
+ * a 'vmpointer' object containing all the information needed to load the
+ * object back later is returned.
+ *
+ * If we can't find enough contiguous empty pages to swap the object on disk
+ * NULL is returned. */
+vmpointer *vmSwapObjectBlocking(robj *val) {
+ off_t pages = rdbSavedObjectPages(val,NULL);
+ off_t page;
+ vmpointer *vp;
+
+ redisAssert(val->storage == REDIS_VM_MEMORY);
+ redisAssert(val->refcount == 1);
+ if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return NULL;
+ if (vmWriteObjectOnSwap(val,page) == REDIS_ERR) return NULL;
+
+ vp = createVmPointer(val->type);
+ vp->page = page;
+ vp->usedpages = pages;
+ decrRefCount(val); /* Deallocate the object from memory. */
+ vmMarkPagesUsed(page,pages);
+ redisLog(REDIS_DEBUG,"VM: object %p swapped out at %lld (%lld pages)",
+ (void*) val,
+ (unsigned long long) page, (unsigned long long) pages);
+ server.vm_stats_swapped_objects++;
+ server.vm_stats_swapouts++;
+ return vp;
+}
+
+robj *vmReadObjectFromSwap(off_t page, int type) {
+ robj *o;
+
+ if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
+ if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
+ redisLog(REDIS_WARNING,
+ "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
+ strerror(errno));
+ _exit(1);
+ }
+ o = rdbLoadObject(type,server.vm_fp);
+ if (o == NULL) {
+ redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno));
+ _exit(1);
+ }
+ if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
+ return o;
+}
+
+/* Load the specified object from swap to memory.
+ * The newly allocated object is returned.
+ *
+ * If preview is true the unserialized object is returned to the caller but
+ * the pages are not marked as freed, nor the vp object is freed. */
+robj *vmGenericLoadObject(vmpointer *vp, int preview) {
+ robj *val;
+
+ redisAssert(vp->type == REDIS_VMPOINTER &&
+ (vp->storage == REDIS_VM_SWAPPED || vp->storage == REDIS_VM_LOADING));
+ val = vmReadObjectFromSwap(vp->page,vp->vtype);
+ if (!preview) {
+ redisLog(REDIS_DEBUG, "VM: object %p loaded from disk", (void*)vp);
+ vmMarkPagesFree(vp->page,vp->usedpages);
+ zfree(vp);
+ server.vm_stats_swapped_objects--;
+ } else {
+ redisLog(REDIS_DEBUG, "VM: object %p previewed from disk", (void*)vp);
+ }
+ server.vm_stats_swapins++;
+ return val;
+}
+
+/* Plain object loading, from swap to memory.
+ *
+ * 'o' is actually a redisVmPointer structure that will be freed by the call.
+ * The return value is the loaded object. */
+robj *vmLoadObject(robj *o) {
+ /* If we are loading the object in background, stop it, we
+ * need to load this object synchronously ASAP. */
+ if (o->storage == REDIS_VM_LOADING)
+ vmCancelThreadedIOJob(o);
+ return vmGenericLoadObject((vmpointer*)o,0);
+}
+
+/* Just load the value on disk, without to modify the key.
+ * This is useful when we want to perform some operation on the value
+ * without to really bring it from swap to memory, like while saving the
+ * dataset or rewriting the append only log. */
+robj *vmPreviewObject(robj *o) {
+ return vmGenericLoadObject((vmpointer*)o,1);
+}
+
+/* How a good candidate is this object for swapping?
+ * The better candidate it is, the greater the returned value.
+ *
+ * Currently we try to perform a fast estimation of the object size in
+ * memory, and combine it with aging informations.
+ *
+ * Basically swappability = idle-time * log(estimated size)
+ *
+ * Bigger objects are preferred over smaller objects, but not
+ * proportionally, this is why we use the logarithm. This algorithm is
+ * just a first try and will probably be tuned later. */
+double computeObjectSwappability(robj *o) {
+ /* actual age can be >= minage, but not < minage. As we use wrapping
+ * 21 bit clocks with minutes resolution for the LRU. */
+ time_t minage = abs(server.lruclock - o->lru);
+ long asize = 0, elesize;
+ robj *ele;
+ list *l;
+ listNode *ln;
+ dict *d;
+ struct dictEntry *de;
+ int z;
+
+ if (minage <= 0) return 0;
+ switch(o->type) {
+ case REDIS_STRING:
+ if (o->encoding != REDIS_ENCODING_RAW) {
+ asize = sizeof(*o);
+ } else {
+ asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
+ }
+ break;
+ case REDIS_LIST:
+ if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+ziplistSize(o->ptr);
+ } else {
+ l = o->ptr;
+ ln = listFirst(l);
+ asize = sizeof(list);
+ if (ln) {
+ ele = ln->value;
+ elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
+ (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
+ asize += (sizeof(listNode)+elesize)*listLength(l);
+ }
+ }
+ break;
+ case REDIS_SET:
+ case REDIS_ZSET:
+ z = (o->type == REDIS_ZSET);
+ d = z ? ((zset*)o->ptr)->dict : o->ptr;
+
+ asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ if (z) asize += sizeof(zset)-sizeof(dict);
+ if (dictSize(d)) {
+ de = dictGetRandomKey(d);
+ ele = dictGetEntryKey(de);
+ elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
+ (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
+ asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
+ if (z) asize += sizeof(zskiplistNode)*dictSize(d);
+ }
+ break;
+ case REDIS_HASH:
+ if (o->encoding == REDIS_ENCODING_ZIPMAP) {
+ unsigned char *p = zipmapRewind((unsigned char*)o->ptr);
+ unsigned int len = zipmapLen((unsigned char*)o->ptr);
+ unsigned int klen, vlen;
+ unsigned char *key, *val;
+
+ if ((p = zipmapNext(p,&key,&klen,&val,&vlen)) == NULL) {
+ klen = 0;
+ vlen = 0;
+ }
+ asize = len*(klen+vlen+3);
+ } else if (o->encoding == REDIS_ENCODING_HT) {
+ d = o->ptr;
+ asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ if (dictSize(d)) {
+ de = dictGetRandomKey(d);
+ ele = dictGetEntryKey(de);
+ elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
+ (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
+ ele = dictGetEntryVal(de);
+ elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
+ (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
+ asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
+ }
+ }
+ break;
+ }
+ return (double)minage*log(1+asize);
+}
+
+/* Try to swap an object that's a good candidate for swapping.
+ * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
+ * to swap any object at all.
+ *
+ * If 'usethreaded' is true, Redis will try to swap the object in background
+ * using I/O threads. */
+int vmSwapOneObject(int usethreads) {
+ int j, i;
+ struct dictEntry *best = NULL;
+ double best_swappability = 0;
+ redisDb *best_db = NULL;
+ robj *val;
+ sds key;
+
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+ /* Why maxtries is set to 100?
+ * Because this way (usually) we'll find 1 object even if just 1% - 2%
+ * are swappable objects */
+ int maxtries = 100;
+
+ if (dictSize(db->dict) == 0) continue;
+ for (i = 0; i < 5; i++) {
+ dictEntry *de;
+ double swappability;
+
+ if (maxtries) maxtries--;
+ de = dictGetRandomKey(db->dict);
+ val = dictGetEntryVal(de);
+ /* Only swap objects that are currently in memory.
+ *
+ * Also don't swap shared objects: not a good idea in general and
+ * we need to ensure that the main thread does not touch the
+ * object while the I/O thread is using it, but we can't
+ * control other keys without adding additional mutex. */
+ if (val->storage != REDIS_VM_MEMORY || val->refcount != 1) {
+ if (maxtries) i--; /* don't count this try */
+ continue;
+ }
+ swappability = computeObjectSwappability(val);
+ if (!best || swappability > best_swappability) {
+ best = de;
+ best_swappability = swappability;
+ best_db = db;
+ }
+ }
+ }
+ if (best == NULL) return REDIS_ERR;
+ key = dictGetEntryKey(best);
+ val = dictGetEntryVal(best);
+
+ redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f",
+ key, best_swappability);
+
+ /* Swap it */
+ if (usethreads) {
+ robj *keyobj = createStringObject(key,sdslen(key));
+ vmSwapObjectThreaded(keyobj,val,best_db);
+ decrRefCount(keyobj);
+ return REDIS_OK;
+ } else {
+ vmpointer *vp;
+
+ if ((vp = vmSwapObjectBlocking(val)) != NULL) {
+ dictGetEntryVal(best) = vp;
+ return REDIS_OK;
+ } else {
+ return REDIS_ERR;
+ }
+ }
+}
+
+int vmSwapOneObjectBlocking() {
+ return vmSwapOneObject(0);
+}
+
+int vmSwapOneObjectThreaded() {
+ return vmSwapOneObject(1);
+}
+
+/* Return true if it's safe to swap out objects in a given moment.
+ * Basically we don't want to swap objects out while there is a BGSAVE
+ * or a BGAEOREWRITE running in backgroud. */
+int vmCanSwapOut(void) {
+ return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1);
+}
+
+/* =================== Virtual Memory - Threaded I/O ======================= */
+
+void freeIOJob(iojob *j) {
+ if ((j->type == REDIS_IOJOB_PREPARE_SWAP ||
+ j->type == REDIS_IOJOB_DO_SWAP ||
+ j->type == REDIS_IOJOB_LOAD) && j->val != NULL)
+ {
+ /* we fix the storage type, otherwise decrRefCount() will try to
+ * kill the I/O thread Job (that does no longer exists). */
+ if (j->val->storage == REDIS_VM_SWAPPING)
+ j->val->storage = REDIS_VM_MEMORY;
+ decrRefCount(j->val);
+ }
+ decrRefCount(j->key);
+ zfree(j);
+}
+
+/* Every time a thread finished a Job, it writes a byte into the write side
+ * of an unix pipe in order to "awake" the main thread, and this function
+ * is called. */
+void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
+ int mask)
+{
+ char buf[1];
+ int retval, processed = 0, toprocess = -1, trytoswap = 1;
+ REDIS_NOTUSED(el);
+ REDIS_NOTUSED(mask);
+ REDIS_NOTUSED(privdata);
+
+ /* For every byte we read in the read side of the pipe, there is one
+ * I/O job completed to process. */
+ while((retval = read(fd,buf,1)) == 1) {
+ iojob *j;
+ listNode *ln;
+ struct dictEntry *de;
+
+ redisLog(REDIS_DEBUG,"Processing I/O completed job");
+
+ /* Get the processed element (the oldest one) */
+ lockThreadedIO();
+ redisAssert(listLength(server.io_processed) != 0);
+ if (toprocess == -1) {
+ toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100;
+ if (toprocess <= 0) toprocess = 1;
+ }
+ ln = listFirst(server.io_processed);
+ j = ln->value;
+ listDelNode(server.io_processed,ln);
+ unlockThreadedIO();
+ /* If this job is marked as canceled, just ignore it */
+ if (j->canceled) {
+ freeIOJob(j);
+ continue;
+ }
+ /* Post process it in the main thread, as there are things we
+ * can do just here to avoid race conditions and/or invasive locks */
+ redisLog(REDIS_DEBUG,"COMPLETED Job type: %d, ID %p, key: %s", j->type, (void*)j->id, (unsigned char*)j->key->ptr);
+ de = dictFind(j->db->dict,j->key->ptr);
+ redisAssert(de != NULL);
+ if (j->type == REDIS_IOJOB_LOAD) {
+ redisDb *db;
+ vmpointer *vp = dictGetEntryVal(de);
+
+ /* Key loaded, bring it at home */
+ vmMarkPagesFree(vp->page,vp->usedpages);
+ redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)",
+ (unsigned char*) j->key->ptr);
+ server.vm_stats_swapped_objects--;
+ server.vm_stats_swapins++;
+ dictGetEntryVal(de) = j->val;
+ incrRefCount(j->val);
+ db = j->db;
+ /* Handle clients waiting for this key to be loaded. */
+ handleClientsBlockedOnSwappedKey(db,j->key);
+ freeIOJob(j);
+ zfree(vp);
+ } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
+ /* Now we know the amount of pages required to swap this object.
+ * Let's find some space for it, and queue this task again
+ * rebranded as REDIS_IOJOB_DO_SWAP. */
+ if (!vmCanSwapOut() ||
+ vmFindContiguousPages(&j->page,j->pages) == REDIS_ERR)
+ {
+ /* Ooops... no space or we can't swap as there is
+ * a fork()ed Redis trying to save stuff on disk. */
+ j->val->storage = REDIS_VM_MEMORY; /* undo operation */
+ freeIOJob(j);
+ } else {
+ /* Note that we need to mark this pages as used now,
+ * if the job will be canceled, we'll mark them as freed
+ * again. */
+ vmMarkPagesUsed(j->page,j->pages);
+ j->type = REDIS_IOJOB_DO_SWAP;
+ lockThreadedIO();
+ queueIOJob(j);
+ unlockThreadedIO();
+ }
+ } else if (j->type == REDIS_IOJOB_DO_SWAP) {
+ vmpointer *vp;
+
+ /* Key swapped. We can finally free some memory. */
+ if (j->val->storage != REDIS_VM_SWAPPING) {
+ vmpointer *vp = (vmpointer*) j->id;
+ printf("storage: %d\n",vp->storage);
+ printf("key->name: %s\n",(char*)j->key->ptr);
+ printf("val: %p\n",(void*)j->val);
+ printf("val->type: %d\n",j->val->type);
+ printf("val->ptr: %s\n",(char*)j->val->ptr);
+ }
+ redisAssert(j->val->storage == REDIS_VM_SWAPPING);
+ vp = createVmPointer(j->val->type);
+ vp->page = j->page;
+ vp->usedpages = j->pages;
+ dictGetEntryVal(de) = vp;
+ /* Fix the storage otherwise decrRefCount will attempt to
+ * remove the associated I/O job */
+ j->val->storage = REDIS_VM_MEMORY;
+ decrRefCount(j->val);
+ redisLog(REDIS_DEBUG,
+ "VM: object %s swapped out at %lld (%lld pages) (threaded)",
+ (unsigned char*) j->key->ptr,
+ (unsigned long long) j->page, (unsigned long long) j->pages);
+ server.vm_stats_swapped_objects++;
+ server.vm_stats_swapouts++;
+ freeIOJob(j);
+ /* Put a few more swap requests in queue if we are still
+ * out of memory */
+ if (trytoswap && vmCanSwapOut() &&
+ zmalloc_used_memory() > server.vm_max_memory)
+ {
+ int more = 1;
+ while(more) {
+ lockThreadedIO();
+ more = listLength(server.io_newjobs) <
+ (unsigned) server.vm_max_threads;
+ unlockThreadedIO();
+ /* Don't waste CPU time if swappable objects are rare. */
+ if (vmSwapOneObjectThreaded() == REDIS_ERR) {
+ trytoswap = 0;
+ break;
+ }
+ }
+ }
+ }
+ processed++;
+ if (processed == toprocess) return;
+ }
+ if (retval < 0 && errno != EAGAIN) {
+ redisLog(REDIS_WARNING,
+ "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
+ strerror(errno));
+ }
+}
+
+void lockThreadedIO(void) {
+ pthread_mutex_lock(&server.io_mutex);
+}
+
+void unlockThreadedIO(void) {
+ pthread_mutex_unlock(&server.io_mutex);
+}
+
+/* Remove the specified object from the threaded I/O queue if still not
+ * processed, otherwise make sure to flag it as canceled. */
+void vmCancelThreadedIOJob(robj *o) {
+ list *lists[3] = {
+ server.io_newjobs, /* 0 */
+ server.io_processing, /* 1 */
+ server.io_processed /* 2 */
+ };
+ int i;
+
+ redisAssert(o->storage == REDIS_VM_LOADING || o->storage == REDIS_VM_SWAPPING);
+again:
+ lockThreadedIO();
+ /* Search for a matching object in one of the queues */
+ for (i = 0; i < 3; i++) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(lists[i],&li);
+ while ((ln = listNext(&li)) != NULL) {
+ iojob *job = ln->value;
+
+ if (job->canceled) continue; /* Skip this, already canceled. */
+ if (job->id == o) {
+ redisLog(REDIS_DEBUG,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
+ (void*)job, (char*)job->key->ptr, job->type, i);
+ /* Mark the pages as free since the swap didn't happened
+ * or happened but is now discarded. */
+ if (i != 1 && job->type == REDIS_IOJOB_DO_SWAP)
+ vmMarkPagesFree(job->page,job->pages);
+ /* Cancel the job. It depends on the list the job is
+ * living in. */
+ switch(i) {
+ case 0: /* io_newjobs */
+ /* If the job was yet not processed the best thing to do
+ * is to remove it from the queue at all */
+ freeIOJob(job);
+ listDelNode(lists[i],ln);
+ break;
+ case 1: /* io_processing */
+ /* Oh Shi- the thread is messing with the Job:
+ *
+ * Probably it's accessing the object if this is a
+ * PREPARE_SWAP or DO_SWAP job.
+ * If it's a LOAD job it may be reading from disk and
+ * if we don't wait for the job to terminate before to
+ * cancel it, maybe in a few microseconds data can be
+ * corrupted in this pages. So the short story is:
+ *
+ * Better to wait for the job to move into the
+ * next queue (processed)... */
+
+ /* We try again and again until the job is completed. */
+ unlockThreadedIO();
+ /* But let's wait some time for the I/O thread
+ * to finish with this job. After all this condition
+ * should be very rare. */
+ usleep(1);
+ goto again;
+ case 2: /* io_processed */
+ /* The job was already processed, that's easy...
+ * just mark it as canceled so that we'll ignore it
+ * when processing completed jobs. */
+ job->canceled = 1;
+ break;
+ }
+ /* Finally we have to adjust the storage type of the object
+ * in order to "UNDO" the operaiton. */
+ if (o->storage == REDIS_VM_LOADING)
+ o->storage = REDIS_VM_SWAPPED;
+ else if (o->storage == REDIS_VM_SWAPPING)
+ o->storage = REDIS_VM_MEMORY;
+ unlockThreadedIO();
+ redisLog(REDIS_DEBUG,"*** DONE");
+ return;
+ }
+ }
+ }
+ unlockThreadedIO();
+ printf("Not found: %p\n", (void*)o);
+ redisAssert(1 != 1); /* We should never reach this */
+}
+
+void *IOThreadEntryPoint(void *arg) {
+ iojob *j;
+ listNode *ln;
+ REDIS_NOTUSED(arg);
+
+ pthread_detach(pthread_self());
+ while(1) {
+ /* Get a new job to process */
+ lockThreadedIO();
+ if (listLength(server.io_newjobs) == 0) {
+ /* No new jobs in queue, exit. */
+ redisLog(REDIS_DEBUG,"Thread %ld exiting, nothing to do",
+ (long) pthread_self());
+ server.io_active_threads--;
+ unlockThreadedIO();
+ return NULL;
+ }
+ ln = listFirst(server.io_newjobs);
+ j = ln->value;
+ listDelNode(server.io_newjobs,ln);
+ /* Add the job in the processing queue */
+ j->thread = pthread_self();
+ listAddNodeTail(server.io_processing,j);
+ ln = listLast(server.io_processing); /* We use ln later to remove it */
+ unlockThreadedIO();
+ redisLog(REDIS_DEBUG,"Thread %ld got a new job (type %d): %p about key '%s'",
+ (long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr);
+
+ /* Process the Job */
+ if (j->type == REDIS_IOJOB_LOAD) {
+ vmpointer *vp = (vmpointer*)j->id;
+ j->val = vmReadObjectFromSwap(j->page,vp->vtype);
+ } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
+ FILE *fp = fopen("/dev/null","w+");
+ j->pages = rdbSavedObjectPages(j->val,fp);
+ fclose(fp);
+ } else if (j->type == REDIS_IOJOB_DO_SWAP) {
+ if (vmWriteObjectOnSwap(j->val,j->page) == REDIS_ERR)
+ j->canceled = 1;
+ }
+
+ /* Done: insert the job into the processed queue */
+ redisLog(REDIS_DEBUG,"Thread %ld completed the job: %p (key %s)",
+ (long) pthread_self(), (void*)j, (char*)j->key->ptr);
+ lockThreadedIO();
+ listDelNode(server.io_processing,ln);
+ listAddNodeTail(server.io_processed,j);
+ unlockThreadedIO();
+
+ /* Signal the main thread there is new stuff to process */
+ redisAssert(write(server.io_ready_pipe_write,"x",1) == 1);
+ }
+ return NULL; /* never reached */
+}
+
+void spawnIOThread(void) {
+ pthread_t thread;
+ sigset_t mask, omask;
+ int err;
+
+ sigemptyset(&mask);
+ sigaddset(&mask,SIGCHLD);
+ sigaddset(&mask,SIGHUP);
+ sigaddset(&mask,SIGPIPE);
+ pthread_sigmask(SIG_SETMASK, &mask, &omask);
+ while ((err = pthread_create(&thread,&server.io_threads_attr,IOThreadEntryPoint,NULL)) != 0) {
+ redisLog(REDIS_WARNING,"Unable to spawn an I/O thread: %s",
+ strerror(err));
+ usleep(1000000);
+ }
+ pthread_sigmask(SIG_SETMASK, &omask, NULL);
+ server.io_active_threads++;
+}
+
+/* We need to wait for the last thread to exit before we are able to
+ * fork() in order to BGSAVE or BGREWRITEAOF. */
+void waitEmptyIOJobsQueue(void) {
+ while(1) {
+ int io_processed_len;
+
+ lockThreadedIO();
+ if (listLength(server.io_newjobs) == 0 &&
+ listLength(server.io_processing) == 0 &&
+ server.io_active_threads == 0)
+ {
+ unlockThreadedIO();
+ return;
+ }
+ /* While waiting for empty jobs queue condition we post-process some
+ * finshed job, as I/O threads may be hanging trying to write against
+ * the io_ready_pipe_write FD but there are so much pending jobs that
+ * it's blocking. */
+ io_processed_len = listLength(server.io_processed);
+ unlockThreadedIO();
+ if (io_processed_len) {
+ vmThreadedIOCompletedJob(NULL,server.io_ready_pipe_read,NULL,0);
+ usleep(1000); /* 1 millisecond */
+ } else {
+ usleep(10000); /* 10 milliseconds */
+ }
+ }
+}
+
+void vmReopenSwapFile(void) {
+ /* Note: we don't close the old one as we are in the child process
+ * and don't want to mess at all with the original file object. */
+ server.vm_fp = fopen(server.vm_swap_file,"r+b");
+ if (server.vm_fp == NULL) {
+ redisLog(REDIS_WARNING,"Can't re-open the VM swap file: %s. Exiting.",
+ server.vm_swap_file);
+ _exit(1);
+ }
+ server.vm_fd = fileno(server.vm_fp);
+}
+
+/* This function must be called while with threaded IO locked */
+void queueIOJob(iojob *j) {
+ redisLog(REDIS_DEBUG,"Queued IO Job %p type %d about key '%s'\n",
+ (void*)j, j->type, (char*)j->key->ptr);
+ listAddNodeTail(server.io_newjobs,j);
+ if (server.io_active_threads < server.vm_max_threads)
+ spawnIOThread();
+}
+
+int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db) {
+ iojob *j;
+
+ j = zmalloc(sizeof(*j));
+ j->type = REDIS_IOJOB_PREPARE_SWAP;
+ j->db = db;
+ j->key = key;
+ incrRefCount(key);
+ j->id = j->val = val;
+ incrRefCount(val);
+ j->canceled = 0;
+ j->thread = (pthread_t) -1;
+ val->storage = REDIS_VM_SWAPPING;
+
+ lockThreadedIO();
+ queueIOJob(j);
+ unlockThreadedIO();
+ return REDIS_OK;
+}
+
+/* ============ Virtual Memory - Blocking clients on missing keys =========== */
+
+/* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
+ * If there is not already a job loading the key, it is craeted.
+ * The key is added to the io_keys list in the client structure, and also
+ * in the hash table mapping swapped keys to waiting clients, that is,
+ * server.io_waited_keys. */
+int waitForSwappedKey(redisClient *c, robj *key) {
+ struct dictEntry *de;
+ robj *o;
+ list *l;
+
+ /* If the key does not exist or is already in RAM we don't need to
+ * block the client at all. */
+ de = dictFind(c->db->dict,key->ptr);
+ if (de == NULL) return 0;
+ o = dictGetEntryVal(de);
+ if (o->storage == REDIS_VM_MEMORY) {
+ return 0;
+ } else if (o->storage == REDIS_VM_SWAPPING) {
+ /* We were swapping the key, undo it! */
+ vmCancelThreadedIOJob(o);
+ return 0;
+ }
+
+ /* OK: the key is either swapped, or being loaded just now. */
+
+ /* Add the key to the list of keys this client is waiting for.
+ * This maps clients to keys they are waiting for. */
+ listAddNodeTail(c->io_keys,key);
+ incrRefCount(key);
+
+ /* Add the client to the swapped keys => clients waiting map. */
+ de = dictFind(c->db->io_keys,key);
+ if (de == NULL) {
+ int retval;
+
+ /* For every key we take a list of clients blocked for it */
+ l = listCreate();
+ retval = dictAdd(c->db->io_keys,key,l);
+ incrRefCount(key);
+ redisAssert(retval == DICT_OK);
+ } else {
+ l = dictGetEntryVal(de);
+ }
+ listAddNodeTail(l,c);
+
+ /* Are we already loading the key from disk? If not create a job */
+ if (o->storage == REDIS_VM_SWAPPED) {
+ iojob *j;
+ vmpointer *vp = (vmpointer*)o;
+
+ o->storage = REDIS_VM_LOADING;
+ j = zmalloc(sizeof(*j));
+ j->type = REDIS_IOJOB_LOAD;
+ j->db = c->db;
+ j->id = (robj*)vp;
+ j->key = key;
+ incrRefCount(key);
+ j->page = vp->page;
+ j->val = NULL;
+ j->canceled = 0;
+ j->thread = (pthread_t) -1;
+ lockThreadedIO();
+ queueIOJob(j);
+ unlockThreadedIO();
+ }
+ return 1;
+}
+
+/* Preload keys for any command with first, last and step values for
+ * the command keys prototype, as defined in the command table. */
+void waitForMultipleSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
+ int j, last;
+ if (cmd->vm_firstkey == 0) return;
+ last = cmd->vm_lastkey;
+ if (last < 0) last = argc+last;
+ for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep) {
+ redisAssert(j < argc);
+ waitForSwappedKey(c,argv[j]);
+ }
+}
+
+/* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
+ * Note that the number of keys to preload is user-defined, so we need to
+ * apply a sanity check against argc. */
+void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
+ int i, num;
+ REDIS_NOTUSED(cmd);
+
+ num = atoi(argv[2]->ptr);
+ if (num > (argc-3)) return;
+ for (i = 0; i < num; i++) {
+ waitForSwappedKey(c,argv[3+i]);
+ }
+}
+
+/* Preload keys needed to execute the entire MULTI/EXEC block.
+ *
+ * This function is called by blockClientOnSwappedKeys when EXEC is issued,
+ * and will block the client when any command requires a swapped out value. */
+void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
+ int i, margc;
+ struct redisCommand *mcmd;
+ robj **margv;
+ REDIS_NOTUSED(cmd);
+ REDIS_NOTUSED(argc);
+ REDIS_NOTUSED(argv);
+
+ if (!(c->flags & REDIS_MULTI)) return;
+ for (i = 0; i < c->mstate.count; i++) {
+ mcmd = c->mstate.commands[i].cmd;
+ margc = c->mstate.commands[i].argc;
+ margv = c->mstate.commands[i].argv;
+
+ if (mcmd->vm_preload_proc != NULL) {
+ mcmd->vm_preload_proc(c,mcmd,margc,margv);
+ } else {
+ waitForMultipleSwappedKeys(c,mcmd,margc,margv);
+ }
+ }
+}
+
+/* Is this client attempting to run a command against swapped keys?
+ * If so, block it ASAP, load the keys in background, then resume it.
+ *
+ * The important idea about this function is that it can fail! If keys will
+ * still be swapped when the client is resumed, this key lookups will
+ * just block loading keys from disk. In practical terms this should only
+ * happen with SORT BY command or if there is a bug in this function.
+ *
+ * Return 1 if the client is marked as blocked, 0 if the client can
+ * continue as the keys it is going to access appear to be in memory. */
+int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd) {
+ if (cmd->vm_preload_proc != NULL) {
+ cmd->vm_preload_proc(c,cmd,c->argc,c->argv);
+ } else {
+ waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv);
+ }
+
+ /* If the client was blocked for at least one key, mark it as blocked. */
+ if (listLength(c->io_keys)) {
+ c->flags |= REDIS_IO_WAIT;
+ aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ server.vm_blocked_clients++;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Remove the 'key' from the list of blocked keys for a given client.
+ *
+ * The function returns 1 when there are no longer blocking keys after
+ * the current one was removed (and the client can be unblocked). */
+int dontWaitForSwappedKey(redisClient *c, robj *key) {
+ list *l;
+ listNode *ln;
+ listIter li;
+ struct dictEntry *de;
+
+ /* Remove the key from the list of keys this client is waiting for. */
+ listRewind(c->io_keys,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ if (equalStringObjects(ln->value,key)) {
+ listDelNode(c->io_keys,ln);
+ break;
+ }
+ }
+ redisAssert(ln != NULL);
+
+ /* Remove the client form the key => waiting clients map. */
+ de = dictFind(c->db->io_keys,key);
+ redisAssert(de != NULL);
+ l = dictGetEntryVal(de);
+ ln = listSearchKey(l,c);
+ redisAssert(ln != NULL);
+ listDelNode(l,ln);
+ if (listLength(l) == 0)
+ dictDelete(c->db->io_keys,key);
+
+ return listLength(c->io_keys) == 0;
+}
+
+/* Every time we now a key was loaded back in memory, we handle clients
+ * waiting for this key if any. */
+void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) {
+ struct dictEntry *de;
+ list *l;
+ listNode *ln;
+ int len;
+
+ de = dictFind(db->io_keys,key);
+ if (!de) return;
+
+ l = dictGetEntryVal(de);
+ len = listLength(l);
+ /* Note: we can't use something like while(listLength(l)) as the list
+ * can be freed by the calling function when we remove the last element. */
+ while (len--) {
+ ln = listFirst(l);
+ redisClient *c = ln->value;
+
+ if (dontWaitForSwappedKey(c,key)) {
+ /* Put the client in the list of clients ready to go as we
+ * loaded all the keys about it. */
+ listAddNodeTail(server.io_ready_clients,c);
+ }
+ }
+}
diff --git a/ziplist.c b/src/ziplist.c
index 4b9d0fadc..4b9d0fadc 100644
--- a/ziplist.c
+++ b/src/ziplist.c
diff --git a/ziplist.h b/src/ziplist.h
index 311257256..311257256 100644
--- a/ziplist.h
+++ b/src/ziplist.h
diff --git a/zipmap.c b/src/zipmap.c
index 35faeabef..35faeabef 100644
--- a/zipmap.c
+++ b/src/zipmap.c
diff --git a/zipmap.h b/src/zipmap.h
index e5f6c9f28..e5f6c9f28 100644
--- a/zipmap.h
+++ b/src/zipmap.h
diff --git a/zmalloc.c b/src/zmalloc.c
index 8658376a3..8658376a3 100644
--- a/zmalloc.c
+++ b/src/zmalloc.c
diff --git a/zmalloc.h b/src/zmalloc.h
index 193e7eda5..193e7eda5 100644
--- a/zmalloc.h
+++ b/src/zmalloc.h
diff --git a/staticsymbols.h b/staticsymbols.h
deleted file mode 100644
index 0bf3723e6..000000000
--- a/staticsymbols.h
+++ /dev/null
@@ -1,374 +0,0 @@
-static struct redisFunctionSym symsTable[] = {
-{"IOThreadEntryPoint",(unsigned long)IOThreadEntryPoint},
-{"_redisAssert",(unsigned long)_redisAssert},
-{"_redisPanic",(unsigned long)_redisPanic},
-{"acceptHandler",(unsigned long)acceptHandler},
-{"addReply",(unsigned long)addReply},
-{"addReplyBulk",(unsigned long)addReplyBulk},
-{"addReplyBulkCString",(unsigned long)addReplyBulkCString},
-{"addReplyBulkLen",(unsigned long)addReplyBulkLen},
-{"addReplyBulkSds",(unsigned long)addReplyBulkSds},
-{"addReplyDouble",(unsigned long)addReplyDouble},
-{"addReplyLongLong",(unsigned long)addReplyLongLong},
-{"addReplySds",(unsigned long)addReplySds},
-{"addReplyUlong",(unsigned long)addReplyUlong},
-{"aofRemoveTempFile",(unsigned long)aofRemoveTempFile},
-{"appendCommand",(unsigned long)appendCommand},
-{"appendServerSaveParams",(unsigned long)appendServerSaveParams},
-{"authCommand",(unsigned long)authCommand},
-{"beforeSleep",(unsigned long)beforeSleep},
-{"bgrewriteaofCommand",(unsigned long)bgrewriteaofCommand},
-{"bgsaveCommand",(unsigned long)bgsaveCommand},
-{"blockClientOnSwappedKeys",(unsigned long)blockClientOnSwappedKeys},
-{"blockForKeys",(unsigned long)blockForKeys},
-{"blockingPopGenericCommand",(unsigned long)blockingPopGenericCommand},
-{"blpopCommand",(unsigned long)blpopCommand},
-{"brpopCommand",(unsigned long)brpopCommand},
-{"bytesToHuman",(unsigned long)bytesToHuman},
-{"call",(unsigned long)call},
-{"catAppendOnlyExpireAtCommand",(unsigned long)catAppendOnlyExpireAtCommand},
-{"catAppendOnlyGenericCommand",(unsigned long)catAppendOnlyGenericCommand},
-{"checkType",(unsigned long)checkType},
-{"closeTimedoutClients",(unsigned long)closeTimedoutClients},
-{"compareStringObjects",(unsigned long)compareStringObjects},
-{"computeDatasetDigest",(unsigned long)computeDatasetDigest},
-{"computeObjectSwappability",(unsigned long)computeObjectSwappability},
-{"configCommand",(unsigned long)configCommand},
-{"configGetCommand",(unsigned long)configGetCommand},
-{"configSetCommand",(unsigned long)configSetCommand},
-{"convertToRealHash",(unsigned long)convertToRealHash},
-{"createClient",(unsigned long)createClient},
-{"createHashObject",(unsigned long)createHashObject},
-{"createListObject",(unsigned long)createListObject},
-{"createObject",(unsigned long)createObject},
-{"createSetObject",(unsigned long)createSetObject},
-{"createSharedObjects",(unsigned long)createSharedObjects},
-{"createSortOperation",(unsigned long)createSortOperation},
-{"createStringObject",(unsigned long)createStringObject},
-{"createStringObjectFromLongLong",(unsigned long)createStringObjectFromLongLong},
-{"createVmPointer",(unsigned long)createVmPointer},
-{"createZsetObject",(unsigned long)createZsetObject},
-{"daemonize",(unsigned long)daemonize},
-{"dbAdd",(unsigned long)dbAdd},
-{"dbDelete",(unsigned long)dbDelete},
-{"dbExists",(unsigned long)dbExists},
-{"dbRandomKey",(unsigned long)dbRandomKey},
-{"dbReplace",(unsigned long)dbReplace},
-{"dbsizeCommand",(unsigned long)dbsizeCommand},
-{"debugCommand",(unsigned long)debugCommand},
-{"decrCommand",(unsigned long)decrCommand},
-{"decrRefCount",(unsigned long)decrRefCount},
-{"decrbyCommand",(unsigned long)decrbyCommand},
-{"delCommand",(unsigned long)delCommand},
-{"deleteIfVolatile",(unsigned long)deleteIfVolatile},
-{"dictEncObjKeyCompare",(unsigned long)dictEncObjKeyCompare},
-{"dictListDestructor",(unsigned long)dictListDestructor},
-{"dictObjKeyCompare",(unsigned long)dictObjKeyCompare},
-{"dictRedisObjectDestructor",(unsigned long)dictRedisObjectDestructor},
-{"dictSdsDestructor",(unsigned long)dictSdsDestructor},
-{"dictSdsKeyCompare",(unsigned long)dictSdsKeyCompare},
-{"dictVanillaFree",(unsigned long)dictVanillaFree},
-{"discardCommand",(unsigned long)discardCommand},
-{"dontWaitForSwappedKey",(unsigned long)dontWaitForSwappedKey},
-{"dupClientReplyValue",(unsigned long)dupClientReplyValue},
-{"dupStringObject",(unsigned long)dupStringObject},
-{"echoCommand",(unsigned long)echoCommand},
-{"equalStringObjects",(unsigned long)equalStringObjects},
-{"execBlockClientOnSwappedKeys",(unsigned long)execBlockClientOnSwappedKeys},
-{"execCommand",(unsigned long)execCommand},
-{"execCommandReplicateMulti",(unsigned long)execCommandReplicateMulti},
-{"existsCommand",(unsigned long)existsCommand},
-{"expireCommand",(unsigned long)expireCommand},
-{"expireGenericCommand",(unsigned long)expireGenericCommand},
-{"expireIfNeeded",(unsigned long)expireIfNeeded},
-{"expireatCommand",(unsigned long)expireatCommand},
-{"feedAppendOnlyFile",(unsigned long)feedAppendOnlyFile},
-{"findFuncName",(unsigned long)findFuncName},
-{"flushAppendOnlyFile",(unsigned long)flushAppendOnlyFile},
-{"flushallCommand",(unsigned long)flushallCommand},
-{"flushdbCommand",(unsigned long)flushdbCommand},
-{"freeClient",(unsigned long)freeClient},
-{"freeClientArgv",(unsigned long)freeClientArgv},
-{"freeClientMultiState",(unsigned long)freeClientMultiState},
-{"freeFakeClient",(unsigned long)freeFakeClient},
-{"freeHashObject",(unsigned long)freeHashObject},
-{"freeIOJob",(unsigned long)freeIOJob},
-{"freeListObject",(unsigned long)freeListObject},
-{"freeMemoryIfNeeded",(unsigned long)freeMemoryIfNeeded},
-{"freePubsubPattern",(unsigned long)freePubsubPattern},
-{"freeSetObject",(unsigned long)freeSetObject},
-{"freeStringObject",(unsigned long)freeStringObject},
-{"freeZsetObject",(unsigned long)freeZsetObject},
-{"fwriteBulkDouble",(unsigned long)fwriteBulkDouble},
-{"fwriteBulkLongLong",(unsigned long)fwriteBulkLongLong},
-{"fwriteBulkObject",(unsigned long)fwriteBulkObject},
-{"fwriteBulkString",(unsigned long)fwriteBulkString},
-{"genRedisInfoString",(unsigned long)genRedisInfoString},
-{"genericHgetallCommand",(unsigned long)genericHgetallCommand},
-{"genericZrangebyscoreCommand",(unsigned long)genericZrangebyscoreCommand},
-{"getCommand",(unsigned long)getCommand},
-{"getDecodedObject",(unsigned long)getDecodedObject},
-{"getDoubleFromObject",(unsigned long)getDoubleFromObject},
-{"getDoubleFromObjectOrReply",(unsigned long)getDoubleFromObjectOrReply},
-{"getExpire",(unsigned long)getExpire},
-{"getGenericCommand",(unsigned long)getGenericCommand},
-{"getLongFromObjectOrReply",(unsigned long)getLongFromObjectOrReply},
-{"getLongLongFromObject",(unsigned long)getLongLongFromObject},
-{"getLongLongFromObjectOrReply",(unsigned long)getLongLongFromObjectOrReply},
-{"getMcontextEip",(unsigned long)getMcontextEip},
-{"getsetCommand",(unsigned long)getsetCommand},
-{"glueReplyBuffersIfNeeded",(unsigned long)glueReplyBuffersIfNeeded},
-{"handleClientsBlockedOnSwappedKey",(unsigned long)handleClientsBlockedOnSwappedKey},
-{"handleClientsWaitingListPush",(unsigned long)handleClientsWaitingListPush},
-{"hashTypeCurrent",(unsigned long)hashTypeCurrent},
-{"hashTypeDelete",(unsigned long)hashTypeDelete},
-{"hashTypeExists",(unsigned long)hashTypeExists},
-{"hashTypeGet",(unsigned long)hashTypeGet},
-{"hashTypeInitIterator",(unsigned long)hashTypeInitIterator},
-{"hashTypeLookupWriteOrCreate",(unsigned long)hashTypeLookupWriteOrCreate},
-{"hashTypeNext",(unsigned long)hashTypeNext},
-{"hashTypeReleaseIterator",(unsigned long)hashTypeReleaseIterator},
-{"hashTypeSet",(unsigned long)hashTypeSet},
-{"hashTypeTryConversion",(unsigned long)hashTypeTryConversion},
-{"hashTypeTryObjectEncoding",(unsigned long)hashTypeTryObjectEncoding},
-{"hdelCommand",(unsigned long)hdelCommand},
-{"hexistsCommand",(unsigned long)hexistsCommand},
-{"hgetCommand",(unsigned long)hgetCommand},
-{"hgetallCommand",(unsigned long)hgetallCommand},
-{"hincrbyCommand",(unsigned long)hincrbyCommand},
-{"hkeysCommand",(unsigned long)hkeysCommand},
-{"hlenCommand",(unsigned long)hlenCommand},
-{"hmgetCommand",(unsigned long)hmgetCommand},
-{"hmsetCommand",(unsigned long)hmsetCommand},
-{"hsetCommand",(unsigned long)hsetCommand},
-{"hsetnxCommand",(unsigned long)hsetnxCommand},
-{"htNeedsResize",(unsigned long)htNeedsResize},
-{"hvalsCommand",(unsigned long)hvalsCommand},
-{"incrCommand",(unsigned long)incrCommand},
-{"incrDecrCommand",(unsigned long)incrDecrCommand},
-{"incrRefCount",(unsigned long)incrRefCount},
-{"incrbyCommand",(unsigned long)incrbyCommand},
-{"incrementallyRehash",(unsigned long)incrementallyRehash},
-{"infoCommand",(unsigned long)infoCommand},
-{"initClientMultiState",(unsigned long)initClientMultiState},
-{"initServer",(unsigned long)initServer},
-{"initServerConfig",(unsigned long)initServerConfig},
-{"isStringRepresentableAsLong",(unsigned long)isStringRepresentableAsLong},
-{"keysCommand",(unsigned long)keysCommand},
-{"lastsaveCommand",(unsigned long)lastsaveCommand},
-{"lindexCommand",(unsigned long)lindexCommand},
-{"listMatchObjects",(unsigned long)listMatchObjects},
-{"listMatchPubsubPattern",(unsigned long)listMatchPubsubPattern},
-{"ll2string",(unsigned long)ll2string},
-{"llenCommand",(unsigned long)llenCommand},
-{"loadServerConfig",(unsigned long)loadServerConfig},
-{"lockThreadedIO",(unsigned long)lockThreadedIO},
-{"lookupKey",(unsigned long)lookupKey},
-{"lookupKeyByPattern",(unsigned long)lookupKeyByPattern},
-{"lookupKeyRead",(unsigned long)lookupKeyRead},
-{"lookupKeyReadOrReply",(unsigned long)lookupKeyReadOrReply},
-{"lookupKeyWrite",(unsigned long)lookupKeyWrite},
-{"lookupKeyWriteOrReply",(unsigned long)lookupKeyWriteOrReply},
-{"lpopCommand",(unsigned long)lpopCommand},
-{"lpushCommand",(unsigned long)lpushCommand},
-{"lrangeCommand",(unsigned long)lrangeCommand},
-{"lremCommand",(unsigned long)lremCommand},
-{"lsetCommand",(unsigned long)lsetCommand},
-{"ltrimCommand",(unsigned long)ltrimCommand},
-{"mgetCommand",(unsigned long)mgetCommand},
-{"mixDigest",(unsigned long)mixDigest},
-{"mixObjectDigest",(unsigned long)mixObjectDigest},
-{"monitorCommand",(unsigned long)monitorCommand},
-{"moveCommand",(unsigned long)moveCommand},
-{"msetCommand",(unsigned long)msetCommand},
-{"msetGenericCommand",(unsigned long)msetGenericCommand},
-{"msetnxCommand",(unsigned long)msetnxCommand},
-{"multiCommand",(unsigned long)multiCommand},
-{"oom",(unsigned long)oom},
-{"pingCommand",(unsigned long)pingCommand},
-{"popGenericCommand",(unsigned long)popGenericCommand},
-{"prepareForShutdown",(unsigned long)prepareForShutdown},
-{"processCommand",(unsigned long)processCommand},
-{"processInputBuffer",(unsigned long)processInputBuffer},
-{"psubscribeCommand",(unsigned long)psubscribeCommand},
-{"publishCommand",(unsigned long)publishCommand},
-{"pubsubPublishMessage",(unsigned long)pubsubPublishMessage},
-{"pubsubSubscribeChannel",(unsigned long)pubsubSubscribeChannel},
-{"pubsubSubscribePattern",(unsigned long)pubsubSubscribePattern},
-{"pubsubUnsubscribeAllChannels",(unsigned long)pubsubUnsubscribeAllChannels},
-{"pubsubUnsubscribeAllPatterns",(unsigned long)pubsubUnsubscribeAllPatterns},
-{"pubsubUnsubscribeChannel",(unsigned long)pubsubUnsubscribeChannel},
-{"pubsubUnsubscribePattern",(unsigned long)pubsubUnsubscribePattern},
-{"punsubscribeCommand",(unsigned long)punsubscribeCommand},
-{"pushGenericCommand",(unsigned long)pushGenericCommand},
-{"qsortCompareSetsByCardinality",(unsigned long)qsortCompareSetsByCardinality},
-{"qsortCompareZsetopsrcByCardinality",(unsigned long)qsortCompareZsetopsrcByCardinality},
-{"qsortRedisCommands",(unsigned long)qsortRedisCommands},
-{"queueIOJob",(unsigned long)queueIOJob},
-{"queueMultiCommand",(unsigned long)queueMultiCommand},
-{"randomkeyCommand",(unsigned long)randomkeyCommand},
-{"rdbEncodeInteger",(unsigned long)rdbEncodeInteger},
-{"rdbGenericLoadStringObject",(unsigned long)rdbGenericLoadStringObject},
-{"rdbLoad",(unsigned long)rdbLoad},
-{"rdbLoadDoubleValue",(unsigned long)rdbLoadDoubleValue},
-{"rdbLoadEncodedStringObject",(unsigned long)rdbLoadEncodedStringObject},
-{"rdbLoadIntegerObject",(unsigned long)rdbLoadIntegerObject},
-{"rdbLoadLen",(unsigned long)rdbLoadLen},
-{"rdbLoadLzfStringObject",(unsigned long)rdbLoadLzfStringObject},
-{"rdbLoadObject",(unsigned long)rdbLoadObject},
-{"rdbLoadStringObject",(unsigned long)rdbLoadStringObject},
-{"rdbLoadTime",(unsigned long)rdbLoadTime},
-{"rdbLoadType",(unsigned long)rdbLoadType},
-{"rdbRemoveTempFile",(unsigned long)rdbRemoveTempFile},
-{"rdbSave",(unsigned long)rdbSave},
-{"rdbSaveBackground",(unsigned long)rdbSaveBackground},
-{"rdbSaveDoubleValue",(unsigned long)rdbSaveDoubleValue},
-{"rdbSaveLen",(unsigned long)rdbSaveLen},
-{"rdbSaveLzfStringObject",(unsigned long)rdbSaveLzfStringObject},
-{"rdbSaveObject",(unsigned long)rdbSaveObject},
-{"rdbSaveRawString",(unsigned long)rdbSaveRawString},
-{"rdbSaveStringObject",(unsigned long)rdbSaveStringObject},
-{"rdbSaveTime",(unsigned long)rdbSaveTime},
-{"rdbSaveType",(unsigned long)rdbSaveType},
-{"rdbSavedObjectLen",(unsigned long)rdbSavedObjectLen},
-{"rdbSavedObjectPages",(unsigned long)rdbSavedObjectPages},
-{"rdbTryIntegerEncoding",(unsigned long)rdbTryIntegerEncoding},
-{"readQueryFromClient",(unsigned long)readQueryFromClient},
-{"redisLog",(unsigned long)redisLog},
-{"removeExpire",(unsigned long)removeExpire},
-{"renameCommand",(unsigned long)renameCommand},
-{"renameGenericCommand",(unsigned long)renameGenericCommand},
-{"renamenxCommand",(unsigned long)renamenxCommand},
-{"replicationFeedMonitors",(unsigned long)replicationFeedMonitors},
-{"replicationFeedSlaves",(unsigned long)replicationFeedSlaves},
-{"resetClient",(unsigned long)resetClient},
-{"resetServerSaveParams",(unsigned long)resetServerSaveParams},
-{"rewriteAppendOnlyFile",(unsigned long)rewriteAppendOnlyFile},
-{"rewriteAppendOnlyFileBackground",(unsigned long)rewriteAppendOnlyFileBackground},
-{"rpopCommand",(unsigned long)rpopCommand},
-{"rpoplpushcommand",(unsigned long)rpoplpushcommand},
-{"rpushCommand",(unsigned long)rpushCommand},
-{"saddCommand",(unsigned long)saddCommand},
-{"saveCommand",(unsigned long)saveCommand},
-{"scardCommand",(unsigned long)scardCommand},
-{"sdiffCommand",(unsigned long)sdiffCommand},
-{"sdiffstoreCommand",(unsigned long)sdiffstoreCommand},
-{"sdscatrepr",(unsigned long)sdscatrepr},
-{"segvHandler",(unsigned long)segvHandler},
-{"selectCommand",(unsigned long)selectCommand},
-{"selectDb",(unsigned long)selectDb},
-{"sendBulkToSlave",(unsigned long)sendBulkToSlave},
-{"sendReplyToClient",(unsigned long)sendReplyToClient},
-{"sendReplyToClientWritev",(unsigned long)sendReplyToClientWritev},
-{"serverCron",(unsigned long)serverCron},
-{"setCommand",(unsigned long)setCommand},
-{"setExpire",(unsigned long)setExpire},
-{"setGenericCommand",(unsigned long)setGenericCommand},
-{"setexCommand",(unsigned long)setexCommand},
-{"setnxCommand",(unsigned long)setnxCommand},
-{"setupSigSegvAction",(unsigned long)setupSigSegvAction},
-{"shutdownCommand",(unsigned long)shutdownCommand},
-{"sigtermHandler",(unsigned long)sigtermHandler},
-{"sinterCommand",(unsigned long)sinterCommand},
-{"sinterGenericCommand",(unsigned long)sinterGenericCommand},
-{"sinterstoreCommand",(unsigned long)sinterstoreCommand},
-{"sismemberCommand",(unsigned long)sismemberCommand},
-{"slaveofCommand",(unsigned long)slaveofCommand},
-{"smoveCommand",(unsigned long)smoveCommand},
-{"sortCommand",(unsigned long)sortCommand},
-{"sortCommandTable",(unsigned long)sortCommandTable},
-{"sortCompare",(unsigned long)sortCompare},
-{"spawnIOThread",(unsigned long)spawnIOThread},
-{"spopCommand",(unsigned long)spopCommand},
-{"srandmemberCommand",(unsigned long)srandmemberCommand},
-{"sremCommand",(unsigned long)sremCommand},
-{"startAppendOnly",(unsigned long)startAppendOnly},
-{"stopAppendOnly",(unsigned long)stopAppendOnly},
-{"stringObjectLen",(unsigned long)stringObjectLen},
-{"stringmatch",(unsigned long)stringmatch},
-{"stringmatchlen",(unsigned long)stringmatchlen},
-{"subscribeCommand",(unsigned long)subscribeCommand},
-{"substrCommand",(unsigned long)substrCommand},
-{"sunionCommand",(unsigned long)sunionCommand},
-{"sunionDiffGenericCommand",(unsigned long)sunionDiffGenericCommand},
-{"sunionstoreCommand",(unsigned long)sunionstoreCommand},
-{"syncCommand",(unsigned long)syncCommand},
-{"syncRead",(unsigned long)syncRead},
-{"syncReadLine",(unsigned long)syncReadLine},
-{"syncWithMaster",(unsigned long)syncWithMaster},
-{"syncWrite",(unsigned long)syncWrite},
-{"touchWatchedKey",(unsigned long)touchWatchedKey},
-{"touchWatchedKeysOnFlush",(unsigned long)touchWatchedKeysOnFlush},
-{"tryFreeOneObjectFromFreelist",(unsigned long)tryFreeOneObjectFromFreelist},
-{"tryObjectEncoding",(unsigned long)tryObjectEncoding},
-{"tryResizeHashTables",(unsigned long)tryResizeHashTables},
-{"ttlCommand",(unsigned long)ttlCommand},
-{"typeCommand",(unsigned long)typeCommand},
-{"unblockClientWaitingData",(unsigned long)unblockClientWaitingData},
-{"unlockThreadedIO",(unsigned long)unlockThreadedIO},
-{"unsubscribeCommand",(unsigned long)unsubscribeCommand},
-{"unwatchAllKeys",(unsigned long)unwatchAllKeys},
-{"unwatchCommand",(unsigned long)unwatchCommand},
-{"updateDictResizePolicy",(unsigned long)updateDictResizePolicy},
-{"updateSlavesWaitingBgsave",(unsigned long)updateSlavesWaitingBgsave},
-{"usage",(unsigned long)usage},
-{"version",(unsigned long)version},
-{"vmCanSwapOut",(unsigned long)vmCanSwapOut},
-{"vmCancelThreadedIOJob",(unsigned long)vmCancelThreadedIOJob},
-{"vmFindContiguousPages",(unsigned long)vmFindContiguousPages},
-{"vmFreePage",(unsigned long)vmFreePage},
-{"vmGenericLoadObject",(unsigned long)vmGenericLoadObject},
-{"vmInit",(unsigned long)vmInit},
-{"vmLoadObject",(unsigned long)vmLoadObject},
-{"vmMarkPageFree",(unsigned long)vmMarkPageFree},
-{"vmMarkPageUsed",(unsigned long)vmMarkPageUsed},
-{"vmMarkPagesFree",(unsigned long)vmMarkPagesFree},
-{"vmMarkPagesUsed",(unsigned long)vmMarkPagesUsed},
-{"vmPreviewObject",(unsigned long)vmPreviewObject},
-{"vmReadObjectFromSwap",(unsigned long)vmReadObjectFromSwap},
-{"vmReopenSwapFile",(unsigned long)vmReopenSwapFile},
-{"vmSwapObjectBlocking",(unsigned long)vmSwapObjectBlocking},
-{"vmSwapObjectThreaded",(unsigned long)vmSwapObjectThreaded},
-{"vmSwapOneObject",(unsigned long)vmSwapOneObject},
-{"vmSwapOneObjectBlocking",(unsigned long)vmSwapOneObjectBlocking},
-{"vmSwapOneObjectThreaded",(unsigned long)vmSwapOneObjectThreaded},
-{"vmThreadedIOCompletedJob",(unsigned long)vmThreadedIOCompletedJob},
-{"vmWriteObjectOnSwap",(unsigned long)vmWriteObjectOnSwap},
-{"waitEmptyIOJobsQueue",(unsigned long)waitEmptyIOJobsQueue},
-{"waitForMultipleSwappedKeys",(unsigned long)waitForMultipleSwappedKeys},
-{"waitForSwappedKey",(unsigned long)waitForSwappedKey},
-{"watchCommand",(unsigned long)watchCommand},
-{"watchForKey",(unsigned long)watchForKey},
-{"xorDigest",(unsigned long)xorDigest},
-{"xorObjectDigest",(unsigned long)xorObjectDigest},
-{"yesnotoi",(unsigned long)yesnotoi},
-{"zaddCommand",(unsigned long)zaddCommand},
-{"zaddGenericCommand",(unsigned long)zaddGenericCommand},
-{"zcardCommand",(unsigned long)zcardCommand},
-{"zcountCommand",(unsigned long)zcountCommand},
-{"zincrbyCommand",(unsigned long)zincrbyCommand},
-{"zinterstoreCommand",(unsigned long)zinterstoreCommand},
-{"zrangeCommand",(unsigned long)zrangeCommand},
-{"zrangeGenericCommand",(unsigned long)zrangeGenericCommand},
-{"zrangebyscoreCommand",(unsigned long)zrangebyscoreCommand},
-{"zrankCommand",(unsigned long)zrankCommand},
-{"zrankGenericCommand",(unsigned long)zrankGenericCommand},
-{"zremCommand",(unsigned long)zremCommand},
-{"zremrangebyrankCommand",(unsigned long)zremrangebyrankCommand},
-{"zremrangebyscoreCommand",(unsigned long)zremrangebyscoreCommand},
-{"zrevrangeCommand",(unsigned long)zrevrangeCommand},
-{"zrevrankCommand",(unsigned long)zrevrankCommand},
-{"zscoreCommand",(unsigned long)zscoreCommand},
-{"zslCreate",(unsigned long)zslCreate},
-{"zslCreateNode",(unsigned long)zslCreateNode},
-{"zslDelete",(unsigned long)zslDelete},
-{"zslFirstWithScore",(unsigned long)zslFirstWithScore},
-{"zslFree",(unsigned long)zslFree},
-{"zslFreeNode",(unsigned long)zslFreeNode},
-{"zslInsert",(unsigned long)zslInsert},
-{"zslRandomLevel",(unsigned long)zslRandomLevel},
-{"zunionInterBlockClientOnSwappedKeys",(unsigned long)zunionInterBlockClientOnSwappedKeys},
-{"zunionInterGenericCommand",(unsigned long)zunionInterGenericCommand},
-{"zunionstoreCommand",(unsigned long)zunionstoreCommand},
-{NULL,0}
-};
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index abcebe130..4cbe6eaae 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -59,13 +59,13 @@ tags {"aof"} {
## Test that redis-check-aof indeed sees this AOF is not valid
test {Short read: Utility should confirm the AOF is not valid} {
catch {
- exec ./redis-check-aof $aof_path
+ exec src/redis-check-aof $aof_path
} str
set _ $str
} {*not valid*}
test {Short read: Utility should be able to fix the AOF} {
- exec echo y | ./redis-check-aof --fix $aof_path
+ exec echo y | src/redis-check-aof --fix $aof_path
} {*Successfully truncated AOF*}
## Test that the server can be started using the truncated AOF
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index eb5fe6214..8e226a7dd 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -169,10 +169,10 @@ proc start_server {options {code undefined}} {
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
if {$::valgrind} {
- exec valgrind ./redis-server $config_file > $stdout 2> $stderr &
+ exec valgrind src/redis-server $config_file > $stdout 2> $stderr &
after 2000
} else {
- exec ./redis-server $config_file > $stdout 2> $stderr &
+ exec src/redis-server $config_file > $stdout 2> $stderr &
after 500
}