summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2018-02-19 11:12:49 +0100
committerantirez <antirez@gmail.com>2018-02-19 11:12:49 +0100
commitffde73c57d0aa5cd58869c13cb1b76d6c27ef5b7 (patch)
tree4f0f3075ebabae20011f08b170df6951b4ac38f0
parentaa57481d8ce69938b20a2a7ea8ba06daadbc2dcd (diff)
downloadredis-ffde73c57d0aa5cd58869c13cb1b76d6c27ef5b7.tar.gz
Track number of logically expired keys still in memory.
This commit adds two new fields in the INFO output, stats section: expired_stale_perc:0.34 expired_time_cap_reached_count:58 The first field is an estimate of the percentage of keys that are still in memory but are already logically expired. The reason why those keys are not yet reclaimed is that the active expire cycle can't spend more time on the process of reclaiming the keys, and at the same time nobody is accessing such keys. However as the active expire cycle runs, while it will eventually have to return to the caller, because of the time limit or because there are fewer than 25% of keys logically expired in each given database, it collects the stats in order to populate this INFO field. Note that expired_stale_perc is a running average, where the current sample accounts for 5% and the history for 95%, so you'll see it changing smoothly over time. The other field, expired_time_cap_reached_count, counts the number of times the expire cycle had to stop because of the time limit, even though it was still finding a sizeable number of keys yet to expire. This allows people handling operations to understand whether the Redis server, during mass-expiration events, is usually able to collect keys fast enough. It is normal for this field to increment during mass expires, but normally it should very rarely increment. When instead it constantly increments, it means that the current workload is using a very significant percentage of CPU time to expire keys. This feature was created thanks to the hints of Rashmi Ramesh and Bart Robinson from Twitter. In private email exchanges, they noted how important it was to improve the observability of this parameter in the Redis server. Actually in big deployments, the number of keys that are yet to expire in each server, even if they are logically expired, may account for a very large amount of wasted memory.
-rw-r--r--src/expire.c21
-rw-r--r--src/server.c6
-rw-r--r--src/server.h2
3 files changed, 28 insertions, 1 deletions
diff --git a/src/expire.c b/src/expire.c
index 81c9e23f5..ce7882e4c 100644
--- a/src/expire.c
+++ b/src/expire.c
@@ -111,7 +111,7 @@ void activeExpireCycle(int type) {
if (clientsArePaused()) return;
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
- /* Don't start a fast cycle if the previous cycle did not exited
+ /* Don't start a fast cycle if the previous cycle did not exit
* for time limt. Also don't repeat a fast cycle for the same period
* as the fast cycle total duration itself. */
if (!timelimit_exit) return;
@@ -140,6 +140,12 @@ void activeExpireCycle(int type) {
if (type == ACTIVE_EXPIRE_CYCLE_FAST)
timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
+ /* Accumulate some global stats as we expire keys, to have some idea
+ * about the number of keys that are already logically expired, but still
+ * existing inside the database. */
+ long total_sampled = 0;
+ long total_expired = 0;
+
for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
int expired;
redisDb *db = server.db+(current_db % server.dbnum);
@@ -192,7 +198,9 @@ void activeExpireCycle(int type) {
ttl_sum += ttl;
ttl_samples++;
}
+ total_sampled++;
}
+ total_expired += expired;
/* Update the average TTL stats for this database. */
if (ttl_samples) {
@@ -212,6 +220,7 @@ void activeExpireCycle(int type) {
elapsed = ustime()-start;
if (elapsed > timelimit) {
timelimit_exit = 1;
+ server.stat_expired_time_cap_reached_count++;
break;
}
}
@@ -222,6 +231,16 @@ void activeExpireCycle(int type) {
elapsed = ustime()-start;
latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
+
+ /* Update our estimate of keys existing but yet to be expired.
+ * Running average with this sample accounting for 5%. */
+ double current_perc;
+ if (total_sampled) {
+ current_perc = (double)total_expired/total_sampled;
+ } else
+ current_perc = 0;
+ server.stat_expired_stale_perc = (current_perc*0.05)+
+ (server.stat_expired_stale_perc*0.95);
}
/*-----------------------------------------------------------------------------
diff --git a/src/server.c b/src/server.c
index c14255db1..1a6f30381 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1799,6 +1799,8 @@ void resetServerStats(void) {
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
+ server.stat_expired_stale_perc = 0;
+ server.stat_expired_time_cap_reached_count = 0;
server.stat_evictedkeys = 0;
server.stat_keyspace_misses = 0;
server.stat_keyspace_hits = 0;
@@ -3132,6 +3134,8 @@ sds genRedisInfoString(char *section) {
"sync_partial_ok:%lld\r\n"
"sync_partial_err:%lld\r\n"
"expired_keys:%lld\r\n"
+ "expired_stale_perc:%.2f\r\n"
+ "expired_time_cap_reached_count:%lld\r\n"
"evicted_keys:%lld\r\n"
"keyspace_hits:%lld\r\n"
"keyspace_misses:%lld\r\n"
@@ -3156,6 +3160,8 @@ sds genRedisInfoString(char *section) {
server.stat_sync_partial_ok,
server.stat_sync_partial_err,
server.stat_expiredkeys,
+ server.stat_expired_stale_perc*100,
+ server.stat_expired_time_cap_reached_count,
server.stat_evictedkeys,
server.stat_keyspace_hits,
server.stat_keyspace_misses,
diff --git a/src/server.h b/src/server.h
index 5b6074a8c..29919f5ee 100644
--- a/src/server.h
+++ b/src/server.h
@@ -950,6 +950,8 @@ struct redisServer {
long long stat_numcommands; /* Number of processed commands */
long long stat_numconnections; /* Number of connections received */
long long stat_expiredkeys; /* Number of expired keys */
+ double stat_expired_stale_perc; /* Percentage of keys probably expired */
+ long long stat_expired_time_cap_reached_count; /* Early expire cycle stops.*/
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
long long stat_keyspace_hits; /* Number of successful lookups of keys */
long long stat_keyspace_misses; /* Number of failed lookups of keys */