From f0c5052aa8de084f54b3eda7848f17b122907509 Mon Sep 17 00:00:00 2001
From: filipe oliveira <filipecosta.90@gmail.com>
Date: Fri, 29 Jan 2021 13:38:30 +0000
Subject: Enabled background and reply time tracking on blocked on keys/blocked
 on background work clients (#7491)

This commit enables tracking time of the background tasks and on replies,
opening the door for properly tracking commands that rely on blocking / background
 work via the slowlog, latency history, and commandstats.

Some notes:
- The time spent blocked waiting for key changes, or blocked on synchronous
  replication is not accounted for.

- **This commit does not affect latency tracking of commands that are non-blocking
  or do not have background work.** ( meaning that it all stays the same with exception to
  `BZPOPMIN`,`BZPOPMAX`,`BRPOP`,`BLPOP`, etc... and module's commands that rely
  on background threads ).

-  Specifically for latency history command we've added a new event class named
  `command-unblocking` that will enable latency monitoring on commands that spawn
  background threads to do the work.

- For blocking commands we're now considering the total time of a command as the
  time spent on call() + the time spent on replying when unblocked.

- For Modules commands that rely on background threads we're now considering the
  total time of a command as the time spent on call (main thread) + the time spent on
  the background thread ( if marked within `RedisModule_MeasureTimeStart()` and
  `RedisModule_MeasureTimeEnd()` ) + the time spent on replying (main thread)

To test for this feature we've added a `unit/moduleapi/blockonbackground` test that relies on
a module that blocks the client and sleeps on the background for a given time.
- check blocked command that uses RedisModule_MeasureTimeStart() is tracking background time
- check blocked command that uses RedisModule_MeasureTimeStart() is tracking background time even in timeout
- check blocked command with multiple calls RedisModule_MeasureTimeStart()  is tracking the total background time
- check blocked command without calling RedisModule_MeasureTimeStart() is not reporting background time
---
 src/blocked.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'src/blocked.c')

diff --git a/src/blocked.c b/src/blocked.c
index 1d69ff902..09e17213c 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -61,6 +61,9 @@
  */
 
 #include "server.h"
+#include "slowlog.h"
+#include "latency.h"
+#include "monotonic.h"
 
 int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int wherefrom, int whereto);
 int getListPositionFromObjectOrReply(client *c, robj *arg, int *position);
@@ -97,6 +100,20 @@ void blockClient(client *c, int btype) {
     }
 }
 
+/* This function is called after a client has finished a blocking operation
+ * in order to update the total command duration, log the command into
+ * the Slow log if needed, and log the reply duration event if needed. */
+void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){
+    const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
+    c->lastcmd->microseconds += total_cmd_duration;
+    /* Log the command into the Slow log if needed. */
+    if (!(c->lastcmd->flags & CMD_SKIP_SLOWLOG)) {
+        slowlogPushEntryIfNeeded(c,c->argv,c->argc,total_cmd_duration);
+        /* Log the reply duration event. */
+        latencyAddSampleIfNeeded("command-unblocking",reply_us/1000);
+    }
+}
+
 /* This function is called in the beforeSleep() function of the event loop
  * in order to process the pending input buffer of clients that were
  * unblocked after a blocking operation. */
@@ -264,6 +281,8 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
                 if (dstkey) incrRefCount(dstkey);
                 unblockClient(receiver);
 
+                monotime replyTimer;
+                elapsedStart(&replyTimer);
                 if (serveClientBlockedOnList(receiver,
                     rl->key,dstkey,rl->db,value,
                     wherefrom, whereto) == C_ERR)
@@ -272,6 +291,7 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
                      * to also undo the POP operation. */
                     listTypePush(o,value,wherefrom);
                 }
+                updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
 
                 if (dstkey) decrRefCount(dstkey);
                 decrRefCount(value);
@@ -316,7 +336,10 @@ void serveClientsBlockedOnSortedSetKey(robj *o, readyList *rl) {
                          receiver->lastcmd->proc == bzpopminCommand)
                          ? ZSET_MIN : ZSET_MAX;
             unblockClient(receiver);
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
             genericZpopCommand(receiver,&rl->key,1,where,1,NULL);
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
             zcard--;
 
             /* Replicate the command. */
@@ -406,6 +429,8 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
                     }
                 }
 
+                monotime replyTimer;
+                elapsedStart(&replyTimer);
                 /* Emit the two elements sub-array consisting of
                  * the name of the stream and the data we
                  * extracted from it. Wrapped in a single-item
@@ -425,6 +450,7 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
                 streamReplyWithRange(receiver,s,&start,NULL,
                                      receiver->bpop.xread_count,
                                      0, group, consumer, noack, &pi);
+                updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
 
                 /* Note that after we unblock the client, 'gt'
                  * and other receiver->bpop stuff are no longer
@@ -471,7 +497,10 @@ void serveClientsBlockedOnKeyByModule(readyList *rl) {
              * different modules with different triggers to consider if a key
              * is ready or not. This means we can't exit the loop but need
              * to continue after the first failure. */
+            monotime replyTimer;
+            elapsedStart(&replyTimer);
             if (!moduleTryServeClientBlockedOnKey(receiver, rl->key)) continue;
+            updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
 
             moduleUnblockClient(receiver);
         }
-- 
cgit v1.2.1