summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYossi Gottlieb <yossigo@gmail.com>2021-03-22 13:25:58 +0200
committerGitHub <noreply@github.com>2021-03-22 13:25:58 +0200
commitc3df27d1ea3eeff8ad4d0bec2bfc2646efb9843f (patch)
tree511ce3909a2b689e8bdaa1ad0563cec411893481
parent2f717c156a0bca757b8a8dfacf27e9cbeb60f99d (diff)
downloadredis-c3df27d1ea3eeff8ad4d0bec2bfc2646efb9843f.tar.gz
Fix slowdown due to child reporting CoW. (#8645)
Reading CoW from /proc/<pid>/smaps can be slow with large processes on some platforms. This measures the time it takes to read CoW info and limits the duty cycle of future updates to roughly 1/100. As current_cow_size no longer represnets a current, fixed interval value there is also a new current_cow_size_age field that provides information about the age of the size value, in seconds.
-rw-r--r--src/childinfo.c49
-rw-r--r--src/server.c5
-rw-r--r--src/server.h6
-rw-r--r--src/zmalloc.c5
4 files changed, 53 insertions, 12 deletions
diff --git a/src/childinfo.c b/src/childinfo.c
index e3f33a96c..4f0a42001 100644
--- a/src/childinfo.c
+++ b/src/childinfo.c
@@ -33,6 +33,7 @@
typedef struct {
size_t keys;
size_t cow;
+ monotime cow_updated;
double progress;
childInfoType information_type; /* Type of information */
} child_info_data;
@@ -69,18 +70,39 @@ void closeChildInfoPipe(void) {
void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname) {
if (server.child_info_pipe[1] == -1) return;
- child_info_data data = {0}; /* zero everything, including padding to sattisfy valgrind */
+ static monotime cow_updated = 0;
+ static uint64_t cow_update_cost = 0;
+ static size_t cow = 0;
+
+ child_info_data data = {0}; /* zero everything, including padding to satisfy valgrind */
+
+ /* When called to report current info, we need to throttle down CoW updates as they
+ * can be very expensive. To do that, we measure the time it takes to get a reading
+ * and schedule the next reading to happen not before time*CHILD_COW_COST_FACTOR
+ * passes. */
+
+ monotime now = getMonotonicUs();
+ if (info_type != CHILD_INFO_TYPE_CURRENT_INFO ||
+ !cow_updated ||
+ now - cow_updated > cow_update_cost * CHILD_COW_DUTY_CYCLE)
+ {
+ cow = zmalloc_get_private_dirty(-1);
+ cow_updated = getMonotonicUs();
+ cow_update_cost = cow_updated - now;
+
+ if (cow) {
+ serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
+ "%s: %zu MB of memory used by copy-on-write",
+ pname, data.cow / (1024 * 1024));
+ }
+ }
+
data.information_type = info_type;
data.keys = keys;
- data.cow = zmalloc_get_private_dirty(-1);
+ data.cow = cow;
+ data.cow_updated = cow_updated;
data.progress = progress;
- if (data.cow) {
- serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
- "%s: %zu MB of memory used by copy-on-write",
- pname, data.cow/(1024*1024));
- }
-
ssize_t wlen = sizeof(data);
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
@@ -89,9 +111,10 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
}
/* Update Child info. */
-void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, double progress) {
+void updateChildInfo(childInfoType information_type, size_t cow, monotime cow_updated, size_t keys, double progress) {
if (information_type == CHILD_INFO_TYPE_CURRENT_INFO) {
server.stat_current_cow_bytes = cow;
+ server.stat_current_cow_updated = cow_updated;
server.stat_current_save_keys_processed = keys;
if (progress != -1) server.stat_module_progress = progress;
} else if (information_type == CHILD_INFO_TYPE_AOF_COW_SIZE) {
@@ -107,7 +130,7 @@ void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, do
* if complete data read into the buffer,
* data is stored into *buffer, and returns 1.
* otherwise, the partial data is left in the buffer, waiting for the next read, and returns 0. */
-int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, double* progress) {
+int readChildInfo(childInfoType *information_type, size_t *cow, monotime *cow_updated, size_t *keys, double* progress) {
/* We are using here a static buffer in combination with the server.child_info_nread to handle short reads */
static child_info_data buffer;
ssize_t wlen = sizeof(buffer);
@@ -124,6 +147,7 @@ int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, do
if (server.child_info_nread == wlen) {
*information_type = buffer.information_type;
*cow = buffer.cow;
+ *cow_updated = buffer.cow_updated;
*keys = buffer.keys;
*progress = buffer.progress;
return 1;
@@ -137,12 +161,13 @@ void receiveChildInfo(void) {
if (server.child_info_pipe[0] == -1) return;
size_t cow;
+ monotime cow_updated;
size_t keys;
double progress;
childInfoType information_type;
/* Drain the pipe and update child info so that we get the final message. */
- while (readChildInfo(&information_type, &cow, &keys, &progress)) {
- updateChildInfo(information_type, cow, keys, progress);
+ while (readChildInfo(&information_type, &cow, &cow_updated, &keys, &progress)) {
+ updateChildInfo(information_type, cow, cow_updated, keys, progress);
}
}
diff --git a/src/server.c b/src/server.c
index 0ed58c07f..e19ddbed8 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1614,6 +1614,7 @@ void resetChildState() {
server.child_type = CHILD_TYPE_NONE;
server.child_pid = -1;
server.stat_current_cow_bytes = 0;
+ server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_module_progress = 0;
server.stat_current_save_keys_total = 0;
@@ -3267,6 +3268,7 @@ void initServer(void) {
server.stat_starttime = time(NULL);
server.stat_peak_memory = 0;
server.stat_current_cow_bytes = 0;
+ server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_current_save_keys_total = 0;
server.stat_rdb_cow_bytes = 0;
@@ -4835,6 +4837,7 @@ sds genRedisInfoString(const char *section) {
"# Persistence\r\n"
"loading:%d\r\n"
"current_cow_size:%zu\r\n"
+ "current_cow_size_age:%lu\r\n"
"current_fork_perc:%.2f\r\n"
"current_save_keys_processed:%zu\r\n"
"current_save_keys_total:%zu\r\n"
@@ -4857,6 +4860,7 @@ sds genRedisInfoString(const char *section) {
"module_fork_last_cow_size:%zu\r\n",
(int)server.loading,
server.stat_current_cow_bytes,
+ server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
fork_perc,
server.stat_current_save_keys_processed,
server.stat_current_save_keys_total,
@@ -5829,6 +5833,7 @@ int redisFork(int purpose) {
server.child_pid = childpid;
server.child_type = purpose;
server.stat_current_cow_bytes = 0;
+ server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_module_progress = 0;
server.stat_current_save_keys_total = dbTotalServerKeyCount();
diff --git a/src/server.h b/src/server.h
index 81395f4bb..f0fbefef0 100644
--- a/src/server.h
+++ b/src/server.h
@@ -130,6 +130,11 @@ typedef long long ustime_t; /* microsecond time type. */
* special code. */
#define SERVER_CHILD_NOERROR_RETVAL 255
+/* Reading copy-on-write info is sometimes expensive and may slow down child
+ * processes that report it continuously. We measure the cost of obtaining it
+ * and hold back additional reading based on this factor. */
+#define CHILD_COW_DUTY_CYCLE 100
+
/* Instantaneous metrics tracking. */
#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
@@ -1279,6 +1284,7 @@ struct redisServer {
redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
+ monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
size_t stat_current_save_keys_processed; /* Processed keys while child is active. */
size_t stat_current_save_keys_total; /* Number of keys when child started. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 77eeea174..3645efcf1 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -613,6 +613,11 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
}
#endif
+/* Return the total number bytes in pages marked as Private Dirty.
+ *
+ * Note: depending on the platform and memory footprint of the process, this
+ * call can be slow, exceeding 1000ms!
+ */
size_t zmalloc_get_private_dirty(long pid) {
return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
}