summaryrefslogtreecommitdiff
path: root/src/zmalloc.c
diff options
context:
space:
mode:
authoryoav-steinberg <yoav@redislabs.com>2022-05-22 17:10:31 +0300
committerGitHub <noreply@github.com>2022-05-22 17:10:31 +0300
commit843a4cdc075a5b251e1b154f8013a9e0abe1038b (patch)
tree92f3ddd15a21e9e04ea77799ee5964899cc07809 /src/zmalloc.c
parentb0e18f804d016dd6664c71920d6c290c0af08909 (diff)
downloadredis-843a4cdc075a5b251e1b154f8013a9e0abe1038b.tar.gz
Add warning for suspected slow system clocksource setting (#10636)
This PR does 2 main things: 1) Add warning for suspected slow system clocksource setting. This is Linux specific. 2) Add a `--check-system` argument to redis which runs all system checks and prints a report. ## System checks Add a command line option `--check-system` which runs all known system checks and provides a report to stdout of which systems checks have failed with details on how to reconfigure the system for optimized redis performance. The `--system-check` mode exists with an appropriate error code after running all the checks. ## Slow clocksource details We check the system's clocksource performance by running `clock_gettime()` in a loop and then checking how much time was spent in a system call (via `getrusage()`). If we spend more than 10% of the time in the kernel then we print a warning. I verified that using the slow clock sources: `acpi_pm` (~90% in the kernel on my laptop) and `xen` (~30% in the kernel on an ec2 `m4.large`) we get this warning. The check runs 5 system ticks so we can detect time spent in kernel at 20% jumps (0%,20%,40%...). Anything more accurate will require the test to run longer. Typically 5 ticks are 50ms. This means running the test on startup will delay startup by 50ms. To avoid this we make sure the test is only executed in the `--check-system` mode. For a quick startup check, we specifically warn if the we see the system is using the `xen` clocksource which we know has bad performance and isn't recommended (at least on ec2). In such a case the user should manually rung redis with `--check-system` to force the slower clocksource test described above. ## Other changes in the PR * All the system checks are now implemented as functions in _syscheck.c_. They are implemented using a standard interface (see details in _syscheck.c_). To do this I moved the checking functions `linuxOvercommitMemoryValue()`, `THPIsEnabled()`, `linuxMadvFreeForkBugCheck()` out of _server.c_ and _latency.c_ and into the new _syscheck.c_. When moving these functions I made sure they don't depend on other functionality provided in _server.c_ and made them use a standard "check functions" interface. Specifically: * I removed all logging out of `linuxMadvFreeForkBugCheck()`. In case there's some unexpected error during the check aborts as before, but without any logging. It returns an error code 0 meaning the check didn't not complete. * All these functions now return 1 on success, -1 on failure, 0 in case the check itself cannot be completed. * The `linuxMadvFreeForkBugCheck()` function now internally calls `exit()` and not `exitFromChild()` because the latter is only available in _server.c_ and I wanted to remove that dependency. This isn't an because we don't need to worry about the child process created by the test doing anything related to the rdb/aof files which is why `exitFromChild()` was created. * This also fixes parsing of other /proc/\<pid\>/stat fields to correctly handle spaces in the process name and be more robust in general. Not that before this fix the rss info in `INFO memory` was corrupt in case of spaces in the process name. To recreate just rename `redis-server` to `redis server`, start it, and run `INFO memory`.
Diffstat (limited to 'src/zmalloc.c')
-rw-r--r--src/zmalloc.c58
1 files changed, 41 insertions, 17 deletions
diff --git a/src/zmalloc.c b/src/zmalloc.c
index d19d87b7d..857baa8ac 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -55,6 +55,8 @@ void zlibc_free(void *ptr) {
#include "zmalloc.h"
#include "atomicvar.h"
+#define UNUSED(x) ((void)(x))
+
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
#define ASSERT_NO_SIZE_OVERFLOW(sz)
@@ -395,35 +397,58 @@ void zmadvise_dontneed(void *ptr) {
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#endif
-size_t zmalloc_get_rss(void) {
- int page = sysconf(_SC_PAGESIZE);
- size_t rss;
+/* Get the i'th field from "/proc/self/stats" note i is 1 based as appears in the 'proc' man page */
+int get_proc_stat_ll(int i, long long *res) {
+#if defined(HAVE_PROC_STAT)
char buf[4096];
- char filename[256];
- int fd, count;
+ int fd, l;
char *p, *x;
- snprintf(filename,256,"/proc/%ld/stat",(long) getpid());
- if ((fd = open(filename,O_RDONLY)) == -1) return 0;
- if (read(fd,buf,4096) <= 0) {
+ if ((fd = open("/proc/self/stat",O_RDONLY)) == -1) return 0;
+ if ((l = read(fd,buf,sizeof(buf)-1)) <= 0) {
close(fd);
return 0;
}
close(fd);
+ buf[l] = '\0';
+ if (buf[l-1] == '\n') buf[l-1] = '\0';
- p = buf;
- count = 23; /* RSS is the 24th field in /proc/<pid>/stat */
- while(p && count--) {
- p = strchr(p,' ');
+ /* Skip pid and process name (surrounded with parentheses) */
+ p = strrchr(buf, ')');
+ if (!p) return 0;
+ p++;
+ while (*p == ' ') p++;
+ if (*p == '\0') return 0;
+ i -= 3;
+ if (i < 0) return 0;
+
+ while (p && i--) {
+ p = strchr(p, ' ');
if (p) p++;
+ else return 0;
}
- if (!p) return 0;
x = strchr(p,' ');
- if (!x) return 0;
- *x = '\0';
+ if (x) *x = '\0';
- rss = strtoll(p,NULL,10);
+ *res = strtoll(p,&x,10);
+ if (*x != '\0') return 0;
+ return 1;
+#else
+ UNUSED(i);
+ UNUSED(res);
+ return 0;
+#endif
+}
+
+#if defined(HAVE_PROC_STAT)
+size_t zmalloc_get_rss(void) {
+ int page = sysconf(_SC_PAGESIZE);
+ long long rss;
+
+ /* RSS is the 24th field in /proc/<pid>/stat */
+ if (!get_proc_stat_ll(24, &rss)) return 0;
rss *= page;
return rss;
}
@@ -748,7 +773,6 @@ size_t zmalloc_get_memory_size(void) {
}
#ifdef REDIS_TEST
-#define UNUSED(x) ((void)(x))
int zmalloc_test(int argc, char **argv, int flags) {
void *ptr;