diff options
author | yoav-steinberg <yoav@redislabs.com> | 2022-05-22 17:10:31 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-22 17:10:31 +0300 |
commit | 843a4cdc075a5b251e1b154f8013a9e0abe1038b (patch) | |
tree | 92f3ddd15a21e9e04ea77799ee5964899cc07809 /src/zmalloc.c | |
parent | b0e18f804d016dd6664c71920d6c290c0af08909 (diff) | |
download | redis-843a4cdc075a5b251e1b154f8013a9e0abe1038b.tar.gz |
Add warning for suspected slow system clocksource setting (#10636)
This PR does 2 main things:
1) Add warning for suspected slow system clocksource setting. This is Linux specific.
2) Add a `--check-system` argument to redis which runs all system checks and prints a report.
## System checks
Add a command line option `--check-system` which runs all known system checks and provides
a report to stdout of which systems checks have failed with details on how to reconfigure the
system for optimized redis performance.
The `--system-check` mode exists with an appropriate error code after running all the checks.
## Slow clocksource details
We check the system's clocksource performance by running `clock_gettime()` in a loop and then
checking how much time was spent in a system call (via `getrusage()`). If we spend more than
10% of the time in the kernel then we print a warning. I verified that using the slow clock sources:
`acpi_pm` (~90% in the kernel on my laptop) and `xen` (~30% in the kernel on an ec2 `m4.large`)
we get this warning.
The check runs 5 system ticks so we can detect time spent in kernel at 20% jumps (0%,20%,40%...).
Anything more accurate will require the test to run longer. Typically 5 ticks are 50ms. This means
running the test on startup will delay startup by 50ms. To avoid this we make sure the test is only
executed in the `--check-system` mode.
For a quick startup check, we specifically warn if the we see the system is using the `xen` clocksource
which we know has bad performance and isn't recommended (at least on ec2). In such a case the
user should manually rung redis with `--check-system` to force the slower clocksource test described
above.
## Other changes in the PR
* All the system checks are now implemented as functions in _syscheck.c_.
They are implemented using a standard interface (see details in _syscheck.c_).
To do this I moved the checking functions `linuxOvercommitMemoryValue()`,
`THPIsEnabled()`, `linuxMadvFreeForkBugCheck()` out of _server.c_ and _latency.c_
and into the new _syscheck.c_. When moving these functions I made sure they don't
depend on other functionality provided in _server.c_ and made them use a standard
"check functions" interface. Specifically:
* I removed all logging out of `linuxMadvFreeForkBugCheck()`. In case there's some
unexpected error during the check aborts as before, but without any logging.
It returns an error code 0 meaning the check didn't not complete.
* All these functions now return 1 on success, -1 on failure, 0 in case the check itself
cannot be completed.
* The `linuxMadvFreeForkBugCheck()` function now internally calls `exit()` and not
`exitFromChild()` because the latter is only available in _server.c_ and I wanted to
remove that dependency. This isn't an because we don't need to worry about the
child process created by the test doing anything related to the rdb/aof files which
is why `exitFromChild()` was created.
* This also fixes parsing of other /proc/\<pid\>/stat fields to correctly handle spaces
in the process name and be more robust in general. Not that before this fix the rss
info in `INFO memory` was corrupt in case of spaces in the process name. To
recreate just rename `redis-server` to `redis server`, start it, and run `INFO memory`.
Diffstat (limited to 'src/zmalloc.c')
-rw-r--r-- | src/zmalloc.c | 58 |
1 files changed, 41 insertions, 17 deletions
diff --git a/src/zmalloc.c b/src/zmalloc.c index d19d87b7d..857baa8ac 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -55,6 +55,8 @@ void zlibc_free(void *ptr) { #include "zmalloc.h" #include "atomicvar.h" +#define UNUSED(x) ((void)(x)) + #ifdef HAVE_MALLOC_SIZE #define PREFIX_SIZE (0) #define ASSERT_NO_SIZE_OVERFLOW(sz) @@ -395,35 +397,58 @@ void zmadvise_dontneed(void *ptr) { #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#endif -size_t zmalloc_get_rss(void) { - int page = sysconf(_SC_PAGESIZE); - size_t rss; +/* Get the i'th field from "/proc/self/stats" note i is 1 based as appears in the 'proc' man page */ +int get_proc_stat_ll(int i, long long *res) { +#if defined(HAVE_PROC_STAT) char buf[4096]; - char filename[256]; - int fd, count; + int fd, l; char *p, *x; - snprintf(filename,256,"/proc/%ld/stat",(long) getpid()); - if ((fd = open(filename,O_RDONLY)) == -1) return 0; - if (read(fd,buf,4096) <= 0) { + if ((fd = open("/proc/self/stat",O_RDONLY)) == -1) return 0; + if ((l = read(fd,buf,sizeof(buf)-1)) <= 0) { close(fd); return 0; } close(fd); + buf[l] = '\0'; + if (buf[l-1] == '\n') buf[l-1] = '\0'; - p = buf; - count = 23; /* RSS is the 24th field in /proc/<pid>/stat */ - while(p && count--) { - p = strchr(p,' '); + /* Skip pid and process name (surrounded with parentheses) */ + p = strrchr(buf, ')'); + if (!p) return 0; + p++; + while (*p == ' ') p++; + if (*p == '\0') return 0; + i -= 3; + if (i < 0) return 0; + + while (p && i--) { + p = strchr(p, ' '); if (p) p++; + else return 0; } - if (!p) return 0; x = strchr(p,' '); - if (!x) return 0; - *x = '\0'; + if (x) *x = '\0'; - rss = strtoll(p,NULL,10); + *res = strtoll(p,&x,10); + if (*x != '\0') return 0; + return 1; +#else + UNUSED(i); + UNUSED(res); + return 0; +#endif +} + +#if defined(HAVE_PROC_STAT) +size_t zmalloc_get_rss(void) { + int page = sysconf(_SC_PAGESIZE); + long long rss; + + /* RSS is the 24th field in /proc/<pid>/stat */ + if (!get_proc_stat_ll(24, &rss)) return 0; rss *= page; return rss; } @@ -748,7 +773,6 @@ size_t zmalloc_get_memory_size(void) { } #ifdef REDIS_TEST -#define UNUSED(x) ((void)(x)) int zmalloc_test(int argc, char **argv, int flags) { void *ptr; |