diff options
author | Klearchos Chaloulos <klearchos.chaloulos@nokia.com> | 2016-04-05 13:47:04 +0300 |
---|---|---|
committer | Klearchos Chaloulos <klearchos.chaloulos@nokia.com> | 2016-04-05 17:37:00 +0300 |
commit | d79ca7a622abbb0df6f5166cc0e4669373d9a614 (patch) | |
tree | f49167fcfbbdb02c267b0026e1c1022fa4fab7ae /src | |
parent | 050d7e19983e6123cba650907d7d33acf2640956 (diff) | |
download | systemd-d79ca7a622abbb0df6f5166cc0e4669373d9a614.tar.gz |
journal-upload: Update watchdog while in curl_easy_perform
It was observed that a combination of high log throughput, low I/O speed on the journal-remote side, and many nodes uploading simultaneously caused the journal-upload process to dump core because of watchdog starvation. This happens because journal-upload stays inside curl_easy_perform(): it cannot upload fast enough to reach the end of the journal, and currently it returns from curl_easy_perform() only when the end of the journal is reached. Therefore a check is added in journal_input_callback() that updates the watchdog if the time elapsed since the start of the uploading process (or since the previous watchdog update) is greater than WATCHDOG_USEC/2.
Diffstat (limited to 'src')
-rw-r--r-- | src/journal-remote/journal-upload-journal.c | 25 | ||||
-rw-r--r-- | src/journal-remote/journal-upload.c | 1 | ||||
-rw-r--r-- | src/journal-remote/journal-upload.h | 1 |
3 files changed, 27 insertions, 0 deletions
diff --git a/src/journal-remote/journal-upload-journal.c b/src/journal-remote/journal-upload-journal.c index e61b6bc68f..ac6eb58a9f 100644 --- a/src/journal-remote/journal-upload-journal.c +++ b/src/journal-remote/journal-upload-journal.c @@ -25,6 +25,7 @@ #include "log.h" #include "utf8.h" #include "util.h" +#include "sd-daemon.h" /** * Write up to size bytes to buf. Return negative on error, and number of @@ -242,6 +243,28 @@ static ssize_t write_entry(char *buf, size_t size, Uploader *u) { assert_not_reached("WTF?"); } +static inline void check_update_watchdog(Uploader *u) { + usec_t watchdog_usec; + static usec_t before; + usec_t after; + usec_t elapsed_time; + + if (sd_watchdog_enabled(false, &watchdog_usec) < 0) + return; + if (u->reset_reference_timestamp) { + before = now(CLOCK_MONOTONIC); + u->reset_reference_timestamp = false; + } else { + after = now(CLOCK_MONOTONIC); + elapsed_time = usec_sub(after, before); + if (elapsed_time > watchdog_usec / 2) { + log_debug("Update watchdog timer"); + sd_notify(false, "WATCHDOG=1"); + u->reset_reference_timestamp = true; + } + } +} + static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void *userp) { Uploader *u = userp; int r; @@ -252,6 +275,8 @@ static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void assert(u); assert(nmemb <= SSIZE_MAX / size); + check_update_watchdog(u); + j = u->journal; while (j && filled < size * nmemb) { diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c index 6e1c3bb9ef..f2e9117f9f 100644 --- a/src/journal-remote/journal-upload.c +++ b/src/journal-remote/journal-upload.c @@ -494,6 +494,7 @@ static int perform_upload(Uploader *u) { assert(u); + u->reset_reference_timestamp = true; code = curl_easy_perform(u->easy); if (code) { if (u->error[0]) diff --git a/src/journal-remote/journal-upload.h b/src/journal-remote/journal-upload.h index b8cd04d527..a31735bd08 100644 --- a/src/journal-remote/journal-upload.h +++ 
b/src/journal-remote/journal-upload.h @@ -48,6 +48,7 @@ typedef struct Uploader { size_t entries_sent; char *last_cursor, *current_cursor; + bool reset_reference_timestamp; } Uploader; #define JOURNAL_UPLOAD_POLL_TIMEOUT (10 * USEC_PER_SEC) |