mirror of
https://github.com/systemd/systemd.git
synced 2024-12-26 03:22:00 +03:00
journal-upload: Update watchdog while in curl_easy_perform
It was observed that a combination of high log throughput, low I/O speed on the journal-remote side, and many nodes uploading simultaneously caused the journal-upload process to dump core because of watchdog starvation. The cause is that journal-upload stays inside curl_easy_perform() when it cannot upload fast enough to reach the end of the journal: currently journal-upload returns from curl_easy_perform() only when the end of the journal is reached. Therefore a check is added in journal_input_callback() which updates the watchdog if the time elapsed since the start of the uploading process is greater than WATCHDOG_USEC/2.
This commit is contained in:
parent
050d7e1998
commit
d79ca7a622
@ -25,6 +25,7 @@
|
||||
#include "log.h"
|
||||
#include "utf8.h"
|
||||
#include "util.h"
|
||||
#include "sd-daemon.h"
|
||||
|
||||
/**
|
||||
* Write up to size bytes to buf. Return negative on error, and number of
|
||||
@ -242,6 +243,28 @@ static ssize_t write_entry(char *buf, size_t size, Uploader *u) {
|
||||
assert_not_reached("WTF?");
|
||||
}
|
||||
|
||||
/**
 * Ping the service watchdog if enough time has passed while uploading.
 *
 * Invoked from journal_input_callback() on every read request from curl, so
 * that the watchdog keeps being fed even when curl_easy_perform() does not
 * return for a long time.
 *
 * The reference timestamp is (re)initialized whenever
 * u->reset_reference_timestamp is set (perform_upload() sets it right before
 * calling curl_easy_perform()). Once more than half of the watchdog interval
 * has elapsed since the reference timestamp, "WATCHDOG=1" is sent and the
 * reference timestamp is moved to the time of that notification.
 *
 * @param u  uploader state; only reset_reference_timestamp is read/written.
 */
static inline void check_update_watchdog(Uploader *u) {
        /* Reference point for the elapsed-time computation; kept across calls. */
        static usec_t before;
        usec_t watchdog_usec;
        usec_t after;
        usec_t elapsed_time;

        /* A return value of 0 means no watchdog is configured for this
         * service; watchdog_usec is not guaranteed to be filled in then, so
         * it must not be read. Negative values are errors. Only a positive
         * return value makes watchdog_usec meaningful. */
        if (sd_watchdog_enabled(false, &watchdog_usec) <= 0)
                return;

        if (u->reset_reference_timestamp) {
                /* Start of a new measuring period: just record the time. */
                before = now(CLOCK_MONOTONIC);
                u->reset_reference_timestamp = false;
                return;
        }

        after = now(CLOCK_MONOTONIC);
        elapsed_time = usec_sub(after, before);
        if (elapsed_time > watchdog_usec / 2) {
                log_debug("Update watchdog timer");
                sd_notify(false, "WATCHDOG=1");
                /* Restart the measuring period from the moment of the ping
                 * rather than from the next callback invocation, so the time
                 * between this ping and the next callback is accounted for. */
                before = after;
        }
}
|
||||
|
||||
static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void *userp) {
|
||||
Uploader *u = userp;
|
||||
int r;
|
||||
@ -252,6 +275,8 @@ static size_t journal_input_callback(void *buf, size_t size, size_t nmemb, void
|
||||
assert(u);
|
||||
assert(nmemb <= SSIZE_MAX / size);
|
||||
|
||||
check_update_watchdog(u);
|
||||
|
||||
j = u->journal;
|
||||
|
||||
while (j && filled < size * nmemb) {
|
||||
|
@ -494,6 +494,7 @@ static int perform_upload(Uploader *u) {
|
||||
|
||||
assert(u);
|
||||
|
||||
u->reset_reference_timestamp = true;
|
||||
code = curl_easy_perform(u->easy);
|
||||
if (code) {
|
||||
if (u->error[0])
|
||||
|
@ -48,6 +48,7 @@ typedef struct Uploader {
|
||||
|
||||
size_t entries_sent;
|
||||
char *last_cursor, *current_cursor;
|
||||
bool reset_reference_timestamp;
|
||||
} Uploader;
|
||||
|
||||
#define JOURNAL_UPLOAD_POLL_TIMEOUT (10 * USEC_PER_SEC)
|
||||
|
Loading…
Reference in New Issue
Block a user