summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Kozina <okozina@redhat.com>2015-05-06 16:30:17 +0200
committerOndrej Kozina <okozina@redhat.com>2015-05-07 15:50:30 +0200
commitc6819e322b72ede4ac208d44817422e6fd8a24b4 (patch)
tree368254fc2708bf37869381e7420f230a10dcfc2b
parentc1d65cca9e4ffb096f0a8584375af0b9a3edc002 (diff)
downloadlvm2-c6819e322b72ede4ac208d44817422e6fd8a24b4.tar.gz
lvmpolld: make internal polling timeout more robust
lvmpolld reports an lvm2 command run in its domain as unresponsive after (2 * 'interval') seconds, but never sooner than MIN_POLLING_TIMEOUT (currently 60) seconds. Let's call such an event a 'timeout'. If a 'timeout' happens MAX_TIMEOUT times (currently 3) in a row, lvmpolld will consider the lvpoll command unresponsive for too long and will respond with an error to the next progress_info request. lvm2 commands considered dead or unresponsive for too long are not signaled to stop or killed, but lvmpolld is no longer interested in such commands and stops collecting their statuses. 'interval' means the interval in seconds passed by the lvmpolld client.
-rw-r--r--daemons/lvmpolld/lvmpolld-core.c15
-rw-r--r--daemons/lvmpolld/lvmpolld-data-utils.c2
-rw-r--r--daemons/lvmpolld/lvmpolld-data-utils.h2
3 files changed, 11 insertions, 8 deletions
diff --git a/daemons/lvmpolld/lvmpolld-core.c b/daemons/lvmpolld/lvmpolld-core.c
index 693d80667..8844d0cd5 100644
--- a/daemons/lvmpolld/lvmpolld-core.c
+++ b/daemons/lvmpolld/lvmpolld-core.c
@@ -250,12 +250,15 @@ static int poll_for_output(lvmpolld_lv_t *pdlv, lvmpolld_thread_data_t *data)
timeout++;
WARN(pdlv->ls, "%s: %s (PID %d) %s", PD_LOG_PREFIX,
- "polling for output of lvm cmd", pdlv->cmd_pid, "has timed out");
+ "polling for output of the lvm cmd", pdlv->cmd_pid,
+ "has timed out");
if (timeout > MAX_TIMEOUT) {
- ERROR(pdlv->ls, "%s: %s (PID %d)", PD_LOG_PREFIX,
- "Exceeded maximum number of allowed timeouts for lvm cmd",
- pdlv->cmd_pid);
+ ERROR(pdlv->ls, "%s: %s (PID %d) (no output for %d seconds)",
+ PD_LOG_PREFIX,
+ "LVM2 cmd is unresponsive too long",
+ pdlv->cmd_pid,
+ timeout * pdlv_get_timeout(pdlv));
goto out;
}
@@ -555,7 +558,7 @@ static lvmpolld_lv_t *construct_pdlv(request req, lvmpolld_state_t *ls,
unsigned handle_missing_pvs = daemon_request_int(req, LVMPD_PARM_HANDLE_MISSING_PVS, 0);
pdlv = pdlv_create(ls, id, vgname, lvname, sysdir, type,
- interval, 2 * uinterval, pdst);
+ interval, uinterval, pdst);
if (!pdlv) {
ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to create internal LV data structure.");
@@ -668,7 +671,7 @@ static response poll_init(client_handle h, lvmpolld_state_t *ls, request req, en
pdlv->init_rq_count++; /* safe. protected by store lock */
} else {
pdlv = construct_pdlv(req, ls, pdst, interval, id, vgname,
- lvname, sysdir, type, abort, uinterval);
+ lvname, sysdir, type, abort, 2 * uinterval);
if (!pdlv) {
pdst_unlock(pdst);
dm_free(id);
diff --git a/daemons/lvmpolld/lvmpolld-data-utils.c b/daemons/lvmpolld/lvmpolld-data-utils.c
index 154837602..c1b2725c2 100644
--- a/daemons/lvmpolld/lvmpolld-data-utils.c
+++ b/daemons/lvmpolld/lvmpolld-data-utils.c
@@ -109,7 +109,7 @@ lvmpolld_lv_t *pdlv_create(lvmpolld_state_t *ls, const char *id,
.lvname = full_lvname,
.lvm_system_dir_env = lvm_system_dir_env,
.sinterval = dm_strdup(sinterval), /* copy */
- .pdtimeout = pdtimeout ?: PDTIMEOUT_DEF,
+ .pdtimeout = pdtimeout < MIN_POLLING_TIMEOUT ? MIN_POLLING_TIMEOUT : pdtimeout,
.cmd_state = { .retcode = -1, .signal = 0 },
.pdst = pdst,
.init_rq_count = 1
diff --git a/daemons/lvmpolld/lvmpolld-data-utils.h b/daemons/lvmpolld/lvmpolld-data-utils.h
index 9fffeec89..8e39aaf24 100644
--- a/daemons/lvmpolld/lvmpolld-data-utils.h
+++ b/daemons/lvmpolld/lvmpolld-data-utils.h
@@ -25,7 +25,7 @@ typedef struct lvmpolld_state lvmpolld_state_t;
typedef void (*lvmpolld_parse_output_fn_t) (lvmpolld_lv_t *pdlv, const char *line);
/* TODO: replace with configuration option */
-#define PDTIMEOUT_DEF 60
+#define MIN_POLLING_TIMEOUT 60
enum poll_type {
PVMOVE = 0,