summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2019-02-18 09:56:58 -0600
committerDavid Teigland <teigland@redhat.com>2019-02-19 15:24:12 -0600
commitc82d05c5ec4928e1c9d875cd28c9ac1397f8941c (patch)
treec23547b2d2e7ba432e4efcec8778c61eaf2e69f8
parentbe1f54d206fbdc58a094b20bd011afb8206d9552 (diff)
downloadlvm2-dev-dct-lvmetad-init-b.tar.gz
pvscan: fix lvmetad init racedev-dct-lvmetad-init-b
Given bad timing between two concurrent pvscan --cache -aay dev commands, one could initialize lvmetad, then the other could initialize lvmetad. The second init could potentially be based on an earlier pvscan command where there were fewer devices, which could cause a device to be missing from lvmetad in the end. This sends an extra flag from pvscan to lvmetad indicating that the pvscan is initializing lvmetad from an empty state. lvmetad checks this flag before allowing the initialization, and if it's not currently in an empty state (doesn't require initialization) then it will not accept initialization again. This prevents lvmetad from being re-initialized by an earlier instance of pvscan that did not know about more recent device additions. examples: "token none" in lvmetad means that lvmetad has no info and needs to be initialized. Any process that sees this will first initialize lvmetad. device a comes online, triggers pvscan-a. device b comes online, triggers pvscan-b. case 1: pvscan-a sees lvmetad token "none" pvscan-b sees lvmetad token "none" pvscan-b sets token "updating" (for "none") lvmetad sees that its token is still "none", allows update pvscan-b sends device info for a and b pvscan-b sets token to "filter" pvscan-a sets token "updating" (for "none") lvmetad sees that its token is now "filter", disallows update pvscan-a sends nothing The device from pvscan-a should still be included in lvmetad by pvscan-b because pvscan-b gets the device list after a has come online. If pvscan-b gets the device list before a has come online, then pvscan-b has already set the token, and pvscan-a cannot see token "none". case 2: pvscan-a sees lvmetad token "none" pvscan-b sees lvmetad token "none" pvscan-b sets token "updating" (for "none") lvmetad sees that its token is still "none", allows update pvscan-a sets token "updating" (for "none") lvmetad sees that its token is now "updating", disallows update pvscan-b sends device info for a and b pvscan-b sets token to "filter" case 3: pvscan-a sees lvmetad token "none" pvscan-b sees lvmetad token "none" pvscan-a sets token "updating" (for "none") lvmetad sees that its token is still "none", allows update pvscan-b sets token "updating" (for "none") lvmetad sees that its token is now "updating", disallows update pvscan-a sends device info for a and b pvscan-a sets token to "filter" If b came online after pvscan-a set token "updating", then pvscan-a may not send b to lvmetad. But in that case, pvscan-b would not see token "none" and would wait for pvscan-a to finish (if pvscan-a was still running.)
-rw-r--r--daemons/lvmetad/lvmetad-core.c21
-rw-r--r--lib/cache/lvmetad.c44
-rw-r--r--lib/cache/lvmetad.h5
-rw-r--r--tools/lvmcmdline.c2
-rw-r--r--tools/lvscan.c2
-rw-r--r--tools/pvscan.c7
-rw-r--r--tools/vgscan.c2
7 files changed, 68 insertions, 15 deletions
diff --git a/daemons/lvmetad/lvmetad-core.c b/daemons/lvmetad/lvmetad-core.c
index 72473d7b3..414aa1f52 100644
--- a/daemons/lvmetad/lvmetad-core.c
+++ b/daemons/lvmetad/lvmetad-core.c
@@ -2666,6 +2666,7 @@ static response handler(daemon_state s, client_handle h, request r)
const char *cmd;
int prev_in_progress, this_in_progress;
int update_timeout;
+ int init_none;
int pid;
int cache_lock = 0;
int info_lock = 0;
@@ -2675,6 +2676,7 @@ static response handler(daemon_state s, client_handle h, request r)
pid = (int)daemon_request_int(r, "pid", 0);
cmd = daemon_request_str(r, "cmd", "NONE");
update_timeout = (int)daemon_request_int(r, "update_timeout", 0);
+ init_none = (int)daemon_request_int(r, "init_none", 0);
pthread_mutex_lock(&state->token_lock);
@@ -2694,7 +2696,24 @@ static response handler(daemon_state s, client_handle h, request r)
prev_in_progress = !strcmp(state->token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
this_in_progress = !strcmp(token, LVMETAD_TOKEN_UPDATE_IN_PROGRESS);
- if (!prev_in_progress && this_in_progress) {
+ if (init_none && state->token[0]) {
+ /*
+ * token has been set to something (not "none" / empty), and
+ * init_none means we expected it to be empty
+ */
+ pthread_mutex_unlock(&state->token_lock);
+
+ DEBUGLOG(state, "token_mismatch current \"%s\" got init_none from pid %d cmd %s",
+ state->token, pid, cmd ?: "none");
+
+ return daemon_reply_simple("token_init_race",
+ "expected = %s", state->token,
+ "received = %s", token,
+ "update_pid = " FMTd64, (int64_t)state->update_pid,
+ "reason = %s", "another command is doing init",
+ NULL);
+
+ } else if (!prev_in_progress && this_in_progress) {
/* New update is starting (filter token is replaced by update token) */
(void) dm_strncpy(prev_token, state->token, sizeof(prev_token));
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c
index 86a880adb..5d2409f28 100644
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -37,6 +37,7 @@ static char *_lvmetad_token = NULL;
static const char *_lvmetad_socket = NULL;
static struct cmd_context *_lvmetad_cmd = NULL;
static int64_t _lvmetad_update_timeout;
+static int _send_init_for_none;
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg, const char *vgid, struct format_type *fmt);
@@ -242,7 +243,7 @@ void lvmetad_release_token(void)
* command's use of lvmetad and return 0.
*/
-int lvmetad_token_matches(struct cmd_context *cmd)
+int lvmetad_token_matches(struct cmd_context *cmd, int *init_none)
{
daemon_reply reply;
const char *daemon_token;
@@ -314,6 +315,8 @@ retry:
*/
if (!strcmp(daemon_token, "none")) {
log_debug_lvmetad("lvmetad initialization needed.");
+ if (init_none)
+ *init_none = 1;
ret = 0;
goto out;
}
@@ -433,6 +436,7 @@ retry:
if (!daemon_request_extend(req,
"token = %s", _lvmetad_token ?: "none",
"update_timeout = " FMTd64, (int64_t)wait_sec,
+ "init_none = " FMTd64, _send_init_for_none,
"pid = " FMTd64, (int64_t)getpid(),
"cmd = %s", get_cmd_name(),
NULL)) {
@@ -554,7 +558,7 @@ out:
* some failure cases.
*/
-static int _token_update(int *replaced_update)
+static int _token_update(int *replaced_update, int init_for_none)
{
daemon_reply reply;
const char *token_expected;
@@ -563,9 +567,16 @@ static int _token_update(int *replaced_update)
int update_pid;
int ending_our_update;
- log_debug_lvmetad("Sending lvmetad token_update %s", _lvmetad_token);
+ log_debug_lvmetad("Sending lvmetad token_update %s init_for_none %d",
+ _lvmetad_token, init_for_none);
+
+ if (init_for_none)
+ _send_init_for_none = 1;
+
reply = _lvmetad_send(NULL, "token_update", NULL);
+ _send_init_for_none = 0;
+
if (replaced_update)
*replaced_update = 0;
@@ -579,6 +590,17 @@ static int _token_update(int *replaced_update)
reply_str = daemon_reply_str(reply, "response", "");
/*
+ * Two pvscans saw that lvmetad had token "none" and each decided to
+ * initialize the state. When we tried to set the token to updating,
+ * lvmetad had already left the "none" state.
+ */
+ if (!strcmp(reply_str, "token_init_race")) {
+ log_warn("WARNING: lvmetad is being initialized by another command (pid %d).", update_pid);
+ daemon_reply_destroy(reply);
+ return 0;
+ }
+
+ /*
* A mismatch can only happen when this command attempts to set the
* token to filter:<hash> at the end of its update, but the update has
* been preempted in lvmetad by a new one (from update_pid).
@@ -2330,7 +2352,7 @@ bad:
* generally revert disk scanning and not use lvmetad.
*/
-int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
+static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait, int init_for_none)
{
struct device_list *devl, *devl2;
struct dm_list scan_devs;
@@ -2363,7 +2385,7 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
future_token = _lvmetad_token;
_lvmetad_token = (char *) LVMETAD_TOKEN_UPDATE_IN_PROGRESS;
- if (!_token_update(&replaced_update)) {
+ if (!_token_update(&replaced_update, init_for_none)) {
log_error("Failed to start lvmetad update.");
_lvmetad_token = future_token;
return 0;
@@ -2438,7 +2460,7 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
if (!ret)
return 0;
- if (!_token_update(NULL)) {
+ if (!_token_update(NULL, 0)) {
log_error("Failed to update lvmetad token after device scan.");
return 0;
}
@@ -2461,6 +2483,16 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
return ret;
}
+int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
+{
+ return _lvmetad_pvscan_all_devs(cmd, do_wait, 0);
+}
+
+int lvmetad_pvscan_all_devs_init(struct cmd_context *cmd, int do_wait, int init_for_none)
+{
+ return _lvmetad_pvscan_all_devs(cmd, do_wait, init_for_none);
+}
+
int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg)
{
char uuid[64];
diff --git a/lib/cache/lvmetad.h b/lib/cache/lvmetad.h
index 73c26453b..90c2f3c13 100644
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -152,10 +152,11 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
struct dm_list *changed_vgnames);
int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait);
+int lvmetad_pvscan_all_devs_init(struct cmd_context *cmd, int do_wait, int init_for_none);
int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);
-int lvmetad_token_matches(struct cmd_context *cmd);
+int lvmetad_token_matches(struct cmd_context *cmd, int *init_none);
int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid);
@@ -200,7 +201,7 @@ static inline int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
static inline int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg) {return 0;}
static inline void lvmetad_validate_global_cache(struct cmd_context *cmd, int force) {}
-static inline int lvmetad_token_matches(struct cmd_context *cmd) {return 1;}
+static inline int lvmetad_token_matches(struct cmd_context *cmd, int *init_none) {return 1;}
static inline int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid) {return 0;}
diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c
index a9c3e4180..68977a943 100644
--- a/tools/lvmcmdline.c
+++ b/tools/lvmcmdline.c
@@ -2980,7 +2980,7 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
* disk scanning.
*/
if (lvmetad_used() && !_cmd_no_lvmetad_autoscan(cmd)) {
- if (cmd->include_foreign_vgs || !lvmetad_token_matches(cmd)) {
+ if (cmd->include_foreign_vgs || !lvmetad_token_matches(cmd, NULL)) {
if (lvmetad_used() && !lvmetad_pvscan_all_devs(cmd, cmd->include_foreign_vgs ? 1 : 0)) {
log_warn("WARNING: Not using lvmetad because cache update failed.");
lvmetad_make_unused(cmd);
diff --git a/tools/lvscan.c b/tools/lvscan.c
index c38208ab0..10eed69d0 100644
--- a/tools/lvscan.c
+++ b/tools/lvscan.c
@@ -102,7 +102,7 @@ int lvscan(struct cmd_context *cmd, int argc, char **argv)
log_verbose("Ignoring lvscan --cache because lvmetad is not in use.");
/* Needed because this command has NO_LVMETAD_AUTOSCAN. */
- if (lvmetad_used() && (!lvmetad_token_matches(cmd) || lvmetad_is_disabled(cmd, &reason))) {
+ if (lvmetad_used() && (!lvmetad_token_matches(cmd, NULL) || lvmetad_is_disabled(cmd, &reason))) {
if (lvmetad_used() && !lvmetad_pvscan_all_devs(cmd, 0)) {
log_warn("WARNING: Not using lvmetad because cache update failed.");
lvmetad_make_unused(cmd);
diff --git a/tools/pvscan.c b/tools/pvscan.c
index 3fb702110..3a21cd727 100644
--- a/tools/pvscan.c
+++ b/tools/pvscan.c
@@ -316,6 +316,7 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
int all_vgs = 0;
int remove_errors = 0;
int add_errors = 0;
+ int init_for_none = 0;
int ret = ECMD_PROCESSED;
dm_list_init(&found_vgnames);
@@ -389,8 +390,8 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
* the need for this case so that 'pvscan --cache dev' is guaranteed to
* never scan any devices other than those specified.
*/
- if (!lvmetad_token_matches(cmd)) {
- if (lvmetad_used() && !lvmetad_pvscan_all_devs(cmd, 0)) {
+ if (!lvmetad_token_matches(cmd, &init_for_none)) {
+ if (lvmetad_used() && !lvmetad_pvscan_all_devs_init(cmd, 0, init_for_none)) {
log_warn("WARNING: Not updating lvmetad because cache update failed.");
ret = ECMD_FAILED;
goto out;
@@ -708,7 +709,7 @@ int pvscan(struct cmd_context *cmd, int argc, char **argv)
"of exported volume group(s)" : "in no volume group");
/* Needed because this command has NO_LVMETAD_AUTOSCAN. */
- if (lvmetad_used() && (!lvmetad_token_matches(cmd) || lvmetad_is_disabled(cmd, &reason))) {
+ if (lvmetad_used() && (!lvmetad_token_matches(cmd, NULL) || lvmetad_is_disabled(cmd, &reason))) {
if (lvmetad_used() && !lvmetad_pvscan_all_devs(cmd, 0)) {
log_warn("WARNING: Not using lvmetad because cache update failed.");
lvmetad_make_unused(cmd);
diff --git a/tools/vgscan.c b/tools/vgscan.c
index f9fa3821c..b79dd2ad6 100644
--- a/tools/vgscan.c
+++ b/tools/vgscan.c
@@ -100,7 +100,7 @@ int vgscan(struct cmd_context *cmd, int argc, char **argv)
if (!lvmetad_used() && arg_is_set(cmd, cache_long_ARG))
log_verbose("Ignoring vgscan --cache command because lvmetad is not in use.");
- if (lvmetad_used() && (arg_is_set(cmd, cache_long_ARG) || !lvmetad_token_matches(cmd) || lvmetad_is_disabled(cmd, &reason))) {
+ if (lvmetad_used() && (arg_is_set(cmd, cache_long_ARG) || !lvmetad_token_matches(cmd, NULL) || lvmetad_is_disabled(cmd, &reason))) {
if (lvmetad_used() && !lvmetad_pvscan_all_devs(cmd, arg_is_set(cmd, cache_long_ARG))) {
log_warn("WARNING: Not using lvmetad because cache update failed.");
lvmetad_make_unused(cmd);