summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Teigland <teigland@redhat.com> 2016-01-28 16:40:26 -0600
committer David Teigland <teigland@redhat.com> 2016-04-06 15:04:29 -0500
commit 9621982a649816904e84845f2b42dc25ce03bb48 (patch)
tree 27b4432d031915f2ef2a7310a8f512cdea285648
parent 12105f9e269df7e2d5fc9944c29e3082bc0e94cf (diff)
download lvm2-9621982a649816904e84845f2b42dc25ce03bb48.tar.gz
lvmetad: preemptively check and rescan in commands
Move checking the lvmetad state, and the possible rescan, out of lvmetad_send() to the start of the command. Previously, the token mismatch and rescan would occur within lvmetad_send() for some other request. Now, the token mismatch is detected earlier, so the rescan can be done before the main command is in progress. Rescanning deep within the processing of another command will disturb the lvmcache state of that other command. A rescan already exists at the start of the command for the case where foreign VGs are going to be read. This same rescan is now also performed when there is an lvmetad token mismatch (from a changed global_filter). The commands pvscan/vgscan/lvscan/vgimport are excluded from this preemptive checking/rescanning for lvmetad because they want to do rescanning themselves explicitly. If rescanning devices fails, then lvmetad has not been correctly repopulated and should not be used, so make the command revert to not using lvmetad.
-rw-r--r-- lib/cache/lvmetad.c 152
-rw-r--r-- lib/cache/lvmetad.h 2
-rw-r--r-- tools/commands.h 8
-rw-r--r-- tools/lvmcmdline.c 25
-rw-r--r-- tools/lvscan.c 22
-rw-r--r-- tools/pvscan.c 32
-rw-r--r-- tools/tools.h 2
-rw-r--r-- tools/vgimport.c 8
-rw-r--r-- tools/vgscan.c 23
9 files changed, 222 insertions, 52 deletions
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c
index a483e04e3..6d1cf9ab6 100644
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -210,6 +210,107 @@ void lvmetad_set_socket(const char *sock)
_lvmetad_socket = sock;
}
+/*
+ * Check if lvmetad's token matches our token.  The token is a hash
+ * of the global filter used to populate lvmetad.  The lvmetad token
+ * was set by the last command to populate lvmetad, and it was set to
+ * the hash of the global filter that command used when scanning to
+ * populate lvmetad.
+ *
+ * Our token is a hash of the global filter this command is using.
+ *
+ * If the lvmetad token is not set (or "none"), then lvmetad has not
+ * been populated.  If the lvmetad token is "update in progress", then
+ * lvmetad is currently being populated (this should be temporary).
+ * If the lvmetad token otherwise differs from ours, then lvmetad was
+ * populated using a different global filter than we are using.
+ *
+ * Return 1 if the lvmetad token matches ours.  We can use it as is.
+ *
+ * Return 0 if the lvmetad token does not match ours (lvmetad is
+ * empty or populated using a different global filter).
+ * We cannot use the lvmetad cache until we repopulate it
+ * (and set lvmetad's token to match ours.)
+ *
+ * Also return 0, after logging an error, if the get_global_info
+ * request fails, if the response is not "OK", if no token is
+ * returned, or if lvmetad is stuck being updated.  In the last case
+ * we can't use it.  This shouldn't happen, but could if the command
+ * updating lvmetad gets stuck, e.g. trying to read a bad device.
+ *
+ * The cmd argument is not used by this function.
+ */
+
+int lvmetad_token_matches(struct cmd_context *cmd)
+{
+	daemon_reply reply;
+	const char *daemon_token;
+	int retries = 0;
+	int ret = 1;
+
+retry:
+	log_debug_lvmetad("lvmetad send get_global_info");
+
+	reply = daemon_send_simple(_lvmetad, "get_global_info",
+				   "token = %s", "skip",
+				   NULL);
+	if (reply.error) {
+		log_error("lvmetad_token_matches get_global_info error %d", reply.error);
+		ret = 0;
+		goto out;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmetad_token_matches get_global_info not ok");
+		ret = 0;
+		goto out;
+	}
+
+	daemon_token = daemon_reply_str(reply, "token", NULL);
+
+	if (!daemon_token) {
+		log_error("lvmetad_token_matches no token returned");
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * If lvmetad is being updated by another command, then sleep and retry
+	 * until the token shows the update is done, and go on to the token
+	 * comparison.  Each retry sleeps 0.5s, so we give up after roughly
+	 * 60 seconds.  FIXME: after retrying enough, quit and disable the use
+	 * of lvmetad for this command.
+	 */
+	if (!strcmp(daemon_token, "update in progress")) {
+		if (retries > 120) {
+			/* FIXME: disable lvmetad for this command. */
+			log_error("Not using lvmetad which is busy.");
+			ret = 0;
+			goto out;
+		}
+		log_warn("lvmetad is being updated, retrying...");
+		usleep(500000);
+		retries++;
+		/*
+		 * Release this reply before sending the next request;
+		 * otherwise every retry leaks one reply.  daemon_token is
+		 * not used again before being reassigned.
+		 */
+		daemon_reply_destroy(reply);
+		goto retry;
+	}
+
+	/*
+	 * lvmetad is empty, not yet populated.
+	 */
+	if (!strcmp(daemon_token, "none")) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * lvmetad has an unmatching token; it was last populated using
+	 * a different global filter.
+	 */
+	if (strcmp(daemon_token, _lvmetad_token)) {
+		ret = 0;
+		goto out;
+	}
+out:
+	daemon_reply_destroy(reply);
+	return ret;
+}
+
static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
int ignore_obsolete);
@@ -223,6 +324,8 @@ static daemon_reply _lvmetad_send(const char *id, ...)
unsigned max_remaining_sleep_times = 1;
unsigned wait_usecs;
+ log_debug_lvmetad("lvmetad_send %s", id);
+
retry:
req = daemon_request_make(id);
@@ -239,37 +342,28 @@ retry:
daemon_request_destroy(req);
- /*
- * If another process is trying to scan, it might have the
- * same future token id and it's better to wait and avoid doing
- * the work multiple times. For the case where the future token is
- * different, the wait is randomized so that multiple waiting
- * processes do not start scanning all at once.
- *
- * If the token is mismatched because of global_filter changes,
- * we re-scan immediately, but if we lose the potential race for
- * the update, we back off for a short while (0.05-0.5 seconds) and
- * try again.
- */
if (!repl.error && !strcmp(daemon_reply_str(repl, "response", ""), "token_mismatch") &&
num_rescans < MAX_RESCANS && total_usecs_waited < (SCAN_TIMEOUT_SECONDS * 1000000) && !test_mode()) {
- if (!strcmp(daemon_reply_str(repl, "expected", ""), "update in progress") ||
- max_remaining_sleep_times) {
+
+ /*
+ * The other command should finish updating lvmetad soon.
+ * Sleep to give it a chance to finish, then retry.
+ */
+ if (!strcmp(daemon_reply_str(repl, "expected", ""), "update in progress") || max_remaining_sleep_times) {
+ log_debug_lvmetad("lvmetad is not ready, retrying...");
wait_usecs = 50000 + lvm_even_rand(&_lvmetad_cmd->rand_seed, 450000); /* between 0.05s and 0.5s */
(void) usleep(wait_usecs);
total_usecs_waited += wait_usecs;
if (max_remaining_sleep_times)
max_remaining_sleep_times--; /* Sleep once before rescanning the first time, then 5 times each time after that. */
} else {
- /* If the re-scan fails here, we try again later. */
- (void) _lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL, 0);
- num_rescans++;
- max_remaining_sleep_times = 5;
+ log_error("lvmetad cache is not usable, update lvmetad and retry command.");
+ goto out;
}
daemon_reply_destroy(repl);
goto retry;
}
-
+out:
return repl;
}
@@ -1453,6 +1547,8 @@ static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler
return 0;
}
+ log_debug_lvmetad("Scanning all devices to update lvmetad.");
+
if (!(iter = dev_iter_create(cmd->lvmetad_filter, 1))) {
log_error("dev_iter creation failed");
return 0;
@@ -1752,6 +1848,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
if (force)
goto do_scan;
+ log_debug_lvmetad("lvmetad validate send get_global_info");
+
reply = daemon_send_simple(_lvmetad, "get_global_info",
"token = %s", "skip",
NULL);
@@ -1782,10 +1880,18 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
/*
* Update the local lvmetad cache so it correctly reflects any
- * changes made on remote hosts.
+ * changes made on remote hosts. (It's possible that this command
+ * already refreshed the local lvmetad because of a token change,
+ * but we need to do it again here since we now hold the global
+ * lock. Another host may have changed things between the time
+ * we rescanned for the token, and the time we acquired the global
+ * lock.)
*/
- if (!lvmetad_pvscan_all_devs(cmd, NULL))
- stack; /* FIXME: Anything more on this error path ? */
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
+ return;
+ }
/*
* Clear the global_invalid flag in lvmetad.
@@ -1793,6 +1899,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
* from lvmetad will not see global_invalid until
* another host makes another global change.
*/
+ log_debug_lvmetad("lvmetad validate send set_global_info");
+
reply = daemon_send_simple(_lvmetad, "set_global_info",
"token = %s", "skip",
"global_invalid = " FMTd64, INT64_C(0),
diff --git a/lib/cache/lvmetad.h b/lib/cache/lvmetad.h
index ce4affa8e..d5f3215bf 100644
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -168,6 +168,7 @@ int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handl
int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);
+int lvmetad_token_matches(struct cmd_context *cmd);
int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid);
@@ -200,6 +201,7 @@ int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const cha
# define lvmetad_vg_clear_outdated_pvs(vg) (1)
# define lvmetad_validate_global_cache(cmd, force) do { } while (0)
# define lvmetad_vg_is_foreign(cmd, vgname, vgid) (0)
+# define lvmetad_token_matches(cmd) (1)
# endif /* LVMETAD_SUPPORT */
diff --git a/tools/commands.h b/tools/commands.h
index aba5b40b9..f49e57f22 100644
--- a/tools/commands.h
+++ b/tools/commands.h
@@ -698,7 +698,7 @@ xx(lvs,
xx(lvscan,
"List all logical volumes in all volume groups",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | NO_LVMETAD_AUTOSCAN,
"lvscan\n"
"\t[-a|--all]\n"
"\t[-b|--blockdevice]\n"
@@ -971,7 +971,7 @@ xx(pvs,
xx(pvscan,
"List all physical volumes",
- PERMITTED_READ_ONLY | LOCKD_VG_SH,
+ PERMITTED_READ_ONLY | LOCKD_VG_SH | NO_LVMETAD_AUTOSCAN,
"pvscan\n"
"\t[-b|--background]\n"
"\t[--cache [-a|--activate ay] [ DevicePath | -j|--major major --minor minor]...]\n"
@@ -1243,7 +1243,7 @@ xx(vgextend,
xx(vgimport,
"Register exported volume group with system",
- ALL_VGS_IS_DEFAULT,
+ ALL_VGS_IS_DEFAULT | NO_LVMETAD_AUTOSCAN,
"vgimport\n"
"\t[-a|--all]\n"
"\t[--commandprofile ProfileName]\n"
@@ -1382,7 +1382,7 @@ xx(vgs,
xx(vgscan,
"Search for all volume groups",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | NO_LVMETAD_AUTOSCAN,
"vgscan "
"\t[--cache]\n"
"\t[--commandprofile ProfileName]\n"
diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c
index 2ee54e8ff..3d7f25b4b 100644
--- a/tools/lvmcmdline.c
+++ b/tools/lvmcmdline.c
@@ -1643,13 +1643,26 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
}
/*
- * Other hosts might have changed foreign VGs so enforce a rescan
- * before processing any command using them.
+ * pvscan/vgscan/lvscan/vgimport want their own control over rescanning
+ * to populate lvmetad and have similar code of their own.
+ * Other commands use this general policy for using lvmetad.
+ *
+ * The lvmetad cache may need to be repopulated before we use it because:
+ * - We are reading foreign VGs which other hosts may have changed
+ * which our lvmetad would not have seen.
+ * - lvmetad may have just been started and no command has been run
+ * to populate it yet (e.g. no pvscan --cache was run).
+ * - Another local command may have run with a different global filter
+ * which changed the content of lvmetad from what we want (recognized
+ * by different token values.)
*/
- if (cmd->include_foreign_vgs && lvmetad_used() &&
- !lvmetad_pvscan_foreign_vgs(cmd, NULL)) {
- log_error("Failed to scan devices.");
- return ECMD_FAILED;
+ if (lvmetad_used() && !(cmd->command->flags & NO_LVMETAD_AUTOSCAN)) {
+ if (!lvmetad_token_matches(cmd) || cmd->include_foreign_vgs) {
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
+ }
+ }
}
/*
diff --git a/tools/lvscan.c b/tools/lvscan.c
index 751ecb6d7..6c65918e6 100644
--- a/tools/lvscan.c
+++ b/tools/lvscan.c
@@ -21,10 +21,8 @@ static int _lvscan_single_lvmetad(struct cmd_context *cmd, struct logical_volume
struct dm_list all_pvs;
char pvid_s[64] __attribute__((aligned(8)));
- if (!lvmetad_used()) {
- log_verbose("Ignoring lvscan --cache because lvmetad is not in use.");
+ if (!lvmetad_used())
return ECMD_PROCESSED;
- }
dm_list_init(&all_pvs);
@@ -98,6 +96,24 @@ int lvscan(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
+ if (!lvmetad_used() && arg_is_set(cmd, cache_long_ARG))
+ log_verbose("Ignoring lvscan --cache because lvmetad is not in use.");
+
+ /* Needed because this command has NO_LVMETAD_AUTOSCAN. */
+ if (lvmetad_used() && !lvmetad_token_matches(cmd)) {
+ log_print_unless_silent("Scanning all devices.");
+
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
+ }
+
+ /*
+ * FIXME: doing lvscan --cache after a full scan is pointless.
+ * Should the cache case just exit here?
+ */
+ }
+
return process_each_lv(cmd, argc, argv, 0, NULL,
&lvscan_single);
}
diff --git a/tools/pvscan.c b/tools/pvscan.c
index b224c30b0..a759561f5 100644
--- a/tools/pvscan.c
+++ b/tools/pvscan.c
@@ -234,8 +234,6 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
dev_t devno;
activation_handler handler = NULL;
- cmd->include_foreign_vgs = 1;
-
/*
* Return here immediately if lvmetad is not used.
* Also return if locking_type=3 (clustered) as we
@@ -273,11 +271,29 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
/* Scan everything? */
if (!argc && !devno_args) {
- if (!lvmetad_pvscan_all_devs(cmd, handler))
+ if (!lvmetad_pvscan_all_devs(cmd, handler)) {
+ log_error("Failed to scan devices.");
ret = ECMD_FAILED;
+ }
goto out;
}
+ /*
+ * FIXME: when specific devs are named, we generally don't
+ * want to scan any other devs, but if lvmetad is not yet
+ * populated, the first 'pvscan --cache dev' does need to
+ * do a full scan. We want to remove the need for this
+ * case so that 'pvscan --cache dev' is guaranteed to never
+ * scan any devices other than those specified.
+ */
+ if (lvmetad_used() && !lvmetad_token_matches(cmd)) {
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_error("Failed to scan devices.");
+ ret = ECMD_FAILED;
+ goto out;
+ }
+ }
+
log_verbose("Using physical volume(s) on command line");
/* Process any command line PVs first. */
@@ -404,6 +420,16 @@ int pvscan(struct cmd_context *cmd, int argc, char **argv)
arg_count(cmd, exported_ARG) ?
"of exported volume group(s)" : "in no volume group");
+ /* Needed because this command has NO_LVMETAD_AUTOSCAN. */
+ if (lvmetad_used() && !lvmetad_token_matches(cmd)) {
+ log_print_unless_silent("Scanning all devices.");
+
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
+ }
+ }
+
if (!lock_vol(cmd, VG_GLOBAL, LCK_VG_WRITE, NULL)) {
log_error("Unable to obtain global lock.");
return ECMD_FAILED;
diff --git a/tools/tools.h b/tools/tools.h
index 3762e8eed..7b1bda3a0 100644
--- a/tools/tools.h
+++ b/tools/tools.h
@@ -110,6 +110,8 @@ struct arg_value_group_list {
#define REQUIRES_FULL_LABEL_SCAN 0x00000080
/* Command must use all specified arg names and fail if all cannot be used. */
#define MUST_USE_ALL_ARGS 0x00000100
+/* Command wants to control the device scan for lvmetad itself. */
+#define NO_LVMETAD_AUTOSCAN 0x00000200
/* a register of the lvm commands */
struct command {
diff --git a/tools/vgimport.c b/tools/vgimport.c
index caeaf09cf..354792cfd 100644
--- a/tools/vgimport.c
+++ b/tools/vgimport.c
@@ -93,9 +93,11 @@ int vgimport(struct cmd_context *cmd, int argc, char **argv)
* We need to reread it to see that it's been exported before we can
* import it.
*/
- if (lvmetad_active() && !lvmetad_pvscan_all_devs(cmd, NULL)) {
- log_error("Failed to scan devices.");
- return ECMD_FAILED;
+ if (lvmetad_used()) {
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
+ }
}
return process_each_vg(cmd, argc, argv, NULL,
diff --git a/tools/vgscan.c b/tools/vgscan.c
index a5b04a33f..7dc8e74b8 100644
--- a/tools/vgscan.c
+++ b/tools/vgscan.c
@@ -61,21 +61,22 @@ int vgscan(struct cmd_context *cmd, int argc, char **argv)
cmd->filter->wipe(cmd->filter);
lvmcache_destroy(cmd, 1, 0);
- if (arg_count(cmd, cache_long_ARG)) {
- cmd->include_foreign_vgs = 1;
+ if (!lvmetad_used() && arg_is_set(cmd, cache_long_ARG))
+ log_verbose("Ignoring vgscan --cache command because lvmetad is not in use.");
- if (lvmetad_active()) {
- if (!lvmetad_pvscan_all_devs(cmd, NULL))
- return ECMD_FAILED;
- }
- else {
- log_error("Cannot proceed since lvmetad is not active.");
- unlock_vg(cmd, VG_GLOBAL);
- return ECMD_FAILED;
+ if (lvmetad_used() && (arg_is_set(cmd, cache_long_ARG) || !lvmetad_token_matches(cmd))) {
+ log_print_unless_silent("Scanning all devices.");
+
+ if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+ lvmetad_set_active(cmd, 0);
}
}
- log_print_unless_silent("Reading all physical volumes. This may take a while...");
+ if (!lvmetad_used())
+ log_print_unless_silent("Reading all physical volumes. This may take a while...");
+ else
+ log_print_unless_silent("Reading volume groups from cache.");
maxret = process_each_vg(cmd, argc, argv, NULL, 0, NULL,
&vgscan_single);