lvmetad: preemptively check and rescan in commands

Move checking the lvmetad state, and the possible rescan, out of lvmetad_send() to the start of the command. Previously, the token mismatch and rescan would occur within lvmetad_send() for some other request. Now, the token mismatch is detected earlier, so the rescan can be done before the main command is in progress. Rescanning deep within the processing of another command will disturb the lvmcache state of that other command. A rescan already exists at the start of the command for the case where foreign VGs are going to be read. This same rescan is now also performed when there is an lvmetad token mismatch (from a changed global_filter).
author: David Teigland <teigland@redhat.com> 2016-01-28 16:40:26 -0600
committer: David Teigland <teigland@redhat.com> 2016-02-11 16:48:56 -0600
commit: 6324409ee992a107e9a341f65e5b2875fa2a3a7a (patch)
tree: b617d3a017f0f4140d1920dd2c36c3b6fb1545a4
parent: d518977073e9cb05c9d3a5ff53900860f1eea4ee (diff)
download: lvm2-6324409ee992a107e9a341f65e5b2875fa2a3a7a.tar.gz
6 files changed, 191 insertions, 27 deletions
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c
index 90ff4cb50..abe916e6b 100644
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -146,6 +146,11 @@ void lvmetad_connect_or_warn(void)
 	}
 }
 
+int lvmetad_is_connected(void)
+{
+	return _lvmetad_connected;
+}
+
 int lvmetad_used(void)
 {
 	return _lvmetad_use;
@@ -206,6 +211,108 @@ void lvmetad_set_socket(const char *sock)
 	_lvmetad_socket = sock;
 }
 
+/*
+ * Check if lvmetad's token matches our token.  The token is a hash
+ * of the global filter used to populate lvmetad.  The lvmetad token
+ * was set by the last command to populate lvmetad, and it was set to
+ * the hash of the global filter that command used when scanning to
+ * populate lvmetad.
+ *
+ * Our token is a hash of the global filter this command is using.
+ *
+ * If the lvmetad token is not set (or "none"), then lvmetad has not
+ * been populated.  If the lvmetad token is "update in progress", then
+ * lvmetad is currently being populated (this should be temporary).
+ * If the lvmetad token otherwise differs from ours, then lvmetad was
+ * populated using a different global filter than we are using.
+ *
+ * Return 1 if the lvmetad token matches ours.  We can use it as is.
+ *
+ * Return 0 if the lvmetad token does not match ours (lvmetad is
+ * empty or populated using a different global filter).
+ * We cannot use the lvmetad cache until we repopulate it
+ * (and set lvmetad's token to match ours.)
+ *
+ * Return 0 and log an error if lvmetad is stuck being updated.
+ * We can't use it.  This shouldn't happen, but could if
+ * the command updating lvmetad gets stuck, e.g. trying to
+ * read a bad device.
+ * FIXME: attempt to disable lvmetad.
+ */
+
+int lvmetad_token_matches(struct cmd_context *cmd)
+{
+	daemon_reply reply;
+	const char *daemon_token;
+	int retries = 0;
+	int ret = 1;
+
+retry:
+	log_debug_lvmetad("lvmetad send get_global_info");
+
+	reply = daemon_send_simple(_lvmetad, "get_global_info",
+				   "token = %s", "skip",
+				   NULL);
+	if (reply.error) {
+		log_error("lvmetad_token_matches get_global_info error %d", reply.error);
+		ret = 0;
+		goto out;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmetad_token_matches get_global_info not ok");
+		ret = 0;
+		goto out;
+	}
+
+	daemon_token = daemon_reply_str(reply, "token", NULL);
+
+	if (!daemon_token) {
+		log_error("lvmetad_token_matches no token returned");
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * If lvmetad is being updated by another command, then sleep and retry
+	 * until the token shows the update is done, and go on to the token
+	 * comparison.  FIXME: after retrying enough, quit and disable the use
+	 * of lvmetad for this command.
+	 */
+	if (!strcmp(daemon_token, "update in progress")) {
+		if (retries > 120) {
+			/* FIXME: disable lvmetad for this command. */
+			log_error("Not using lvmetad which is busy.");
+			ret = 0;
+			goto out;
+		}
+		log_warn("lvmetad is being updated, retrying...");
+		usleep(500000);
+		retries++;
+		goto retry;
+	}
+
+	/*
+	 * lvmetad is empty, not yet populated.
+	 */
+	if (!strcmp(daemon_token, "none")) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * lvmetad has an unmatching token; it was last populated using
+	 * a different global filter.
+	 */
+	if (strcmp(daemon_token, _lvmetad_token)) {
+		ret = 0;
+		goto out;
+	}
+out:
+	daemon_reply_destroy(reply);
+	return ret;
+}
+
 static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
 				    int ignore_obsolete);
 
@@ -219,6 +326,8 @@ static daemon_reply _lvmetad_send(const char *id, ...)
 	unsigned max_remaining_sleep_times = 1;
 	unsigned wait_usecs;
 
+	log_debug_lvmetad("lvmetad_send %s", id);
+
 retry:
 	req = daemon_request_make(id);
 
@@ -235,37 +344,28 @@ retry:
 
 	daemon_request_destroy(req);
 
-	/*
-	 * If another process is trying to scan, it might have the
-	 * same future token id and it's better to wait and avoid doing
-	 * the work multiple times. For the case where the future token is
-	 * different, the wait is randomized so that multiple waiting
-	 * processes do not start scanning all at once.
-	 *
-	 * If the token is mismatched because of global_filter changes,
-	 * we re-scan immediately, but if we lose the potential race for
-	 * the update, we back off for a short while (0.05-0.5 seconds) and
-	 * try again.
-	 */
 	if (!repl.error && !strcmp(daemon_reply_str(repl, "response", ""), "token_mismatch") &&
 	    num_rescans < MAX_RESCANS && total_usecs_waited < (SCAN_TIMEOUT_SECONDS * 1000000) && !test_mode()) {
-		if (!strcmp(daemon_reply_str(repl, "expected", ""), "update in progress") ||
-		    max_remaining_sleep_times) {
+
+		/*
+		 * The other command should finish updating lvmetad soon.
+		 * Sleep to give it a chance to finish, then retry.
+		 */
+		if (!strcmp(daemon_reply_str(repl, "expected", ""), "update in progress") || max_remaining_sleep_times) {
+			log_debug_lvmetad("lvmetad is not ready, retrying...");
 			wait_usecs = 50000 + lvm_even_rand(&_lvmetad_cmd->rand_seed, 450000); /* between 0.05s and 0.5s */
 			(void) usleep(wait_usecs);
 			total_usecs_waited += wait_usecs;
 			if (max_remaining_sleep_times)
 				max_remaining_sleep_times--;	/* Sleep once before rescanning the first time, then 5 times each time after that. */
 		} else {
-			/* If the re-scan fails here, we try again later. */
-			(void) _lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL, 0);
-			num_rescans++;
-			max_remaining_sleep_times = 5;
+			log_error("lvmetad cache is not usable, update lvmetad and retry command.");
+			goto out;
 		}
 		daemon_reply_destroy(repl);
 		goto retry;
 	}
-
+out:
 	return repl;
 }
 
@@ -1427,6 +1527,8 @@ static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler
 		return 0;
 	}
 
+	log_debug_lvmetad("Scanning all devices to update lvmetad.");
+
 	if (!(iter = dev_iter_create(cmd->lvmetad_filter, 1))) {
 		log_error("dev_iter creation failed");
 		return 0;
@@ -1728,6 +1830,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
 	if (force)
 		goto do_scan;
 
+	log_debug_lvmetad("lvmetad validate send get_global_info");
+
 	reply = daemon_send_simple(_lvmetad, "get_global_info",
 				   "token = %s", "skip",
 				   NULL);
@@ -1760,7 +1864,12 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
 
 	/*
 	 * Update the local lvmetad cache so it correctly reflects any
-	 * changes made on remote hosts.
+	 * changes made on remote hosts.  (It's possible that this command
+	 * already refreshed the local lvmetad because of a token change,
+	 * but we need to do it again here since we now hold the global
+	 * lock.  Another host may have changed things between the time
+	 * we rescanned for the token, and the time we acquired the global
+	 * lock.)
 	 */
 	if (!lvmetad_pvscan_all_devs(cmd, NULL))
 		stack; /* FIXME: Anything more on this error path ? */
@@ -1771,6 +1880,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
 	 * from lvmetad will not see global_invalid until
 	 * another host makes another global change.
 	 */
+	log_debug_lvmetad("lvmetad validate send set_global_info");
+
 	reply = daemon_send_simple(_lvmetad, "set_global_info",
 				   "token = %s", "skip",
 				   "global_invalid = " FMTd64, INT64_C(0),
diff --git a/lib/cache/lvmetad.h b/lib/cache/lvmetad.h
index ce4affa8e..c5636e2a6 100644
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -71,6 +71,12 @@ int lvmetad_active(void);
 void lvmetad_connect_or_warn(void);
 
 /*
+ * Check if lvmetad is connected.  This is different from lvmetad_active()
+ * in that it doesn't connect to lvmetad.
+ */
+int lvmetad_is_connected(void);
+
+/*
  * Drop connection to lvmetad. A subsequent lvmetad_connect_or_warn or
  * lvmetad_active will re-establish the connection (possibly at a
  * different socket path).
@@ -168,6 +174,7 @@ int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handl
 
 int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
 void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);
+int lvmetad_token_matches(struct cmd_context *cmd);
 
 int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid);
 
@@ -200,6 +207,8 @@ int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const cha
 #    define lvmetad_vg_clear_outdated_pvs(vg)           (1)
 #    define lvmetad_validate_global_cache(cmd, force)	do { } while (0)
 #    define lvmetad_vg_is_foreign(cmd, vgname, vgid) (0)
+#    define lvmetad_token_matches(cmd) (1)
+#    define lvmetad_is_connected() (0)
 
 #  endif	/* LVMETAD_SUPPORT */
 
diff --git a/tools/commands.h b/tools/commands.h
index 651852d11..a30ed69c5 100644
--- a/tools/commands.h
+++ b/tools/commands.h
@@ -965,7 +965,7 @@ xx(pvs,
 
 xx(pvscan,
    "List all physical volumes",
-   PERMITTED_READ_ONLY | LOCKD_VG_SH,
+   PERMITTED_READ_ONLY | LOCKD_VG_SH | DISABLE_BUILTIN_PVSCAN,
    "pvscan\n"
    "\t[-b|--background]\n"
    "\t[--cache [-a|--activate ay] [ DevicePath | -j|--major major --minor minor]...]\n"
diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c
index 29c1c7eac..a42ecd932 100644
--- a/tools/lvmcmdline.c
+++ b/tools/lvmcmdline.c
@@ -1640,13 +1640,25 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
 	}
 
 	/*
-	 * Other hosts might have changed foreign VGs so enforce a rescan
-	 * before processing any command using them.
+	 * The lvmetad cache may need to be repopulated before we use it because:
+	 * - We are reading foreign VGs which other hosts may have changed
+	 *   which our lvmetad would not have seen.
+	 * - lvmetad may have just been started and no command has been run
+	 *   to populate it yet (e.g. no pvscan --cache was run).
+	 * - Another local command may have run with a different global filter
+	 *   which changed the content of lvmetad from what we want (recognized
+	 *   by different token values.)
+	 *
+	 * Disable this bit of code for pvscan because the equivalent of
+	 * this is what pvscan itself does.
 	 */
-	if (cmd->include_foreign_vgs && lvmetad_used() &&
-	    !lvmetad_pvscan_foreign_vgs(cmd, NULL)) {
-		log_error("Failed to scan devices.");
-		return ECMD_FAILED;
+	if (lvmetad_is_connected() && !(cmd->command->flags & DISABLE_BUILTIN_PVSCAN)) {
+		if (!lvmetad_token_matches(cmd) || cmd->include_foreign_vgs) {
+			if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+				log_warn("WARNING: Disabling use of lvmetad because device scan failed.");
+				lvmetad_set_active(cmd, 0);
+			}
+		}
 	}
 
 	/*
diff --git a/tools/pvscan.c b/tools/pvscan.c
index 7d7a96f44..43f7df35a 100644
--- a/tools/pvscan.c
+++ b/tools/pvscan.c
@@ -278,6 +278,21 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
 		goto out;
 	}
 
+	/*
+	 * FIXME: when specific devs are named, we generally don't
+	 * want to scan any other devs, but if lvmetad is not yet
+	 * populated, the first 'pvscan --cache dev' does need to
+	 * do a full scan.  We want to remove the need for this
+	 * case so that 'pvscan --cache dev' is guaranteed to never
+	 * scan any devices other than those specified.
+	 */
+	if (lvmetad_active() && !lvmetad_token_matches(cmd)) {
+		if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+			log_error("Failed to scan devices");
+			return ECMD_FAILED;
+		}
+	}
+
 	log_verbose("Using physical volume(s) on command line");
 
 	/* Process any command line PVs first. */
@@ -404,6 +419,21 @@ int pvscan(struct cmd_context *cmd, int argc, char **argv)
 			  arg_count(cmd, exported_ARG) ?
 			  "of exported volume group(s)" : "in no volume group");
 
+	/*
+	 * All pvscan commands skip the automatic repopulating of
+	 * lvmetad when the token doesn't match, because pvscan_lvmetad
+	 * above (for pvscan --cache) needs to do that repopulating
+	 * itself.  So for other pvscan commands (without --cache), we
+	 * need to check the lvmetad token and repopulate the cache
+	 * if it doesn't match.
+	 */
+	if (lvmetad_active() && !lvmetad_token_matches(cmd)) {
+		if (!lvmetad_pvscan_all_devs(cmd, NULL)) {
+			log_error("Failed to scan devices");
+			return ECMD_FAILED;
+		}
+	}
+
 	if (!lock_vol(cmd, VG_GLOBAL, LCK_VG_WRITE, NULL)) {
 		log_error("Unable to obtain global lock.");
 		return ECMD_FAILED;
diff --git a/tools/tools.h b/tools/tools.h
index 979b5e341..1a498efb2 100644
--- a/tools/tools.h
+++ b/tools/tools.h
@@ -107,6 +107,8 @@ struct arg_value_group_list {
 #define NO_METADATA_PROCESSING	0x00000040
 /* Command wants to scan for new devices and force labels to be read from them all. */
 #define REQUIRES_FULL_LABEL_SCAN 0x00000080
+/* For pvscan itself, disable the automatic/preemptive pvscan. */
+#define DISABLE_BUILTIN_PVSCAN   0x00000100
  
 /* a register of the lvm commands */
 struct command {
author	David Teigland <teigland@redhat.com>	2016-01-28 16:40:26 -0600
committer	David Teigland <teigland@redhat.com>	2016-02-11 16:48:56 -0600
commit	6324409ee992a107e9a341f65e5b2875fa2a3a7a (patch)
tree	b617d3a017f0f4140d1920dd2c36c3b6fb1545a4
parent	d518977073e9cb05c9d3a5ff53900860f1eea4ee (diff)
download	lvm2-6324409ee992a107e9a341f65e5b2875fa2a3a7a.tar.gz