summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2021-06-14 15:31:34 -0500
committerDavid Teigland <teigland@redhat.com>2021-06-14 16:18:33 -0500
commit31e2c4c1a6e0ddb7ee8a34fe96505b3135d51aa6 (patch)
tree593c54fd04131229ed6feeb1ced1fd36f0731a19
parent09b0eea6a010ba37dfd793caecc79876017679de (diff)
downloadlvm2-dev-dct-scan-errors-1.tar.gz
locking: allow using global lock for scanning (dev-dct-scan-errors-1)
When VG metadata consumes a major percentage of the metadata area, acquire the global lock prior to label_scan. Many commands already acquire the global lock prior to label_scan, but VG-specific commands may not otherwise use the global lock. This attempts to avoid the rare situation in which the metadata is large enough to wrap around the metadata area and invalidate the metadata location information that a single command gathered from label_scan. If this wrapping with large sizes occurs, the metadata locations seen during label_scan may be overwritten before the same command is able to use them for vg_read(), causing vg_read() to see invalid metadata and the command to fail. A large number of concurrent lvm commands is also a factor that can lead to this problem, due to longer delays between label_scan and vg_read(). This problem can be avoided if all commands acquire the global lock prior to label_scan, and hold it across all the vg_read() calls. This ensures that the results from label_scan are unchanging during label_scan and remain valid for use in vg_read(). Commands modifying VG metadata take the global lock ex, and those only reading VG metadata use sh. This extra use of the global lock is usually unnecessary, so lvm automatically detects when the extra locking may be needed before starting to use it. When VG metadata is a large enough percentage of the total metadata area, lvm begins doing extra locking. Currently this threshold is 25% (it could be made configurable). When one command sees this threshold has been reached, it creates the file /run/lvm/scan_lock_global. When subsequent commands see that this file exists, they will acquire the global lock prior to their label_scan. When metadata goes below the threshold, the temp file is removed, and commands no longer do the extra locking.
-rw-r--r--lib/cache/lvmcache.c115
-rw-r--r--lib/cache/lvmcache.h9
-rw-r--r--lib/format_text/format-text.c4
-rw-r--r--lib/label/label.c10
-rw-r--r--lib/locking/locking.c4
-rw-r--r--tools/toollib.c57
6 files changed, 170 insertions, 29 deletions
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
index 017134d7d..a015b9fef 100644
--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -88,6 +88,9 @@ static int _vgs_locked = 0;
static int _found_duplicate_vgnames = 0;
static int _outdated_warning = 0;
+static const char *_scan_lock_global_file = DEFAULT_RUN_DIR "/scan_lock_global";
+static int _scan_lock_global_file_exists = 0;
+
int lvmcache_init(struct cmd_context *cmd)
{
/*
@@ -2742,19 +2745,115 @@ bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const c
return true;
}
-static uint64_t _max_metadata_size;
+/*
+ * max_size_bytes and max_size_percent may come from different areas and
+ * different vgs because of different area sizes.
+ */
+static uint64_t _max_metadata_size_bytes;
+static dm_percent_t _max_metadata_size_percent = DM_PERCENT_INVALID;
-void lvmcache_save_metadata_size(uint64_t val)
+void lvmcache_save_metadata_size_bytes(uint64_t val)
{
- if (!_max_metadata_size)
- _max_metadata_size = val;
- else if (_max_metadata_size < val)
- _max_metadata_size = val;
+ if (!_max_metadata_size_bytes)
+ _max_metadata_size_bytes = val;
+ else if (_max_metadata_size_bytes < val)
+ _max_metadata_size_bytes = val;
+}
+
+uint64_t lvmcache_max_metadata_size_bytes(void)
+{
+ return _max_metadata_size_bytes;
+}
+
+/*
+ * TODO: enable/disable scan_lock_global with config setting:
+ * y: always use it
+ * n: never use it
+ * auto (default): use based on /run/lvm/scan_lock_global
+ */
+void lvmcache_save_metadata_size_percent(uint64_t meta_size, uint64_t mdah_size)
+{
+
+ dm_percent_t pc = dm_make_percent(meta_size, mdah_size);
+
+ if (pc == DM_PERCENT_INVALID || pc == DM_PERCENT_FAILED ||
+ pc == DM_PERCENT_0 || pc == DM_PERCENT_1)
+ return;
+
+ if (_max_metadata_size_percent == DM_PERCENT_INVALID) {
+ _max_metadata_size_percent = pc;
+ return;
+ }
+
+ if (_max_metadata_size_percent < pc)
+ _max_metadata_size_percent = pc;
+}
+
+/*
+ * TODO: make the percent at which scan_lock_global is used
+ * configurable?
+ */
+#define SCAN_LOCK_GLOBAL_METADATA_PERCENT (DM_PERCENT_1 * 25)
+
+void set_scan_lock_global(struct cmd_context *cmd)
+{
+ FILE *fp;
+
+ if (_max_metadata_size_percent == DM_PERCENT_INVALID)
+ return;
+
+ if (_max_metadata_size_percent >= SCAN_LOCK_GLOBAL_METADATA_PERCENT) {
+ if (_scan_lock_global_file_exists)
+ return;
+ log_debug("Creating %s.", _scan_lock_global_file);
+ if (!(fp = fopen(_scan_lock_global_file, "w")))
+ return;
+ if (fclose(fp))
+ stack;
+ } else {
+ if (_scan_lock_global_file_exists) {
+ log_debug("Unlinking %s.", _scan_lock_global_file);
+ if (unlink(_scan_lock_global_file))
+ stack;
+ }
+ }
}
-uint64_t lvmcache_max_metadata_size(void)
+int do_scan_lock_global(struct cmd_context *cmd, int *gl_ex)
{
- return _max_metadata_size;
+ struct stat buf;
+
+ if (cmd->nolocking)
+ return 0;
+
+ /* global lock is already held */
+ if (cmd->lockf_global_ex)
+ return 0;
+
+ if (!stat(_scan_lock_global_file, &buf)) {
+ _scan_lock_global_file_exists = 1;
+
+ /*
+ * Tell the caller to use sh or ex. A command that may write
+ * vg metadata should use ex, otherwise sh.
+ *
+ * lockd_vg_default_sh/LOCKD_VG_SH is set for commands that
+ * do not modify vg metadata.
+ *
+ * FIXME: this variable/flag was previously used only for
+ * lvmlockd locking logic, but is now more general, so
+ * it should be renamed.
+ */
+ if (cmd->lockd_vg_default_sh)
+ *gl_ex = 0;
+ else
+ *gl_ex = 1;
+
+ return 1;
+
+ }
+
+ return 0;
}
int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid)
diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h
index 76429fc5b..ea9284af1 100644
--- a/lib/cache/lvmcache.h
+++ b/lib/cache/lvmcache.h
@@ -183,8 +183,9 @@ bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const c
int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid);
-uint64_t lvmcache_max_metadata_size(void);
-void lvmcache_save_metadata_size(uint64_t val);
+uint64_t lvmcache_max_metadata_size_bytes(void);
+void lvmcache_save_metadata_size_bytes(uint64_t val);
+void lvmcache_save_metadata_size_percent(uint64_t meta_size, uint64_t mdah_size);
int dev_in_device_list(struct device *dev, struct dm_list *head);
@@ -226,4 +227,8 @@ void lvmcache_extra_md_component_checks(struct cmd_context *cmd);
unsigned int lvmcache_vg_info_count(void);
+void set_scan_lock_global(struct cmd_context *cmd);
+int do_scan_lock_global(struct cmd_context *cmd, int *gl_ex);
+
+
#endif
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
index 64ad4677c..9c4aedce0 100644
--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -1646,7 +1646,9 @@ int read_metadata_location_summary(const struct format_type *fmt,
vgsummary->mda_size = rlocn->size;
/* Keep track of largest metadata size we find. */
- lvmcache_save_metadata_size(rlocn->size);
+ lvmcache_save_metadata_size_bytes(rlocn->size);
+ /* Keep track of the most full metadata area. */
+ lvmcache_save_metadata_size_percent(rlocn->size, mdah->size);
lvmcache_lookup_mda(vgsummary);
diff --git a/lib/label/label.c b/lib/label/label.c
index 7eef1d99a..3f2305e0e 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -1221,7 +1221,7 @@ int label_scan(struct cmd_context *cmd)
* If the largest metadata is within 1MB of the bcache size, then start
* warning.
*/
- max_metadata_size_bytes = lvmcache_max_metadata_size();
+ max_metadata_size_bytes = lvmcache_max_metadata_size_bytes();
if (max_metadata_size_bytes + (1024 * 1024) > _current_bcache_size_bytes) {
/* we want bcache to be 1MB larger than the max metadata seen */
@@ -1236,6 +1236,14 @@ int label_scan(struct cmd_context *cmd)
(unsigned long long)want_size_kb);
}
+ /*
+ * If vg metadata is using a large percentage of a metadata area, then
+ * create /run/lvm/scan_lock_global to tell future lvm commands to
+ * begin doing lock_global() prior to scanning to avoid problems due to
+ * metadata wrapping between label_scan and vg_read.
+ */
+ set_scan_lock_global(cmd);
+
dm_list_init(&cmd->hints);
/*
diff --git a/lib/locking/locking.c b/lib/locking/locking.c
index 0aceb194a..3236b523a 100644
--- a/lib/locking/locking.c
+++ b/lib/locking/locking.c
@@ -359,10 +359,8 @@ static int _lockf_global(struct cmd_context *cmd, const char *mode, int convert,
if (!strcmp(mode, "ex")) {
flags |= LCK_WRITE;
- if (cmd->lockf_global_ex) {
- log_warn("global flock already held ex");
+ if (cmd->lockf_global_ex)
return 1;
- }
ret = lock_vol(cmd, VG_GLOBAL, flags, NULL);
if (ret)
diff --git a/tools/toollib.c b/tools/toollib.c
index 338551015..0d47eb29d 100644
--- a/tools/toollib.c
+++ b/tools/toollib.c
@@ -2140,6 +2140,7 @@ int process_each_vg(struct cmd_context *cmd,
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->cname->flags & ALL_VGS_IS_DEFAULT);
int process_all_vgs_on_system = 0;
+ int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -2173,11 +2174,25 @@ int process_each_vg(struct cmd_context *cmd,
process_all_vgs_on_system = 1;
/*
- * Needed for a current listing of the global VG namespace.
+ * The global lock will be taken prior to scanning if the
+ * /run/lvm/scan_lock_global file has been created by a prior command,
+ * indicating that vg metadata sizes are large enough to possibly wrap
+ * around the metadata area during label_scan or between label_scan and
+ * vg_read, which can invalidate the scan results (normally unlocked)
+ * and prevent a valid vg_read (which uses metadata locations saved by
+ * label_scan).
*/
- if (process_all_vgs_on_system && !lock_global(cmd, "sh")) {
- ret_max = ECMD_FAILED;
- goto_out;
+ if (do_scan_lock_global(cmd, &gl_ex)) {
+ if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+ } else if (process_all_vgs_on_system) {
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lock_global(cmd, "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
}
/*
@@ -3668,6 +3683,7 @@ int process_each_lv(struct cmd_context *cmd,
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->cname->flags & ALL_VGS_IS_DEFAULT);
int process_all_vgs_on_system = 0;
+ int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -3722,12 +3738,17 @@ int process_each_lv(struct cmd_context *cmd,
else if (dm_list_empty(&arg_vgnames) && handle->internal_report_for_select)
process_all_vgs_on_system = 1;
- /*
- * Needed for a current listing of the global VG namespace.
- */
- if (process_all_vgs_on_system && !lock_global(cmd, "sh")) {
- ret_max = ECMD_FAILED;
- goto_out;
+ if (do_scan_lock_global(cmd, &gl_ex)) {
+ if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+ } else if (process_all_vgs_on_system) {
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lock_global(cmd, "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
}
/*
@@ -4384,6 +4405,7 @@ int process_each_pv(struct cmd_context *cmd,
struct device_id_list *dil;
int process_all_pvs;
int process_all_devices;
+ int gl_ex = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -4434,10 +4456,17 @@ int process_each_pv(struct cmd_context *cmd,
process_all_devices = process_all_pvs && (cmd->cname->flags & ENABLE_ALL_DEVS) && all_is_set;
- /* Needed for a current listing of the global VG namespace. */
- if (!only_this_vgname && !lock_global(cmd, "sh")) {
- ret_max = ECMD_FAILED;
- goto_out;
+ if (do_scan_lock_global(cmd, &gl_ex)) {
+ if (!lock_global(cmd, gl_ex ? "ex" : "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+ } else if (!only_this_vgname) {
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lock_global(cmd, "sh")) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
}
if (!(read_flags & PROCESS_SKIP_SCAN))