summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2019-11-19 17:50:20 -0600
committerDavid Teigland <teigland@redhat.com>2019-11-20 14:37:40 -0600
commitf1adf8944d4d7b04a82d9c702370c7620247680e (patch)
tree47a9f8792ca7891b6c9c9b691f4dd72abf9ffdcc
parent7474440d3b540d20eb4f997efeb31b881cc6ac8e (diff)
downloadlvm2-dev-dct-lockfile.tar.gz
locking: use lock file timestamps to detect vg changesdev-dct-lockfile
A reporting command that is run concurrently with another command modifying a VG may report either the old or new VG state. This flexibility means the reporting command could be optimized to report metadata that was read prior to taking the VG lock. Using lock file timestamps, that window can be closed so that metadata reported is always consistent with the held VG lock. In some cases, this additional consistency will avoid warnings that could be produced when the command compares the metadata with the dm kernel state. The end result is that the optimization is used (to read disks only once) and the reported metadata is consistent with the dm kernel state, even if a concurrent command is making changes. A reporting command will now save the VG lock file timestamps prior to scanning disks. The VG metadata that is read while scanning disks is saved in memory. After the scan, when reporting each VG, the command will lock the VG, and then check the lock file timestamp again. If the timestamp is unchanged, then the metadata saved from the scan is unchanged and is reused to report the VG. If the timestamp has changed, then another command has modified the metadata since the scan, and the metadata is reread from disk prior to reporting it. Changes to lock file handling to support this: - lock files are no longer unlinked and recreated by every lvm command, but are left in place. - a command modifying a VG (holding an exclusive flock) will update the lock file timestamp before unlocking it.
-rw-r--r--lib/label/label.c9
-rw-r--r--lib/locking/file_locking.c74
-rw-r--r--lib/locking/locking.h4
-rw-r--r--lib/metadata/metadata.c28
-rw-r--r--lib/misc/lvm-flock.c217
-rw-r--r--lib/misc/lvm-flock.h5
-rw-r--r--tools/commands.h2
-rw-r--r--tools/vgremove.c2
8 files changed, 284 insertions, 57 deletions
diff --git a/lib/label/label.c b/lib/label/label.c
index 05986cbe7..69d6d67fe 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -1046,6 +1046,15 @@ int label_scan(struct cmd_context *cmd)
_prepare_open_file_limit(cmd, dm_list_size(&scan_devs));
/*
+ * Read and save the timestamps from VG lock files. The lock file
+ * timestamp is updated when a command releases an exclusive lock
+ * (indicating it has changed the VG.) So, the timestamps can be
+ * checked later to detect if another command has changed the VG since
+ * our label scan.
+ */
+ file_lock_save_times(cmd);
+
+ /*
* Do the main scan.
*/
_scan_list(cmd, cmd->filter, &scan_devs, NULL);
diff --git a/lib/locking/file_locking.c b/lib/locking/file_locking.c
index 9dfa06cf5..39578b8ff 100644
--- a/lib/locking/file_locking.c
+++ b/lib/locking/file_locking.c
@@ -26,12 +26,14 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
+#include <dirent.h>
static char _lock_dir[PATH_MAX];
static void _fin_file_locking(void)
{
release_flocks(1);
+ free_flocks();
}
static void _reset_file_locking(void)
@@ -50,11 +52,12 @@ static int _file_lock_resource(struct cmd_context *cmd, const char *resource,
log_error("Too long locking filename %s/P_%s.", _lock_dir, resource + 1);
return 0;
}
- } else
+ } else {
if (dm_snprintf(lockfile, sizeof(lockfile), "%s/V_%s", _lock_dir, resource) < 0) {
log_error("Too long locking filename %s/V_%s.", _lock_dir, resource);
return 0;
}
+ }
if (!lock_file(lockfile, flags))
return_0;
@@ -95,3 +98,72 @@ int init_file_locking(struct locking_type *locking, struct cmd_context *cmd,
return 1;
}
+
+/*
+ * For each file in locking_dir with V_ and no aux,
+ * stat and save the file time.
+ */
+
+void file_lock_save_times(struct cmd_context *cmd)
+{
+ char lockfile[PATH_MAX];
+ DIR *dir;
+ struct dirent *de;
+ char *aux;
+
+ if (!(dir = opendir(_lock_dir)))
+ return;
+
+ while ((de = readdir(dir))) {
+ if (de->d_name[0] != 'V')
+ continue;
+ if ((aux = strchr(de->d_name, ':'))) {
+ if (!strncmp(aux, ":aux", 4))
+ continue;
+ }
+
+ if (dm_snprintf(lockfile, sizeof(lockfile), "%s/%s", _lock_dir, de->d_name) < 0)
+ continue;
+
+ lock_file_time_init(lockfile);
+ }
+ closedir(dir);
+}
+
+/*
+ * Check if the file lock timestamp is unchanged from when it was saved by
+ * file_lock_save_times. Return true if unchanged. Return false if the
+ * timestamp is different, or if there's no info to know.
+ */
+bool file_lock_time_unchanged(struct cmd_context *cmd, const char *resource)
+{
+ char lockfile[PATH_MAX];
+
+ /* shouldn't be used with this function */
+ if (!strcmp(resource, VG_GLOBAL))
+ return false;
+
+ if (dm_snprintf(lockfile, sizeof(lockfile), "%s/V_%s", _lock_dir, resource) < 0) {
+ log_error("Too long locking filename %s/V_%s.", _lock_dir, resource);
+ return false;
+ }
+
+ return lock_file_time_unchanged(lockfile);
+}
+
+void file_lock_remove_on_unlock(struct cmd_context *cmd, const char *resource)
+{
+ char lockfile[PATH_MAX];
+
+ /* shouldn't be used with this function */
+ if (!strcmp(resource, VG_GLOBAL))
+ return;
+
+ if (dm_snprintf(lockfile, sizeof(lockfile), "%s/V_%s", _lock_dir, resource) < 0) {
+ log_error("Too long locking filename %s/V_%s.", _lock_dir, resource);
+ return;
+ }
+
+ lock_file_remove_on_unlock(lockfile);
+}
+
diff --git a/lib/locking/locking.h b/lib/locking/locking.h
index 746667a9b..58eecae37 100644
--- a/lib/locking/locking.h
+++ b/lib/locking/locking.h
@@ -78,4 +78,8 @@ int lockf_global_convert(struct cmd_context *cmd, const char *mode);
int lock_global(struct cmd_context *cmd, const char *mode);
int lock_global_convert(struct cmd_context *cmd, const char *mode);
+void file_lock_save_times(struct cmd_context *cmd);
+bool file_lock_time_unchanged(struct cmd_context *cmd, const char *resource);
+void file_lock_remove_on_unlock(struct cmd_context *cmd, const char *resource);
+
#endif
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
index c4f724711..b98d1c29f 100644
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@@ -4586,6 +4586,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
struct device *mda_dev, *dev_ret, *dev;
struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */
struct pv_list *pvl;
+ const char *rescan_reason = NULL;
int found_old_metadata = 0;
int found_md_component = 0;
unsigned use_previous_vg;
@@ -4622,16 +4623,27 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
*
* The devs in the VG may be persistently inconsistent due to some
* previous problem. In this case, rescanning the labels here will
- * find the same inconsistency. The VG repair (mistakenly done by
- * vg_read below) is supposed to fix that.
+ * find the same inconsistency.
*
- * FIXME: sort out the usage of the global lock (which is mixed up
- * with the orphan lock), and when we can tell that the global
- * lock is taken prior to the label scan, and still held here,
- * we can also skip the rescan in that case.
+ * Even though it's acceptable to report old metadata that was scanned
+ * prior to taking the VG lock, we can actually detect the rare case
+ * when another command has written the metadata between the time of
+ * our scan and us acquiring the VG lock. We save the VG lock file
+ * timestamp prior to scan, then check it again aftrr taking the VG
+ * lock (file_lock_time_unchanged). If the timestamp is different, it
+ * means that another command has written the metadata since our scan,
+ * and we rescan it here to report the latest metadata.
*/
- if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) {
- log_debug_metadata("Rescanning devices for %s %s", vgname, writing ? "rw" : "");
+
+ if (!cmd->can_use_one_scan)
+ rescan_reason = "disabled";
+ else if (lvmcache_scan_mismatch(cmd, vgname, vgid))
+ rescan_reason = "mismatch";
+ else if (!file_lock_time_unchanged(cmd, vgname))
+ rescan_reason = "changed";
+
+ if (rescan_reason) {
+ log_debug_metadata("Rescanning devices for %s %s (%s)", vgname, writing ? "rw" : "", rescan_reason);
if (writing)
lvmcache_label_rescan_vg_rw(cmd, vgname, vgid);
else
diff --git a/lib/misc/lvm-flock.c b/lib/misc/lvm-flock.c
index d65601d94..4e3496d1c 100644
--- a/lib/misc/lvm-flock.c
+++ b/lib/misc/lvm-flock.c
@@ -25,36 +25,16 @@
struct lock_list {
struct dm_list list;
int lf;
+ unsigned ex:1;
+ unsigned remove_on_unlock:1;
char *res;
+ struct timespec save_time;
};
static struct dm_list _lock_list;
static int _prioritise_write_locks;
-/* Drop lock known to be shared with another file descriptor. */
-static void _drop_shared_flock(const char *file, int fd)
-{
- log_debug_locking("_drop_shared_flock %s.", file);
-
- if (close(fd) < 0)
- log_sys_debug("close", file);
-}
-
-static void _undo_flock(const char *file, int fd)
-{
- struct stat buf1, buf2;
-
- log_debug_locking("_undo_flock %s", file);
- if (!flock(fd, LOCK_NB | LOCK_EX) &&
- !stat(file, &buf1) &&
- !fstat(fd, &buf2) &&
- is_same_inode(buf1, buf2))
- if (unlink(file))
- log_sys_debug("unlink", file);
-
- if (close(fd) < 0)
- log_sys_debug("close", file);
-}
+#define AUX_LOCK_SUFFIX ":aux"
static struct lock_list *_get_lock_list_entry(const char *file)
{
@@ -70,6 +50,19 @@ static struct lock_list *_get_lock_list_entry(const char *file)
return NULL;
}
+static void _unlink_aux(const char *file)
+{
+ char aux_path[PATH_MAX];
+
+ if (dm_snprintf(aux_path, sizeof(aux_path), "%s%s", file, AUX_LOCK_SUFFIX) < 0) {
+ stack;
+ return;
+ }
+
+ if (unlink(aux_path))
+ log_sys_debug("unlink", aux_path);
+}
+
static int _release_lock(const char *file, int unlock)
{
struct lock_list *ll;
@@ -78,18 +71,54 @@ static int _release_lock(const char *file, int unlock)
dm_list_iterate_safe(llh, llt, &_lock_list) {
ll = dm_list_item(llh, struct lock_list);
+ if (ll->lf < 0)
+ continue;
+
if (!file || !strcmp(ll->res, file)) {
- dm_list_del(llh);
+ /*
+ * When a VG is being removed, and the flock is still
+ * held for the VG, it sets the remove_on_unlock flag,
+ * so that when the flock is unlocked, the lock file is
+ * then also removed.
+ */
+ if (file && unlock && ll->remove_on_unlock) {
+ log_debug("Unlocking %s and removing", ll->res);
+
+ if (_prioritise_write_locks)
+ _unlink_aux(ll->res);
+ if (flock(ll->lf, LOCK_NB | LOCK_UN))
+ log_sys_debug("flock", ll->res);
+ if (unlink(ll->res))
+ log_sys_debug("unlink", ll->res);
+ if (close(ll->lf) < 0)
+ log_sys_debug("close", ll->res);
+
+ dm_list_del(&ll->list);
+ free(ll->res);
+ free(ll);
+ return 1;
+ }
+
+ /*
+ * Update the lock file timestamp when unlocking an
+ * exclusive flock. Other commands may use the
+ * timestamp change to detect that the VG was changed.
+ */
+ if (file && unlock && ll->ex) {
+ if (futimens(ll->lf, NULL) < 0)
+ log_debug("lock file %s time update error %d", file, errno);
+ }
+
if (unlock) {
log_very_verbose("Unlocking %s", ll->res);
if (flock(ll->lf, LOCK_NB | LOCK_UN))
log_sys_debug("flock", ll->res);
- _undo_flock(ll->res, ll->lf);
- } else
- _drop_shared_flock(ll->res, ll->lf);
+ }
+
+ if (close(ll->lf) < 0)
+ log_sys_debug("close", ll->res);
- free(ll->res);
- free(llh);
+ ll->lf = -1;
if (file)
return 1;
@@ -154,8 +183,6 @@ static int _do_flock(const char *file, int *fd, int operation, uint32_t nonblock
return_0;
}
-#define AUX_LOCK_SUFFIX ":aux"
-
static int _do_write_priority_flock(const char *file, int *fd, int operation, uint32_t nonblock)
{
int r, fd_aux = -1;
@@ -167,9 +194,11 @@ static int _do_write_priority_flock(const char *file, int *fd, int operation, ui
if ((r = _do_flock(file_aux, &fd_aux, LOCK_EX, 0))) {
if (operation == LOCK_EX) {
r = _do_flock(file, fd, operation, nonblock);
- _undo_flock(file_aux, fd_aux);
+ if (close(fd_aux) < 0)
+ log_sys_debug("close", file_aux);
} else {
- _undo_flock(file_aux, fd_aux);
+ if (close(fd_aux) < 0)
+ log_sys_debug("close", file_aux);
r = _do_flock(file, fd, operation, nonblock);
}
}
@@ -183,6 +212,7 @@ int lock_file(const char *file, uint32_t flags)
uint32_t nonblock = flags & LCK_NONBLOCK;
uint32_t convert = flags & LCK_CONVERT;
int r;
+ int ex = 0;
struct lock_list *ll;
char state;
@@ -194,6 +224,7 @@ int lock_file(const char *file, uint32_t flags)
case LCK_WRITE:
operation = LOCK_EX;
state = 'W';
+ ex = 1;
break;
case LCK_UNLOCK:
return _release_lock(file, 1);
@@ -210,21 +241,26 @@ int lock_file(const char *file, uint32_t flags)
log_very_verbose("Locking %s %c%c convert", ll->res, state,
nonblock ? ' ' : 'B');
r = flock(ll->lf, operation);
- if (!r)
+ if (!r) {
+ ll->ex = ex;
return 1;
+ }
log_error("Failed to convert flock on %s %d", file, errno);
return 0;
}
- if (!(ll = malloc(sizeof(struct lock_list))))
- return_0;
+ if (!(ll = _get_lock_list_entry(file))) {
+ if (!(ll = zalloc(sizeof(struct lock_list))))
+ return_0;
- if (!(ll->res = strdup(file))) {
- free(ll);
- return_0;
- }
+ if (!(ll->res = strdup(file))) {
+ free(ll);
+ return_0;
+ }
- ll->lf = -1;
+ ll->lf = -1;
+ dm_list_add(&_lock_list, &ll->list);
+ }
log_very_verbose("Locking %s %c%c", ll->res, state,
nonblock ? ' ' : 'B');
@@ -237,12 +273,9 @@ int lock_file(const char *file, uint32_t flags)
(void) dm_prepare_selinux_context(NULL, 0);
if (r)
- dm_list_add(&_lock_list, &ll->list);
- else {
- free(ll->res);
- free(ll);
+ ll->ex = ex;
+ else
stack;
- }
return r;
}
@@ -254,3 +287,93 @@ void init_flock(struct cmd_context *cmd)
_prioritise_write_locks =
find_config_tree_bool(cmd, global_prioritise_write_locks_CFG, NULL);
}
+
+void free_flocks(void)
+{
+ struct lock_list *ll, *ll2;
+
+ dm_list_iterate_items_safe(ll, ll2, &_lock_list) {
+ dm_list_del(&ll->list);
+ free(ll->res);
+ free(ll);
+ }
+}
+
+/*
+ * Save the lock file timestamps prior to scanning, so that the timestamps can
+ * be checked later (lock_file_time_unchanged) to see if the VG has been
+ * changed.
+ */
+
+void lock_file_time_init(const char *file)
+{
+ struct lock_list *ll;
+ struct stat buf;
+
+ if (stat(file, &buf) < 0)
+ return;
+
+ if (!(ll = _get_lock_list_entry(file))) {
+ if (!(ll = zalloc(sizeof(struct lock_list))))
+ return;
+
+ if (!(ll->res = strdup(file))) {
+ free(ll);
+ return;
+ }
+
+ ll->lf = -1;
+ ll->save_time = buf.st_mtim;
+ dm_list_add(&_lock_list, &ll->list);
+ }
+}
+
+/*
+ * Check if a lock file timestamp has been changed (by other command) since we
+ * saved it (lock_file_time_init). Another command may have updated the lock
+ * file timestamp when releasing an ex flock (futimens above.)
+ */
+
+bool lock_file_time_unchanged(const char *file)
+{
+ struct lock_list *ll;
+ struct stat buf;
+ struct timespec *prev, *now;
+
+ if (stat(file, &buf) < 0) {
+ log_debug("lock_file_time_unchanged no file %s", file);
+ return false;
+ }
+
+ if (!(ll = _get_lock_list_entry(file))) {
+ log_debug("lock_file_time_unchanged no list item %s", file);
+ return false;
+ }
+
+ prev = &ll->save_time;
+ now = &buf.st_mtim;
+
+ if ((now->tv_sec == prev->tv_sec) && (now->tv_nsec == prev->tv_nsec)) {
+ log_debug("lock file %s unchanged from %llu.%llu", file,
+ (unsigned long long)prev->tv_sec,
+ (unsigned long long)prev->tv_nsec);
+ return true;
+ }
+
+ log_debug("lock file %s changed from %llu.%llu to %llu.%llu", file,
+ (unsigned long long)prev->tv_sec,
+ (unsigned long long)prev->tv_nsec,
+ (unsigned long long)now->tv_sec,
+ (unsigned long long)now->tv_nsec);
+
+ return false;
+}
+
+void lock_file_remove_on_unlock(const char *file)
+{
+ struct lock_list *ll;
+
+ if ((ll = _get_lock_list_entry(file)))
+ ll->remove_on_unlock = 1;
+}
+
diff --git a/lib/misc/lvm-flock.h b/lib/misc/lvm-flock.h
index a6c7e367b..3525cf476 100644
--- a/lib/misc/lvm-flock.h
+++ b/lib/misc/lvm-flock.h
@@ -16,7 +16,12 @@
#define _LVM_FLOCK_H
void init_flock(struct cmd_context *cmd);
+void free_flocks(void);
int lock_file(const char *file, uint32_t flags);
void release_flocks(int unlock);
+void lock_file_time_init(const char *file);
+bool lock_file_time_unchanged(const char *file);
+void lock_file_remove_on_unlock(const char *file);
+
#endif /* _LVM_FLOCK_H */
diff --git a/tools/commands.h b/tools/commands.h
index 77cf1fac9..c1670ae66 100644
--- a/tools/commands.h
+++ b/tools/commands.h
@@ -99,7 +99,7 @@ xx(lvresize,
xx(lvs,
"Display information about logical volumes",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | ALLOW_HINTS)
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | CAN_USE_ONE_SCAN | ALLOW_HINTS)
xx(lvscan,
"List all logical volumes in all volume groups",
diff --git a/tools/vgremove.c b/tools/vgremove.c
index 8f73297dc..54006c8ec 100644
--- a/tools/vgremove.c
+++ b/tools/vgremove.c
@@ -78,6 +78,8 @@ static int _vgremove_single(struct cmd_context *cmd, const char *vg_name,
lockd_free_vg_final(cmd, vg);
+ file_lock_remove_on_unlock(cmd, vg->name);
+
return ECMD_PROCESSED;
}