summaryrefslogtreecommitdiff
path: root/lib/cache/lvmcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/cache/lvmcache.c')
-rw-r--r--lib/cache/lvmcache.c425
1 files changed, 389 insertions, 36 deletions
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
index 0ffa604df..99e36eed5 100644
--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -39,12 +39,19 @@ struct lvmcache_info {
uint32_t ext_version; /* Extension version */
uint32_t ext_flags; /* Extension flags */
uint32_t status;
+ int summary_seqno; /* vg seqno found on this dev during scan */
+ int mda1_seqno;
+ int mda2_seqno;
+ unsigned summary_seqno_mismatch:1; /* two mdas on this dev has mismatching metadata */
+ unsigned mda1_bad:1; /* label scan found bad metadata in mda1 */
+ unsigned mda2_bad:1; /* label scan found bad metadata in mda2 */
};
/* One per VG */
struct lvmcache_vginfo {
struct dm_list list; /* Join these vginfos together */
struct dm_list infos; /* List head for lvmcache_infos */
+ struct dm_list outdated_infos; /* vg_read moves info from infos to outdated_infos */
const struct format_type *fmt;
char *vgname; /* "" == orphan */
uint32_t status;
@@ -175,6 +182,33 @@ static void _destroy_duplicate_device_list(struct dm_list *head)
dm_list_init(head);
}
+/*
+ * Report whether either metadata area on this device has been marked bad
+ * in lvmcache.
+ *
+ * Returns 1 if the mda1_bad or mda2_bad flag is set in the device's
+ * lvmcache info, 0 if neither is set.  Also returns 0, after logging an
+ * error, when no lvmcache info is found for the device.
+ */
+int lvmcache_has_bad_metadata(struct device *dev)
+{
+	struct lvmcache_info *info = lvmcache_info_from_pvid(dev->pvid, dev, 0);
+
+	if (!info) {
+		/* shouldn't happen */
+		log_error("No lvmcache info for checking bad metadata on %s", dev_name(dev));
+		return 0;
+	}
+
+	return (info->mda1_bad || info->mda2_bad) ? 1 : 0;
+}
+
+/*
+ * Mark metadata areas of a device as "bad", i.e. holding metadata that
+ * lvm cannot use or process, e.g. because of a bad checksum or
+ * invalid/unrecognizable content.
+ *
+ * A non-zero mda1_bad / mda2_bad sets the matching flag in info.
+ * A zero argument leaves the flag unchanged, so this never clears a flag.
+ */
+void lvmcache_set_bad_metadata(struct lvmcache_info *info, int mda1_bad, int mda2_bad)
+{
+	/* OR-ing in 0 keeps the current value; OR-ing in 1 sets the flag. */
+	info->mda1_bad |= (mda1_bad != 0);
+	info->mda2_bad |= (mda2_bad != 0);
+}
+
static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo,
struct lvmcache_info *info)
{
@@ -1343,6 +1377,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info,
return 0;
}
dm_list_init(&vginfo->infos);
+ dm_list_init(&vginfo->outdated_infos);
/*
* A different VG (different uuid) can exist with the same name.
@@ -1467,12 +1502,9 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt)
}
/*
- * FIXME: get rid of other callers of this function which call it
- * in odd cases to "fix up" some bit of lvmcache state. Make those
- * callers fix up what they need to directly, and leave this function
- * with one purpose and caller.
+ * Returning 0 causes the caller to remove the info struct for this
+ * device from lvmcache, which will make it look like a missing device.
*/
-
int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary)
{
const char *vgname = vgsummary->vgname;
@@ -1498,6 +1530,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg
* Puts the vginfo into the vgname hash table.
*/
if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt)) {
+ /* shouldn't happen, internal error */
log_error("Failed to update VG %s info in lvmcache.", vgname);
return 0;
}
@@ -1506,6 +1539,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg
* Puts the vginfo into the vgid hash table.
*/
if (!_lvmcache_update_vgid(info, info->vginfo, vgid)) {
+ /* shouldn't happen, internal error */
log_error("Failed to update VG %s info in lvmcache.", vgname);
return 0;
}
@@ -1521,56 +1555,140 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg
if (!vgsummary->seqno && !vgsummary->mda_size && !vgsummary->mda_checksum)
return 1;
+ /*
+ * Keep track of which devs/mdas have old versions of the metadata.
+ * The values we keep in vginfo are from the metadata with the largest
+ * seqno. One dev may have more recent metadata than another dev, and
+ * one mda may have more recent metadata than the other mda on the same
+ * device.
+ *
+ * When a device holds old metadata, the info struct for the device
+ * remains in lvmcache, so the device is not treated as missing.
+ * Also the mda struct containing the old metadata is kept on
+ * info->mdas. This means that vg_read will read metadata from
+ * the mda again (and probably see the same old metadata). It
+ * also means that vg_write will use the mda to write new metadata
+ * into the mda that currently has the old metadata.
+ */
+ if (vgsummary->mda_num == 1)
+ info->mda1_seqno = vgsummary->seqno;
+ else if (vgsummary->mda_num == 2)
+ info->mda2_seqno = vgsummary->seqno;
+
+ if (!info->summary_seqno)
+ info->summary_seqno = vgsummary->seqno;
+ else {
+ if (info->summary_seqno == vgsummary->seqno) {
+ /* This mda has the same metadata as the prev mda on this dev. */
+ return 1;
+
+ } else if (info->summary_seqno > vgsummary->seqno) {
+ /* This mda has older metadata than the prev mda on this dev. */
+ info->summary_seqno_mismatch = 1;
+
+ } else if (info->summary_seqno < vgsummary->seqno) {
+ /* This mda has newer metadata than the prev mda on this dev. */
+ info->summary_seqno_mismatch = 1;
+ info->summary_seqno = vgsummary->seqno;
+ }
+ }
+
+ /* this shouldn't happen */
if (!(vginfo = info->vginfo))
return 1;
if (!vginfo->seqno) {
vginfo->seqno = vgsummary->seqno;
+ vginfo->mda_checksum = vgsummary->mda_checksum;
+ vginfo->mda_size = vgsummary->mda_size;
- log_debug_cache("lvmcache %s: VG %s: set seqno to %d",
- dev_name(info->dev), vginfo->vgname, vginfo->seqno);
+ log_debug_cache("lvmcache %s mda%d VG %s set seqno %u checksum %x mda_size %zu",
+ dev_name(info->dev), vgsummary->mda_num, vgname,
+ vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size);
+ goto update_vginfo;
- } else if (vgsummary->seqno != vginfo->seqno) {
- log_warn("Scan of VG %s from %s found metadata seqno %d vs previous %d.",
- vgname, dev_name(info->dev), vgsummary->seqno, vginfo->seqno);
+ } else if (vgsummary->seqno < vginfo->seqno) {
vginfo->scan_summary_mismatch = 1;
- /* If we don't return success, this dev info will be removed from lvmcache,
- and then we won't be able to rescan it or repair it. */
+
+ log_debug_cache("lvmcache %s mda%d VG %s older seqno %u checksum %x mda_size %zu",
+ dev_name(info->dev), vgsummary->mda_num, vgname,
+ vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size);
return 1;
- }
- if (!vginfo->mda_size) {
+ } else if (vgsummary->seqno > vginfo->seqno) {
+ vginfo->scan_summary_mismatch = 1;
+
+ /* Replace vginfo values with values from newer metadata. */
+ vginfo->seqno = vgsummary->seqno;
vginfo->mda_checksum = vgsummary->mda_checksum;
vginfo->mda_size = vgsummary->mda_size;
- log_debug_cache("lvmcache %s: VG %s: set mda_checksum to %x mda_size to %zu",
- dev_name(info->dev), vginfo->vgname,
- vginfo->mda_checksum, vginfo->mda_size);
+ log_debug_cache("lvmcache %s mda%d VG %s newer seqno %u checksum %x mda_size %zu",
+ dev_name(info->dev), vgsummary->mda_num, vgname,
+ vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size);
- } else if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) {
- log_warn("Scan of VG %s from %s found mda_checksum %x mda_size %zu vs previous %x %zu",
- vgname, dev_name(info->dev), vgsummary->mda_checksum, vgsummary->mda_size,
- vginfo->mda_checksum, vginfo->mda_size);
- vginfo->scan_summary_mismatch = 1;
- /* If we don't return success, this dev info will be removed from lvmcache,
- and then we won't be able to rescan it or repair it. */
+ goto update_vginfo;
+ } else {
+ /*
+ * Same seqno as previous metadata we saw for this VG.
+ * If the metadata somehow has a different checksum or size,
+ * even though it has the same seqno, something has gone wrong.
+ * FIXME: test this case: VG has two PVs, first goes missing,
+ * second updated to seqno 4, first comes back and second goes
+ * missing, first updated to seqno 4, second comes back, now
+ * both are present with same seqno but different checksums.
+ */
+
+ if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) {
+ log_warn("WARNING: scan of VG %s from %s mda%d found mda_checksum %x mda_size %zu vs %x %zu",
+ vgname, dev_name(info->dev), vgsummary->mda_num,
+ vgsummary->mda_checksum, vgsummary->mda_size,
+ vginfo->mda_checksum, vginfo->mda_size);
+ vginfo->scan_summary_mismatch = 1;
+ return 0;
+ }
+
+ /*
+ * The seqno and checksum matches what was previously seen;
+ * the summary values have already been saved in vginfo.
+ */
return 1;
}
- /*
- * If a dev has an unmatching checksum, ignore the other
- * info from it, keeping the info we already saved.
- */
+ update_vginfo:
if (!_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host,
vgsummary->lock_type, vgsummary->system_id)) {
+ /*
+ * This shouldn't happen, it's an internal error, and we can leave
+ * the info in place without saving the summary values in vginfo.
+ */
log_error("Failed to update VG %s info in lvmcache.", vgname);
- return 0;
}
return 1;
}
-int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
+/*
+ * FIXME: quit trying to mirror changes that a command is making into lvmcache.
+ *
+ * First, it's complicated and hard to ensure it's done correctly in every case
+ * (it would be much easier and safer to just toss out what's in lvmcache and
+ * reread the info to recreate it from scratch instead of trying to make sure
+ * every possible discrete state change is correct.)
+ *
+ * Second, it's unnecessary if commands just use the vg they are modifying
+ * rather than also trying to get info from lvmcache. The lvmcache state
+ * should be populated by label_scan, used to perform vg_read's, and then
+ * ignored (or dropped so it can't be used).
+ *
+ * lvmcache info is already used very little after a command begins its
+ * operation. The code that's supposed to keep the lvmcache in sync with
+ * changes being made to disk could be half wrong and we wouldn't know it.
+ * That creates a landmine for someone who might try to use a bit of it that
+ * isn't being updated correctly.
+ */
+
+int lvmcache_update_vg_from_write(struct volume_group *vg)
{
struct pv_list *pvl;
struct lvmcache_info *info;
@@ -1595,6 +1713,110 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
}
/*
+ * The lvmcache representation of a VG after label_scan can be incorrect
+ * because the label_scan does not use the full VG metadata to construct
+ * vginfo/info. PVs that don't hold VG metadata weren't attached to the vginfo
+ * during label scan, and PVs with outdated metadata (claiming to be in the VG,
+ * but not listed in the latest metadata) were attached to the vginfo, but
+ * shouldn't be. After vg_read() gets the full metadata in the form of a 'vg',
+ * this function is called to fix up the lvmcache representation of the VG
+ * using the 'vg'.
+ */
+
+/*
+ * Fix up the lvmcache view of a VG using the full metadata in 'vg'.
+ *
+ * vg: the VG whose name, id and PV list are used for the fix-up.
+ * precommitted: not referenced in this function body.
+ *
+ * Two passes are made:
+ *  1. Each info on vginfo->infos whose dev is not used by any PV in
+ *     vg->pvs is dropped from the vginfo and put on vginfo->outdated_infos.
+ *  2. For each PV in vg->pvs that has an lvmcache info,
+ *     lvmcache_update_vgname_and_id() is called with a summary built from
+ *     the vg, and if the info has any ignored mda, its mdas are added to
+ *     vg->fid.
+ *
+ * Returns 0 if no vginfo is found for the VG, otherwise 1.  Failures of
+ * the per-PV steps are only logged at debug level.
+ */
+int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted)
+{
+	struct pv_list *pvl;
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info, *info2;
+	struct metadata_area *mda;
+	char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+	struct lvmcache_vgsummary vgsummary = {
+		.vgname = vg->name,
+		.vgstatus = vg->status,
+		.vgid = vg->id,
+		.system_id = vg->system_id,
+		.lock_type = vg->lock_type
+	};
+
+	if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, (const char *)&vg->id))) {
+		log_error(INTERNAL_ERROR "lvmcache_update_vg %s no vginfo", vg->name);
+		return 0;
+	}
+
+	/*
+	 * The label scan doesn't know when a PV with old metadata has been
+	 * removed from the VG.  Now with the vg we can tell, so remove the
+	 * info for a PV that has been removed from the VG with
+	 * vgreduce --removemissing.
+	 */
+	dm_list_iterate_items_safe(info, info2, &vginfo->infos) {
+		int found = 0;
+
+		dm_list_iterate_items(pvl, &vg->pvs) {
+			if (pvl->pv->dev != info->dev)
+				continue;
+			found = 1;
+			break;
+		}
+
+		if (found)
+			continue;
+
+		log_warn("WARNING: outdated PV %s seqno %u has been removed in current VG %s seqno %u.",
+			 dev_name(info->dev), info->summary_seqno, vg->name, vginfo->seqno);
+
+		_drop_vginfo(info, vginfo); /* remove from vginfo->infos */
+		dm_list_add(&vginfo->outdated_infos, &info->list);
+	}
+
+	dm_list_iterate_items(pvl, &vg->pvs) {
+		/* Bounded copy of the PV id, used for the lookup and for logging. */
+		(void) dm_strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s));
+
+		if (!(info = lvmcache_info_from_pvid(pvid_s, pvl->pv->dev, 0))) {
+			/*
+			 * Log the bounded copy rather than the raw id field:
+			 * the raw id is presumably not NUL-terminated (hence
+			 * the copy above), so "%s" on it could read past it.
+			 */
+			log_debug_cache("lvmcache_update_vg %s no info for %s %s",
+					vg->name,
+					pvid_s,
+					pvl->pv->dev ? dev_name(pvl->pv->dev) : "missing");
+			continue;
+		}
+
+		log_debug_cache("lvmcache_update_vg %s for info %s",
+				vg->name, dev_name(info->dev));
+
+		/*
+		 * FIXME: use a different function that just attaches info's that
+		 * had no metadata onto the correct vginfo.
+		 *
+		 * info's for PVs without metadata were not connected to the
+		 * vginfo by label_scan, so do it here.
+		 */
+		if (!lvmcache_update_vgname_and_id(info, &vgsummary)) {
+			log_debug_cache("lvmcache_update_vg %s failed to update info for %s",
+					vg->name, dev_name(info->dev));
+		}
+
+		/*
+		 * Ignored mdas were not copied from info->mdas to
+		 * fid->metadata_areas... when create_text_instance (at the
+		 * start of vg_read) called lvmcache_fid_add_mdas_vg because at
+		 * that point the info's were not connected to the vginfo
+		 * (since label_scan didn't know this without metadata.)
+		 *
+		 * One call adds the mdas of this PV, so stop at the first
+		 * ignored mda found.
+		 */
+		dm_list_iterate_items(mda, &info->mdas) {
+			if (!mda_is_ignored(mda))
+				continue;
+			log_debug("lvmcache_update_vg %s copy ignored mdas for %s", vg->name, dev_name(info->dev));
+			if (!lvmcache_fid_add_mdas_pv(info, vg->fid)) {
+				log_debug_cache("lvmcache_update_vg %s failed to update mdas for %s",
+						vg->name, dev_name(info->dev));
+			}
+			break;
+		}
+	}
+
+	return 1;
+}
+
+/*
* We can see multiple different devices with the
* same pvid, i.e. duplicates.
*
@@ -1645,7 +1867,7 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
* transient duplicate?
*/
-static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev)
+static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev, uint64_t label_sector)
{
struct lvmcache_info *info;
struct label *label;
@@ -1658,6 +1880,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev
return NULL;
}
+ label->dev = dev;
+ label->sector = label_sector;
+
info->dev = dev;
info->fmt = labeller->fmt;
@@ -1673,8 +1898,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev
}
struct lvmcache_info *lvmcache_add(struct labeller *labeller,
- const char *pvid, struct device *dev,
- const char *vgname, const char *vgid, uint32_t vgstatus)
+ const char *pvid, struct device *dev, uint64_t label_sector,
+ const char *vgname, const char *vgid, uint32_t vgstatus,
+ int *is_duplicate)
{
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
char uuid[64] __attribute__((aligned(8)));
@@ -1702,7 +1928,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller,
info = lvmcache_info_from_pvid(dev->pvid, NULL, 0);
if (!info) {
- info = _create_info(labeller, dev);
+ info = _create_info(labeller, dev, label_sector);
created = 1;
}
@@ -1734,6 +1960,8 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller,
dm_list_add(&_found_duplicate_devs, &devl->list);
_found_duplicate_pvs = 1;
+ if (is_duplicate)
+ *is_duplicate = 1;
return NULL;
}
@@ -1877,6 +2105,14 @@ int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance
return lvmcache_fid_add_mdas(info, fid, info->dev->pvid, ID_LEN);
}
+/*
+ * This is the linkage where information is passed from
+ * the label_scan to vg_read.
+ *
+ * Called by create_text_instance in vg_read to copy the
+ * mda's found during label_scan and saved in info->mdas,
+ * to fid->metadata_areas_in_use which is used by vg_read.
+ */
int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_instance *fid)
{
struct lvmcache_info *info;
@@ -1967,9 +2203,10 @@ void lvmcache_del_bas(struct lvmcache_info *info)
}
int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev,
- uint64_t start, uint64_t size, unsigned ignored)
+ uint64_t start, uint64_t size, unsigned ignored,
+ struct metadata_area **mda_new)
{
- return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored);
+ return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored, mda_new);
}
int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size)
@@ -2296,3 +2533,119 @@ int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid)
}
return 0;
}
+
+/*
+ * Used by the metadata repair command to decide whether the metadata on
+ * a dev is old and therefore needs repair.
+ *
+ * Returns 1 when the dev's info is flagged with a seqno mismatch between
+ * its mdas, or when the seqno held in the vginfo is larger than the seqno
+ * recorded for the dev.  Returns 0 otherwise, including when the dev has
+ * no recorded seqno (a new PV being written) and when vgname/vgid are
+ * missing or the vginfo/info lookups fail.
+ */
+int lvmcache_has_old_metadata(struct cmd_context *cmd, const char *vgname, const char *vgid, struct device *dev)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info;
+
+	/* None of the following three failures should happen. */
+	if (!vgname || !vgid)
+		return 0;
+
+	vginfo = lvmcache_vginfo_from_vgid(vgid);
+	if (!vginfo)
+		return 0;
+
+	info = lvmcache_info_from_pvid(dev->pvid, NULL, 0);
+	if (!info)
+		return 0;
+
+	/* writing to a new PV */
+	if (!info->summary_seqno)
+		return 0;
+
+	/*
+	 * Old if, on the same dev, one mda has newer metadata than the
+	 * other, or if one or both mdas on this dev have older metadata
+	 * than another dev.
+	 */
+	if (info->summary_seqno_mismatch || vginfo->seqno > info->summary_seqno)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Append to 'devs' one device_list entry for each info on the VG's
+ * outdated_infos list.
+ *
+ * cmd: not referenced in this function body.
+ * vgname, vgid: identify the vginfo to look up.
+ * devs: list head that receives the new entries.
+ *
+ * Each entry is allocated with zalloc; nothing here frees them, so
+ * releasing them is presumably the caller's job (confirm against callers).
+ * If no vginfo is found, or an allocation fails, an error is logged and
+ * the function returns early; on allocation failure 'devs' keeps the
+ * entries added so far.
+ */
+void lvmcache_get_outdated_devs(struct cmd_context *cmd,
+				const char *vgname, const char *vgid,
+				struct dm_list *devs)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info;
+	struct device_list *devl;
+
+	if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) {
+		log_error(INTERNAL_ERROR "lvmcache_get_outdated_devs no vginfo %s", vgname);
+		return;
+	}
+
+	dm_list_iterate_items(info, &vginfo->outdated_infos) {
+		if (!(devl = zalloc(sizeof(*devl)))) {
+			/* Don't hand back a truncated list without saying so. */
+			log_error("lvmcache_get_outdated_devs failed to allocate device list entry for VG %s", vgname);
+			return;
+		}
+		devl->dev = info->dev;
+		dm_list_add(devs, &devl->list);
+	}
+}
+
+/*
+ * Call lvmcache_del() on every info on the VG's outdated_infos list.
+ *
+ * cmd: not referenced in this function body.
+ * vgname, vgid: identify the vginfo to look up.
+ *
+ * If no vginfo is found, an internal error is logged and nothing is
+ * deleted.
+ */
+void lvmcache_del_outdated_devs(struct cmd_context *cmd,
+				const char *vgname, const char *vgid)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info, *info2;
+
+	if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) {
+		log_error(INTERNAL_ERROR "lvmcache_del_outdated_devs no vginfo");
+		return;
+	}
+
+	/*
+	 * The _safe iterator keeps a pointer to the next entry, so the
+	 * current info may be removed while walking the list.
+	 */
+	dm_list_iterate_items_safe(info, info2, &vginfo->outdated_infos)
+		lvmcache_del(info);
+}
+
+/*
+ * Look up the mda list of an outdated device.
+ *
+ * cmd: not referenced in this function body.
+ * vgname, vgid: identify the vginfo to look up.
+ * dev: device to search for on the vginfo's outdated_infos list.
+ * mdas: set to the address of the matching info's mdas list, or to NULL
+ *       when no vginfo is found (an internal error is logged) or when dev
+ *       is not on the outdated list.
+ */
+void lvmcache_get_outdated_mdas(struct cmd_context *cmd,
+				const char *vgname, const char *vgid,
+				struct device *dev,
+				struct dm_list **mdas)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info;
+
+	/* Default result for every path that finds nothing. */
+	*mdas = NULL;
+
+	vginfo = lvmcache_vginfo_from_vgname(vgname, vgid);
+	if (!vginfo) {
+		log_error(INTERNAL_ERROR "lvmcache_get_outdated_mdas no vginfo");
+		return;
+	}
+
+	dm_list_iterate_items(info, &vginfo->outdated_infos) {
+		if (info->dev == dev) {
+			/* The first match wins. */
+			*mdas = &info->mdas;
+			return;
+		}
+	}
+}
+
+/*
+ * Check whether a device is on the VG's outdated_infos list.
+ *
+ * cmd: not referenced in this function body.
+ * vgname, vgid: identify the vginfo to look up.
+ * dev: device to search for.
+ *
+ * Returns 1 if an info on outdated_infos refers to dev.  Returns 0 if
+ * none does, or if no vginfo is found (an internal error is logged).
+ */
+int lvmcache_is_outdated_dev(struct cmd_context *cmd,
+			     const char *vgname, const char *vgid,
+			     struct device *dev)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info;
+
+	if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) {
+		log_error(INTERNAL_ERROR "lvmcache_is_outdated_dev no vginfo");
+		return 0;
+	}
+
+	dm_list_iterate_items(info, &vginfo->outdated_infos) {
+		if (info->dev == dev)
+			return 1;
+	}
+
+	return 0;
+}
+