diff options
37 files changed, 1938 insertions, 1630 deletions
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 0ffa604df..8c3f30727 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -31,6 +31,7 @@ struct lvmcache_info { struct dm_list mdas; /* list head for metadata areas */ struct dm_list das; /* list head for data areas */ struct dm_list bas; /* list head for bootloader areas */ + struct dm_list bad_mdas;/* list head for bad metadata areas */ struct lvmcache_vginfo *vginfo; /* NULL == unknown */ struct label *label; const struct format_type *fmt; @@ -39,12 +40,19 @@ struct lvmcache_info { uint32_t ext_version; /* Extension version */ uint32_t ext_flags; /* Extension flags */ uint32_t status; + int summary_seqno; /* vg seqno found on this dev during scan */ + int mda1_seqno; + int mda2_seqno; + unsigned summary_seqno_mismatch:1; /* two mdas on this dev have mismatching metadata */ + unsigned mda1_bad:1; /* label scan found bad metadata in mda1 */ + unsigned mda2_bad:1; /* label scan found bad metadata in mda2 */ }; /* One per VG */ struct lvmcache_vginfo { struct dm_list list; /* Join these vginfos together */ struct dm_list infos; /* List head for lvmcache_infos */ + struct dm_list outdated_infos; /* vg_read moves info from infos to outdated_infos */ const struct format_type *fmt; char *vgname; /* "" == orphan */ uint32_t status; @@ -175,6 +183,51 @@ static void _destroy_duplicate_device_list(struct dm_list *head) dm_list_init(head); } +int lvmcache_has_bad_metadata(struct device *dev) +{ + struct lvmcache_info *info; + + if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) { + /* shouldn't happen */ + log_error("No lvmcache info for checking bad metadata on %s", dev_name(dev)); + return 0; + } + + if (info->mda1_bad || info->mda2_bad) + return 1; + return 0; +} + +void lvmcache_save_bad_mda(struct lvmcache_info *info, struct metadata_area *mda) +{ + if (mda->mda_num == 1) + info->mda1_bad = 1; + else if (mda->mda_num == 2) + info->mda2_bad = 1; + dm_list_add(&info->bad_mdas, &mda->list); +} 
+ +void lvmcache_get_bad_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *bad_mdas) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + struct metadata_area *mda, *mda2; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_bad_mdas no vginfo %s", vgname); + return; + } + + dm_list_iterate_items(info, &vginfo->infos) { + dm_list_iterate_items_safe(mda, mda2, &info->bad_mdas) { + dm_list_del(&mda->list); + dm_list_add(bad_mdas, &mda->list); + } + } +} + static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo, struct lvmcache_info *info) { @@ -1343,6 +1396,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info, return 0; } dm_list_init(&vginfo->infos); + dm_list_init(&vginfo->outdated_infos); /* * A different VG (different uuid) can exist with the same name. @@ -1467,12 +1521,9 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) } /* - * FIXME: get rid of other callers of this function which call it - * in odd cases to "fix up" some bit of lvmcache state. Make those - * callers fix up what they need to directly, and leave this function - * with one purpose and caller. + * Returning 0 causes the caller to remove the info struct for this + * device from lvmcache, which will make it look like a missing device. */ - int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { const char *vgname = vgsummary->vgname; @@ -1498,6 +1549,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg * Puts the vginfo into the vgname hash table. 
*/ if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt)) { + /* shouldn't happen, internal error */ log_error("Failed to update VG %s info in lvmcache.", vgname); return 0; } @@ -1506,6 +1558,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg * Puts the vginfo into the vgid hash table. */ if (!_lvmcache_update_vgid(info, info->vginfo, vgid)) { + /* shouldn't happen, internal error */ log_error("Failed to update VG %s info in lvmcache.", vgname); return 0; } @@ -1521,56 +1574,140 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg if (!vgsummary->seqno && !vgsummary->mda_size && !vgsummary->mda_checksum) return 1; + /* + * Keep track of which devs/mdas have old versions of the metadata. + * The values we keep in vginfo are from the metadata with the largest + * seqno. One dev may have more recent metadata than another dev, and + * one mda may have more recent metadata than the other mda on the same + * device. + * + * When a device holds old metadata, the info struct for the device + * remains in lvmcache, so the device is not treated as missing. + * Also the mda struct containing the old metadata is kept on + * info->mdas. This means that vg_read will read metadata from + * the mda again (and probably see the same old metadata). It + * also means that vg_write will use the mda to write new metadata + * into the mda that currently has the old metadata. + */ + if (vgsummary->mda_num == 1) + info->mda1_seqno = vgsummary->seqno; + else if (vgsummary->mda_num == 2) + info->mda2_seqno = vgsummary->seqno; + + if (!info->summary_seqno) + info->summary_seqno = vgsummary->seqno; + else { + if (info->summary_seqno == vgsummary->seqno) { + /* This mda has the same metadata as the prev mda on this dev. */ + return 1; + + } else if (info->summary_seqno > vgsummary->seqno) { + /* This mda has older metadata than the prev mda on this dev. 
*/ + info->summary_seqno_mismatch = 1; + + } else if (info->summary_seqno < vgsummary->seqno) { + /* This mda has newer metadata than the prev mda on this dev. */ + info->summary_seqno_mismatch = 1; + info->summary_seqno = vgsummary->seqno; + } + } + + /* this shouldn't happen */ if (!(vginfo = info->vginfo)) return 1; if (!vginfo->seqno) { vginfo->seqno = vgsummary->seqno; + vginfo->mda_checksum = vgsummary->mda_checksum; + vginfo->mda_size = vgsummary->mda_size; - log_debug_cache("lvmcache %s: VG %s: set seqno to %d", - dev_name(info->dev), vginfo->vgname, vginfo->seqno); + log_debug_cache("lvmcache %s mda%d VG %s set seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); + goto update_vginfo; - } else if (vgsummary->seqno != vginfo->seqno) { - log_warn("Scan of VG %s from %s found metadata seqno %d vs previous %d.", - vgname, dev_name(info->dev), vgsummary->seqno, vginfo->seqno); + } else if (vgsummary->seqno < vginfo->seqno) { vginfo->scan_summary_mismatch = 1; - /* If we don't return success, this dev info will be removed from lvmcache, - and then we won't be able to rescan it or repair it. */ + + log_debug_cache("lvmcache %s mda%d VG %s older seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); return 1; - } - if (!vginfo->mda_size) { + } else if (vgsummary->seqno > vginfo->seqno) { + vginfo->scan_summary_mismatch = 1; + + /* Replace vginfo values with values from newer metadata. 
*/ + vginfo->seqno = vgsummary->seqno; vginfo->mda_checksum = vgsummary->mda_checksum; vginfo->mda_size = vgsummary->mda_size; - log_debug_cache("lvmcache %s: VG %s: set mda_checksum to %x mda_size to %zu", - dev_name(info->dev), vginfo->vgname, - vginfo->mda_checksum, vginfo->mda_size); + log_debug_cache("lvmcache %s mda%d VG %s newer seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); - } else if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { - log_warn("Scan of VG %s from %s found mda_checksum %x mda_size %zu vs previous %x %zu", - vgname, dev_name(info->dev), vgsummary->mda_checksum, vgsummary->mda_size, - vginfo->mda_checksum, vginfo->mda_size); - vginfo->scan_summary_mismatch = 1; - /* If we don't return success, this dev info will be removed from lvmcache, - and then we won't be able to rescan it or repair it. */ + goto update_vginfo; + } else { + /* + * Same seqno as previous metadata we saw for this VG. + * If the metadata somehow has a different checksum or size, + * even though it has the same seqno, something has gone wrong. + * FIXME: test this case: VG has two PVs, first goes missing, + * second updated to seqno 4, first comes back and second goes + * missing, first updated to seqno 4, second comes back, now + * both are present with same seqno but different checksums. + */ + + if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { + log_warn("WARNING: scan of VG %s from %s mda%d found mda_checksum %x mda_size %zu vs %x %zu", + vgname, dev_name(info->dev), vgsummary->mda_num, + vgsummary->mda_checksum, vgsummary->mda_size, + vginfo->mda_checksum, vginfo->mda_size); + vginfo->scan_summary_mismatch = 1; + return 0; + } + + /* + * The seqno and checksum matches what was previously seen; + * the summary values have already been saved in vginfo. 
+ */ return 1; } - /* - * If a dev has an unmatching checksum, ignore the other - * info from it, keeping the info we already saved. - */ + update_vginfo: if (!_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id)) { + /* + * This shouldn't happen, it's an internal error, and we can leave + * the info in place without saving the summary values in vginfo. + */ log_error("Failed to update VG %s info in lvmcache.", vgname); - return 0; } return 1; } -int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) +/* + * FIXME: quit trying to mirror changes that a command is making into lvmcache. + * + * First, it's complicated and hard to ensure it's done correctly in every case + * (it would be much easier and safer to just toss out what's in lvmcache and + * reread the info to recreate it from scratch instead of trying to make sure + * every possible discrete state change is correct.) + * + * Second, it's unnecessary if commands just use the vg they are modifying + * rather than also trying to get info from lvmcache. The lvmcache state + * should be populated by label_scan, used to perform vg_read's, and then + * ignored (or dropped so it can't be used). + * + * lvmcache info is already used very little after a command begins its + * operation. The code that's supposed to keep the lvmcache in sync with + * changes being made to disk could be half wrong and we wouldn't know it. + * That creates a landmine for someone who might try to use a bit of it that + * isn't being updated correctly. + */ + +int lvmcache_update_vg_from_write(struct volume_group *vg) { struct pv_list *pvl; struct lvmcache_info *info; @@ -1595,6 +1732,110 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) } /* + * The lvmcache representation of a VG after label_scan can be incorrect + * because the label_scan does not use the full VG metadata to construct + * vginfo/info.
PVs that don't hold VG metadata weren't attached to the vginfo + * during label scan, and PVs with outdated metadata (claiming to be in the VG, + * but not listed in the latest metadata) were attached to the vginfo, but + * shouldn't be. After vg_read() gets the full metadata in the form of a 'vg', + * this function is called to fix up the lvmcache representation of the VG + * using the 'vg'. + */ + +int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted) +{ + struct pv_list *pvl; + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info, *info2; + struct metadata_area *mda; + char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); + struct lvmcache_vgsummary vgsummary = { + .vgname = vg->name, + .vgstatus = vg->status, + .vgid = vg->id, + .system_id = vg->system_id, + .lock_type = vg->lock_type + }; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, (const char *)&vg->id))) { + log_error(INTERNAL_ERROR "lvmcache_update_vg %s no vginfo", vg->name); + return 0; + } + + /* + * The label scan doesn't know when a PV with old metadata has been + * removed from the VG. Now with the vg we can tell, so remove the + * info for a PV that has been removed from the VG with + * vgreduce --removemissing. 
+ */ + dm_list_iterate_items_safe(info, info2, &vginfo->infos) { + int found = 0; + dm_list_iterate_items(pvl, &vg->pvs) { + if (pvl->pv->dev != info->dev) + continue; + found = 1; + break; + } + + if (found) + continue; + + log_warn("WARNING: outdated PV %s seqno %u has been removed in current VG %s seqno %u.", + dev_name(info->dev), info->summary_seqno, vg->name, vginfo->seqno); + + _drop_vginfo(info, vginfo); /* remove from vginfo->infos */ + dm_list_add(&vginfo->outdated_infos, &info->list); + } + + dm_list_iterate_items(pvl, &vg->pvs) { + (void) dm_strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s)); + + if (!(info = lvmcache_info_from_pvid(pvid_s, pvl->pv->dev, 0))) { + log_debug_cache("lvmcache_update_vg %s no info for %s %s", + vg->name, + (char *) &pvl->pv->id, + pvl->pv->dev ? dev_name(pvl->pv->dev) : "missing"); + continue; + } + + log_debug_cache("lvmcache_update_vg %s for info %s", + vg->name, dev_name(info->dev)); + + /* + * FIXME: use a different function that just attaches info's that + * had no metadata onto the correct vginfo. + * + * info's for PVs without metadata were not connected to the + * vginfo by label_scan, so do it here. + */ + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + log_debug_cache("lvmcache_update_vg %s failed to update info for %s", + vg->name, dev_name(info->dev)); + } + + /* + * Ignored mdas were not copied from info->mdas to + * fid->metadata_areas... when create_text_instance (at the + * start of vg_read) called lvmcache_fid_add_mdas_vg because at + * that point the info's were not connected to the vginfo + * (since label_scan didn't know this without metadata.) 
+ */ + dm_list_iterate_items(mda, &info->mdas) { + if (!mda_is_ignored(mda)) + continue; + log_debug("lvmcache_update_vg %s copy ignored mdas for %s", vg->name, dev_name(info->dev)); + if (!lvmcache_fid_add_mdas_pv(info, vg->fid)) { + log_debug_cache("lvmcache_update_vg %s failed to update mdas for %s", + vg->name, dev_name(info->dev)); + } + break; + } + } + + return 1; +} + +/* * We can see multiple different devices with the * same pvid, i.e. duplicates. * @@ -1645,7 +1886,7 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) * transient duplicate? */ -static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev) +static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev, uint64_t label_sector) { struct lvmcache_info *info; struct label *label; @@ -1658,6 +1899,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev return NULL; } + label->dev = dev; + label->sector = label_sector; + info->dev = dev; info->fmt = labeller->fmt; @@ -1673,8 +1917,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev } struct lvmcache_info *lvmcache_add(struct labeller *labeller, - const char *pvid, struct device *dev, - const char *vgname, const char *vgid, uint32_t vgstatus) + const char *pvid, struct device *dev, uint64_t label_sector, + const char *vgname, const char *vgid, uint32_t vgstatus, + int *is_duplicate) { char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); char uuid[64] __attribute__((aligned(8))); @@ -1702,7 +1947,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, info = lvmcache_info_from_pvid(dev->pvid, NULL, 0); if (!info) { - info = _create_info(labeller, dev); + info = _create_info(labeller, dev, label_sector); created = 1; } @@ -1734,6 +1979,8 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, dm_list_add(&_found_duplicate_devs, &devl->list); _found_duplicate_pvs = 1; + if (is_duplicate) + 
*is_duplicate = 1; return NULL; } @@ -1877,6 +2124,14 @@ int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance return lvmcache_fid_add_mdas(info, fid, info->dev->pvid, ID_LEN); } +/* + * This is the linkage where information is passed from + * the label_scan to vg_read. + * + * Called by create_text_instance in vg_read to copy the + * mda's found during label_scan and saved in info->mdas, + * to fid->metadata_areas_in_use which is used by vg_read. + */ int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_instance *fid) { struct lvmcache_info *info; @@ -1950,6 +2205,10 @@ void lvmcache_del_mdas(struct lvmcache_info *info) if (info->mdas.n) del_mdas(&info->mdas); dm_list_init(&info->mdas); + + if (info->bad_mdas.n) + del_mdas(&info->bad_mdas); + dm_list_init(&info->bad_mdas); } void lvmcache_del_das(struct lvmcache_info *info) @@ -1967,9 +2226,10 @@ void lvmcache_del_bas(struct lvmcache_info *info) } int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev, - uint64_t start, uint64_t size, unsigned ignored) + uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new) { - return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored); + return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored, mda_new); } int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size) @@ -2296,3 +2556,119 @@ int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid) } return 0; } + +/* + * This is used by the metadata repair command to check if + * the metadata on a dev needs repair because it's old. 
+ */ +int lvmcache_has_old_metadata(struct cmd_context *cmd, const char *vgname, const char *vgid, struct device *dev) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + /* shouldn't happen */ + if (!vgname || !vgid) + return 0; + + /* shouldn't happen */ + if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) + return 0; + + /* shouldn't happen */ + if (!(info = lvmcache_info_from_pvid(dev->pvid, NULL, 0))) + return 0; + + /* writing to a new PV */ + if (!info->summary_seqno) + return 0; + + /* on same dev, one mda has newer metadata than the other */ + if (info->summary_seqno_mismatch) + return 1; + + /* one or both mdas on this dev has older metadata than another dev */ + if (vginfo->seqno > info->summary_seqno) + return 1; + + return 0; +} + +void lvmcache_get_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *devs) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + struct device_list *devl; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_devs no vginfo %s", vgname); + return; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (!(devl = zalloc(sizeof(*devl)))) + return; + devl->dev = info->dev; + dm_list_add(devs, &devl->list); + } +} + +void lvmcache_del_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info, *info2; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_del_outdated_devs no vginfo"); + return; + } + + dm_list_iterate_items_safe(info, info2, &vginfo->outdated_infos) + lvmcache_del(info); +} + +void lvmcache_get_outdated_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev, + struct dm_list **mdas) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + *mdas = NULL; + + if (!(vginfo = 
lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_mdas no vginfo"); + return; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (info->dev != dev) + continue; + *mdas = &info->mdas; + return; + } +} + +int lvmcache_is_outdated_dev(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_is_outdated_dev no vginfo"); + return 0; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (info->dev == dev) + return 1; + } + + return 0; +} + diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 12f17dfa6..799190173 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -57,10 +57,12 @@ struct lvmcache_vgsummary { char *creation_host; const char *system_id; const char *lock_type; + uint32_t seqno; uint32_t mda_checksum; size_t mda_size; - int zero_offset; - int seqno; + int mda_num; /* 1 = summary from mda1, 2 = summary from mda2 */ + unsigned mda_ignored:1; + unsigned zero_offset:1; }; int lvmcache_init(struct cmd_context *cmd); @@ -72,9 +74,9 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const /* Add/delete a device */ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid, - struct device *dev, + struct device *dev, uint64_t label_sector, const char *vgname, const char *vgid, - uint32_t vgstatus); + uint32_t vgstatus, int *is_duplicate); int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt); void lvmcache_del(struct lvmcache_info *info); void lvmcache_del_dev(struct device *dev); @@ -82,7 +84,8 @@ void lvmcache_del_dev(struct device *dev); /* Update things */ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary); -int lvmcache_update_vg(struct volume_group 
*vg, unsigned precommitted); +int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted); +int lvmcache_update_vg_from_write(struct volume_group *vg); void lvmcache_lock_vgname(const char *vgname, int read_only); void lvmcache_unlock_vgname(const char *vgname); @@ -127,7 +130,8 @@ void lvmcache_del_mdas(struct lvmcache_info *info); void lvmcache_del_das(struct lvmcache_info *info); void lvmcache_del_bas(struct lvmcache_info *info); int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev, - uint64_t start, uint64_t size, unsigned ignored); + uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new); int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size); int lvmcache_add_ba(struct lvmcache_info *info, uint64_t start, uint64_t size); @@ -212,4 +216,28 @@ void lvmcache_drop_saved_vgid(const char *vgid); int dev_in_device_list(struct device *dev, struct dm_list *head); +int lvmcache_has_bad_metadata(struct device *dev); +int lvmcache_has_old_metadata(struct cmd_context *cmd, const char *vgname, const char *vgid, struct device *dev); + +void lvmcache_get_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *devs); +void lvmcache_get_outdated_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev, + struct dm_list **mdas); + +int lvmcache_is_outdated_dev(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev); + +void lvmcache_del_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid); + +void lvmcache_save_bad_mda(struct lvmcache_info *info, struct metadata_area *mda); + +void lvmcache_get_bad_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *bad_mdas); + #endif diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index e8ce312a4..59a468a0d 100644 --- a/lib/commands/toolcontext.h +++ 
b/lib/commands/toolcontext.h @@ -176,6 +176,7 @@ struct cmd_context { unsigned use_hints:1; /* if hints are enabled this cmd can use them */ unsigned pvscan_recreate_hints:1; /* enable special case hint handling for pvscan --cache */ unsigned scan_lvs:1; + unsigned wipe_outdated_pvs:1; /* * Devices and filtering. diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index 34eff55f5..052c2bd2b 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -320,6 +320,9 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd, break; } + if (vg) + set_pv_devices(tf, vg); + if (!vg) tf->fmt->ops->destroy_instance(tf); diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 414941f22..9033db36b 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -166,6 +166,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, char *buf=NULL; struct device_area *area; struct mda_context *mdac; + uint32_t bad_fields = 0; int r=0; mdac = (struct mda_context *) mda->metadata_locn; @@ -174,7 +175,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, FMTu64, mdac->area.start, mdac->area.size); area = &mdac->area; - if (!(mdah = raw_read_mda_header(fmt, area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fmt, area, mda_is_primary(mda), 0, &bad_fields))) goto_out; rlocn = mdah->raw_locns; @@ -312,61 +313,88 @@ static void _xlate_mdah(struct mda_header *mdah) } } -static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, int primary_mda) +static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, + int primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields) { + int bad = 0; + log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); if (!dev_read_bytes(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read 
metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); + *bad_fields |= BAD_MDA_READ; return 0; } if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl)))) { - log_error("Incorrect checksum in metadata area header on %s at %llu", + log_warn("WARNING: wrong checksum %x in mda header on %s at %llu", + mdah->checksum_xl, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + + if (!(ignore_bad_fields & BAD_MDA_CHECKSUM)) { + *bad_fields |= BAD_MDA_CHECKSUM; + bad = 1; + } } _xlate_mdah(mdah); if (strncmp((char *)mdah->magic, FMTT_MAGIC, sizeof(mdah->magic))) { - log_error("Wrong magic number in metadata area header on %s at %llu", + log_warn("WARNING: wrong magic number %.8s in mda header on %s at %llu", + mdah->magic, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + + if (!(ignore_bad_fields & BAD_MDA_MAGIC)) { + *bad_fields |= BAD_MDA_MAGIC; + bad = 1; + } } if (mdah->version != FMTT_VERSION) { - log_error("Incompatible version %u metadata area header on %s at %llu", + log_warn("WARNING: wrong version %u in mda header on %s at %llu", mdah->version, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + + if (!(ignore_bad_fields & BAD_MDA_VERSION)) { + *bad_fields |= BAD_MDA_VERSION; + bad = 1; + } } if (mdah->start != dev_area->start) { - log_error("Incorrect start sector %llu in metadata area header on %s at %llu", + log_warn("WARNING: wrong start sector %llu in mda header on %s at %llu", (unsigned long long)mdah->start, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + + if (!(ignore_bad_fields & BAD_MDA_START)) { + *bad_fields |= BAD_MDA_START; + bad = 1; + } } + if (bad) + return 0; + return 1; } struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area, int primary_mda) + struct device_area *dev_area, + int 
primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields) { struct mda_header *mdah; if (!(mdah = dm_pool_alloc(fmt->cmd->mem, MDA_HEADER_SIZE))) { log_error("struct mda_header allocation failed"); + *bad_fields |= BAD_MDA_INTERNAL; return NULL; } - if (!_raw_read_mda_header(mdah, dev_area, primary_mda)) { + if (!_raw_read_mda_header(mdah, dev_area, primary_mda, ignore_bad_fields, bad_fields)) { dm_pool_free(fmt->cmd->mem, mdah); return NULL; } @@ -564,8 +592,9 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, time_t when; char *desc; uint32_t wrap = 0; + uint32_t bad_fields = 0; - if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) { + if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda ? 1 : 2, 0, &bad_fields))) { log_error("Failed to read vg %s from %s", vgname, dev_name(area->dev)); goto out; } @@ -686,6 +715,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, uint64_t old_start = 0, old_last = 0, old_size = 0, old_wrap = 0; uint64_t new_start = 0, new_last = 0, new_size = 0, new_wrap = 0; uint64_t max_size; + uint32_t bad_fields = 0; char *new_buf = NULL; int overlap; int found = 0; @@ -701,7 +731,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!found) return 1; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields))) goto_out; /* @@ -972,6 +1002,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, struct raw_locn *rlocn_slot1; struct raw_locn *rlocn_new; struct pv_list *pvl; + uint32_t bad_fields = 0; int r = 0; int found = 0; @@ -992,7 +1023,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, * mdah buffer, but the mdah buffer is not modified and mdac->rlocn is * modified. 
*/ - if (!(mdab = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) + if (!(mdab = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields))) goto_out; /* @@ -1184,6 +1215,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, struct mda_header *mdah; struct raw_locn *rlocn_slot0; struct raw_locn *rlocn_slot1; + uint32_t bad_fields = 0; int r = 0; if (!(mdah = dm_pool_alloc(fid->fmt->cmd->mem, MDA_HEADER_SIZE))) { @@ -1197,7 +1229,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, * Just to print the warning? */ - if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda))) + if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda), 0, &bad_fields)) log_warn("WARNING: Removing metadata location on %s with bad mda header.", dev_name(mdac->area.dev)); @@ -1494,7 +1526,7 @@ int read_metadata_location_summary(const struct format_type *fmt, * valid vg name. */ if (!validate_name(namebuf)) { - log_error("Metadata location on %s at %llu begins with invalid VG name.", + log_warn("WARNING: Metadata location on %s at %llu begins with invalid VG name.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -1556,7 +1588,7 @@ int read_metadata_location_summary(const struct format_type *fmt, (off_t) (dev_area->start + MDA_HEADER_SIZE), wrap, calc_crc, vgsummary->vgname ? 
1 : 0, vgsummary)) { - log_error("Metadata location on %s at %llu has invalid summary for VG.", + log_warn("WARNING: metadata on %s at %llu has invalid summary for VG.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -1564,7 +1596,7 @@ int read_metadata_location_summary(const struct format_type *fmt, /* Ignore this entry if the characters aren't permissible */ if (!validate_name(vgsummary->vgname)) { - log_error("Metadata location on %s at %llu has invalid VG name.", + log_warn("WARNING: metadata on %s at %llu has invalid VG name.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -1646,13 +1678,12 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume /* Add a new cache entry with PV info or update existing one. */ if (!(info = lvmcache_add(fmt->labeller, (const char *) &pv->id, - pv->dev, pv->vg_name, - is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0))) + pv->dev, pv->label_sector, pv->vg_name, + is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0, NULL))) return_0; + /* lvmcache_add() creates info and info->label structs for the dev, get info->label. 
*/ label = lvmcache_get_label(info); - label->sector = pv->label_sector; - label->dev = pv->dev; lvmcache_update_pv(info, pv, fmt); @@ -1680,7 +1711,7 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume // if fmt is not the same as info->fmt we are in trouble if (!lvmcache_add_mda(info, mdac->area.dev, mdac->area.start, mdac->area.size, - mda_is_ignored(mda))) + mda_is_ignored(mda), NULL)) return_0; } @@ -1734,12 +1765,16 @@ static int _text_pv_needs_rewrite(const struct format_type *fmt, struct physical { struct lvmcache_info *info; uint32_t ext_vsn; + uint32_t ext_flags; *needs_rewrite = 0; if (!pv->is_labelled) return 1; + if (!pv->dev) + return 1; + if (!(info = lvmcache_info_from_pvid((const char *)&pv->id, pv->dev, 0))) { log_error("Failed to find cached info for PV %s.", pv_dev_name(pv)); return 0; @@ -1747,8 +1782,16 @@ static int _text_pv_needs_rewrite(const struct format_type *fmt, struct physical ext_vsn = lvmcache_ext_version(info); - if (ext_vsn < PV_HEADER_EXTENSION_VSN) + if (ext_vsn < PV_HEADER_EXTENSION_VSN) { + log_debug("PV %s header needs rewrite for new ext version", dev_name(pv->dev)); *needs_rewrite = 1; + } + + ext_flags = lvmcache_ext_flags(info); + if (!(ext_flags & PV_EXT_USED)) { + log_debug("PV %s header needs rewrite to set ext used", dev_name(pv->dev)); + *needs_rewrite = 1; + } return 1; } @@ -2598,3 +2641,37 @@ bad: return NULL; } + +int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev, + struct metadata_area *mda) +{ + struct mda_context *mdac = mda->metadata_locn; + uint64_t start_byte = mdac->area.start; + struct mda_header *mdab; + struct raw_locn *rlocn_slot0; + struct raw_locn *rlocn_slot1; + uint32_t bad_fields = 0; + + if (!(mdab = raw_read_mda_header(cmd->fmt, &mdac->area, mda_is_primary(mda), 0, &bad_fields))) { + log_error("Failed to read outdated pv mda header on %s", dev_name(dev)); + return 0; + } + + rlocn_slot0 = &mdab->raw_locns[0]; + rlocn_slot1 = 
&mdab->raw_locns[1]; + + rlocn_slot0->offset = 0; + rlocn_slot0->size = 0; + rlocn_slot0->checksum = 0; + rlocn_slot1->offset = 0; + rlocn_slot1->size = 0; + rlocn_slot1->checksum = 0; + + if (!_raw_write_mda_header(cmd->fmt, dev, mda_is_primary(mda), start_byte, mdab)) { + log_error("Failed to write outdated pv mda header on %s", dev_name(dev)); + return 0; + } + + return 1; +} + diff --git a/lib/format_text/format-text.h b/lib/format_text/format-text.h index 1300e58af..2345d52a9 100644 --- a/lib/format_text/format-text.h +++ b/lib/format_text/format-text.h @@ -61,7 +61,8 @@ int add_ba(struct dm_pool *mem, struct dm_list *eas, uint64_t start, uint64_t size); void del_bas(struct dm_list *bas); int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *mdas, - struct device *dev, uint64_t start, uint64_t size, unsigned ignored); + struct device *dev, uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new); void del_mdas(struct dm_list *mdas); /* On disk */ @@ -76,4 +77,7 @@ struct data_area_list { struct disk_locn disk_locn; }; +int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev, + struct metadata_area *mda); + #endif diff --git a/lib/format_text/import.c b/lib/format_text/import.c index d487100b9..743077b69 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -61,13 +61,13 @@ int text_read_metadata_summary(const struct format_type *fmt, offset2, size2, checksum_fn, vgsummary->mda_checksum, checksum_only, 1)) { - /* FIXME: handle errors */ - log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + log_warn("WARNING: invalid metadata text from %s at %llu.", + dev_name(dev), (unsigned long long)offset); goto out; } } else { if (!config_file_read(cft)) { - log_error("Couldn't read volume group metadata from file."); + log_warn("WARNING: invalid metadata text from file."); goto out; } } @@ -229,9 +229,11 @@ static struct volume_group 
*_import_vg_from_config_tree(const struct dm_config_t */ if (!(vg = (*vsn)->read_vg(fid, cft, allow_lvmetad_extensions))) stack; - else if ((vg_missing = vg_missing_pv_count(vg))) { - log_verbose("There are %d physical volumes missing.", - vg_missing); + else { + set_pv_devices(fid, vg); + + if ((vg_missing = vg_missing_pv_count(vg))) + log_verbose("There are %d physical volumes missing.", vg_missing); vg_mark_partial_lvs(vg, 1); /* FIXME: move this code inside read_vg() */ } diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index c9b927524..43ec10beb 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -206,21 +206,6 @@ static int _read_pv(struct format_instance *fid, pv->is_labelled = 1; /* All format_text PVs are labelled. */ - /* - * Convert the uuid into a device. - */ - if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, &pv->label_sector))) { - char buffer[64] __attribute__((aligned(8))); - - if (!id_write_format(&pv->id, buffer, sizeof(buffer))) - buffer[0] = '\0'; - - if (fid->fmt->cmd && !fid->fmt->cmd->pvscan_cache_single) - log_error_once("Couldn't find device with uuid %s.", buffer); - else - log_debug_metadata("Couldn't find device with uuid %s.", buffer); - } - if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) return_0; @@ -231,15 +216,6 @@ static int _read_pv(struct format_instance *fid, return 0; } - if (!pv->dev) - pv->status |= MISSING_PV; - - if ((pv->status & MISSING_PV) && pv->dev && pv_mda_used_count(pv) == 0) { - pv->status &= ~MISSING_PV; - log_info("Recovering a previously MISSING PV %s with no MDAs.", - pv_dev_name(pv)); - } - /* Late addition */ if (dm_config_has_node(pvn, "dev_size") && !_read_uint64(pvn, "dev_size", &pv->size)) { @@ -292,21 +268,6 @@ static int _read_pv(struct format_instance *fid, pv->pe_align = 0; pv->fmt = fid->fmt; - /* Fix up pv size if missing or impossibly large */ - if ((!pv->size || pv->size > (1ULL << 62)) && pv->dev) { - if 
(!dev_get_size(pv->dev, &pv->size)) { - log_error("%s: Couldn't get size.", pv_dev_name(pv)); - return 0; - } - log_verbose("Fixing up missing size (%s) " - "for PV %s", display_size(fid->fmt->cmd, pv->size), - pv_dev_name(pv)); - size = pv->pe_count * (uint64_t) vg->extent_size + pv->pe_start; - if (size > pv->size) - log_warn("WARNING: Physical Volume %s is too large " - "for underlying device", pv_dev_name(pv)); - } - if (!alloc_pv_segment_whole_pv(mem, pv)) return_0; diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index e1462f172..7320d9c2f 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -81,7 +81,9 @@ struct mda_header { } __attribute__ ((packed)); struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area, int primary_mda); + struct device_area *dev_area, int primary_mda, + uint32_t ignore_bad_fields, + uint32_t *bad_fields); struct mda_lists { struct metadata_area_ops *file_ops; diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 1157b98aa..24bf60e30 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -241,11 +241,10 @@ void del_bas(struct dm_list *bas) del_das(bas); } -/* FIXME: refactor this function with other mda constructor code */ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *mdas, - struct device *dev, uint64_t start, uint64_t size, unsigned ignored) + struct device *dev, uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new) { -/* FIXME List size restricted by pv_header SECTOR_SIZE */ struct metadata_area *mdal, *mda; struct mda_lists *mda_lists = (struct mda_lists *) fmt->private; struct mda_context *mdac, *mdac2; @@ -295,9 +294,18 @@ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list * mda_set_ignored(mdal, ignored); dm_list_add(mdas, &mdal->list); + if (mda_new) + *mda_new = mdal; return 1; } +static void _del_mda(struct 
metadata_area *mda) +{ + free(mda->metadata_locn); + dm_list_del(&mda->list); + free(mda); +} + void del_mdas(struct dm_list *mdas) { struct dm_list *mdah, *tmp; @@ -305,9 +313,7 @@ void del_mdas(struct dm_list *mdas) dm_list_iterate_safe(mdah, tmp, mdas) { mda = dm_list_item(mdah, struct metadata_area); - free(mda->metadata_locn); - dm_list_del(&mda->list); - free(mda); + _del_mda(mda); } } @@ -319,78 +325,103 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)), return 1; } -struct _update_mda_baton { - struct lvmcache_info *info; - struct label *label; -}; - -static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) +static int _read_mda_header_and_metadata(const struct format_type *fmt, + struct metadata_area *mda, + struct lvmcache_vgsummary *vgsummary, + uint32_t *bad_fields) { - struct _update_mda_baton *p = baton; - const struct format_type *fmt = p->label->labeller->fmt; struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct mda_header *mdah; - struct lvmcache_vgsummary vgsummary = { 0 }; - if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda)))) { - log_error("Failed to read mda header from %s", dev_name(mdac->area.dev)); - goto fail; + if (!(mdah = raw_read_mda_header(fmt, &mdac->area, (mda->mda_num == 1), 0, bad_fields))) { + log_warn("WARNING: bad metadata header on %s at %llu.", + dev_name(mdac->area.dev), + (unsigned long long)mdac->area.start); + if (mda) + mda->header_start = mdac->area.start; + *bad_fields |= BAD_MDA_HEADER; + return 0; } + if (mda) + mda->header_start = mdah->start; + mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns)); if (mda_is_ignored(mda)) { log_debug_metadata("Ignoring mda on device %s at offset " FMTu64, dev_name(mdac->area.dev), mdac->area.start); + vgsummary->mda_ignored = 1; return 1; } if (!read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, - &vgsummary, &mdac->free_sectors)) { - if 
(vgsummary.zero_offset) + vgsummary, &mdac->free_sectors)) { + if (vgsummary->zero_offset) return 1; - log_error("Failed to read metadata summary from %s", dev_name(mdac->area.dev)); - goto fail; - } - - if (!lvmcache_update_vgname_and_id(p->info, &vgsummary)) { - log_error("Failed to save lvm summary for %s", dev_name(mdac->area.dev)); - goto fail; + log_warn("WARNING: bad metadata text on %s in mda%d", + dev_name(mdac->area.dev), mda->mda_num); + *bad_fields |= BAD_MDA_TEXT; + return 0; } return 1; - -fail: - lvmcache_del(p->info); - return 0; } -static int _text_read(struct labeller *l, struct device *dev, void *label_buf, - struct label **label) +/* + * Used by label_scan to get a summary of the VG that exists on this PV. This + * summary is stored in lvmcache vginfo/info/info->mdas and is used later by + * vg_read which needs to know which PVs to read for a given VG name, and where + * the metadata is at for those PVs. + */ + +static int _text_read(struct labeller *labeller, struct device *dev, void *label_buf, + uint64_t label_sector, int *is_duplicate) { + struct lvmcache_vgsummary vgsummary; + struct lvmcache_info *info; + const struct format_type *fmt = labeller->fmt; struct label_header *lh = (struct label_header *) label_buf; struct pv_header *pvhdr; struct pv_header_extension *pvhdr_ext; - struct lvmcache_info *info; + struct metadata_area *mda; + struct metadata_area *mda1 = NULL; + struct metadata_area *mda2 = NULL; struct disk_locn *dlocn_xl; uint64_t offset; uint32_t ext_version; - struct _update_mda_baton baton; + uint32_t bad_fields; + int mda_count = 0; + int good_mda_count = 0; + int bad_mda_count = 0; + int rv1, rv2; /* * PV header base */ pvhdr = (struct pv_header *) ((char *) label_buf + xlate32(lh->offset_xl)); - if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, + /* + * FIXME: stop adding the device to lvmcache initially as an orphan + * (and then moving it later) and instead just add it when we know the + * VG. 
+ * + * If another device with this same PVID has already been seen, + * lvmcache_add will put this device in the duplicates list in lvmcache + * and return NULL. At the end of label_scan, the duplicate devs are + * compared, and if another dev is preferred for this PV, then the + * existing dev is removed from lvmcache and _text_read is called again + * for this dev, and lvmcache_add will add it. + * + * Other reasons for lvmcache_add to return NULL are internal errors. + */ + if (!(info = lvmcache_add(labeller, (char *)pvhdr->pv_uuid, dev, label_sector, FMT_TEXT_ORPHAN_VG_NAME, - FMT_TEXT_ORPHAN_VG_NAME, 0))) + FMT_TEXT_ORPHAN_VG_NAME, 0, is_duplicate))) return_0; - *label = lvmcache_get_label(info); - lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl)); lvmcache_del_das(info); @@ -404,11 +435,27 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, dlocn_xl++; } - /* Metadata area headers */ dlocn_xl++; + + /* Metadata areas */ while ((offset = xlate64(dlocn_xl->offset))) { - lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0); + + /* + * This just calls add_mda() above, replacing info with info->mdas. 
+ */ + lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0, &mda); + dlocn_xl++; + mda_count++; + + if (mda_count == 1) { + mda1 = mda; + mda1->mda_num = 1; + } + else if (mda_count == 2) { + mda2 = mda; + mda2->mda_num = 2; + } } dlocn_xl++; @@ -418,7 +465,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, */ pvhdr_ext = (struct pv_header_extension *) ((char *) dlocn_xl); if (!(ext_version = xlate32(pvhdr_ext->version))) - goto out; + goto scan_mdas; log_debug_metadata("%s: PV header extension version " FMTu32 " found", dev_name(dev), ext_version); @@ -435,22 +482,117 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size)); dlocn_xl++; } -out: - baton.info = info; - baton.label = *label; + + scan_mdas: + if (!mda_count) { + log_debug_metadata("Scanning %s found no mdas.", dev_name(dev)); + return 1; + } /* - * In the vg_read phase, we compare all mdas and decide which to use - * which are bad and need repair. + * Track which devs have bad metadata so repair can find them (even if + * this dev also has good metadata that we are able to use). * - * FIXME: this quits if the first mda is bad, but we need something - * smarter to be able to use the second mda if it's good. + * When bad metadata is seen, the unusable mda struct is removed from + * lvmcache info->mdas. This means that vg_read and vg_write will skip + * the bad mda and not try to read or write the bad metadata. The bad mdas + * are saved in a separate bad_mdas list in lvmcache so that repair can + * find them to repair. 
*/ - if (!lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton)) { - log_error("Failed to scan VG from %s", dev_name(dev)); - return 0; + + if (mda1) { + log_debug_metadata("Scanning %s mda1 summary.", dev_name(dev)); + memset(&vgsummary, 0, sizeof(vgsummary)); + bad_fields = 0; + vgsummary.mda_num = 1; + + rv1 = _read_mda_header_and_metadata(fmt, mda1, &vgsummary, &bad_fields); + + if (rv1 && !vgsummary.zero_offset && !vgsummary.mda_ignored) { + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + /* I believe this is only an internal error. */ + log_warn("WARNING: Scanning %s mda1 failed to save internal summary.", dev_name(dev)); + + dm_list_del(&mda1->list); + bad_fields |= BAD_MDA_INTERNAL; + mda1->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda1); + mda1 = NULL; + bad_mda_count++; + } else { + /* The normal success path */ + log_debug("Scanned %s mda1 seqno %u", dev_name(dev), vgsummary.seqno); + good_mda_count++; + } + } + + if (!rv1) { + /* + * Remove the bad mda from normal mda list so it's not + * used by vg_read/vg_write, but keep track of it in + * lvmcache for repair. + */ + log_warn("WARNING: scanning %s mda1 failed to read metadata summary.", dev_name(dev)); + log_warn("WARNING: repair VG metadata on %s with vgck --updatemetadata.", dev_name(dev)); + + dm_list_del(&mda1->list); + mda1->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda1); + mda1 = NULL; + bad_mda_count++; + } } + if (mda2) { + log_debug_metadata("Scanning %s mda2 summary.", dev_name(dev)); + memset(&vgsummary, 0, sizeof(vgsummary)); + bad_fields = 0; + vgsummary.mda_num = 2; + + rv2 = _read_mda_header_and_metadata(fmt, mda2, &vgsummary, &bad_fields); + + if (rv2 && !vgsummary.zero_offset && !vgsummary.mda_ignored) { + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + /* I believe this is only an internal error. 
*/ + log_warn("WARNING: Scanning %s mda2 failed to save internal summary.", dev_name(dev)); + + dm_list_del(&mda2->list); + bad_fields |= BAD_MDA_INTERNAL; + mda2->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda2); + mda2 = NULL; + bad_mda_count++; + } else { + /* The normal success path */ + log_debug("Scanned %s mda2 seqno %u", dev_name(dev), vgsummary.seqno); + good_mda_count++; + } + } + + if (!rv2) { + /* + * Remove the bad mda from normal mda list so it's not + * used by vg_read/vg_write, but keep track of it in + * lvmcache for repair. + */ + log_warn("WARNING: scanning %s mda2 failed to read metadata summary.", dev_name(dev)); + log_warn("WARNING: repair VG metadata on %s with vgck --updatemetadata.", dev_name(dev)); + + dm_list_del(&mda2->list); + mda2->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda2); + mda2 = NULL; + bad_mda_count++; + } + } + + if (good_mda_count) + return 1; + + if (bad_mda_count) + return 0; + + /* no metadata in the mdas */ return 1; } diff --git a/lib/label/label.c b/lib/label/label.c index 7d5073ef5..53ae008a1 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -353,9 +353,9 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, int *is_lvm_device) { char label_buf[LABEL_SIZE] __attribute__((aligned(8))); - struct label *label = NULL; struct labeller *labeller; uint64_t sector = 0; + int is_duplicate = 0; int ret = 0; int pass; @@ -420,17 +420,38 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, /* * This is the point where the scanning code dives into the rest of - * lvm. ops->read() is usually _text_read() which reads the pv_header, - * mda locations, mda contents. As these bits of data are read, they - * are saved into lvmcache as info/vginfo structs. + * lvm. ops->read() is _text_read() which reads the pv_header, mda + * locations, and metadata text. 
All of the info it finds about the PV + * and VG is stashed in lvmcache which saves it in the form of + * info/vginfo structs. That lvmcache info is used later when the + * command wants to read the VG to do something to it. */ + ret = labeller->ops->read(labeller, dev, label_buf, sector, &is_duplicate); - if ((ret = (labeller->ops->read)(labeller, dev, label_buf, &label)) && label) { - label->dev = dev; - label->sector = sector; - } else { - /* FIXME: handle errors */ - lvmcache_del_dev(dev); + if (!ret) { + if (is_duplicate) { + /* + * _text_read() called lvmcache_add() which found an + * existing info struct for this PVID but for a + * different dev. lvmcache_add() did not add an info + * struct for this dev, but added this dev to the list + * of duplicate devs. + */ + log_warn("WARNING: scan found duplicate PVID %s on %s", dev->pvid, dev_name(dev)); + } else { + /* + * Leave the info in lvmcache because the device is + * present and can still be used even if it has + * metadata that we can't process (we can get metadata + * from another PV/mda.) _text_read only saves mdas + * with good metadata in lvmcache (this includes old + * metadata), and if a PV has no mdas with good + * metadata, then the info for the PV will be in + * lvmcache with empty info->mdas, and it will behave + * like a PV with no mdas (a common configuration.) + */ + log_warn("WARNING: scan failed to get metadata summary from %s PVID %s", dev_name(dev), dev->pvid); + } } out: return ret; @@ -693,7 +714,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, scan_failed = 1; scan_process_errors++; scan_failed_count++; - lvmcache_del_dev(devl->dev); } } diff --git a/lib/label/label.h b/lib/label/label.h index 59396559a..bd4509bb8 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -64,8 +64,8 @@ struct label_ops { /* * Read a label from a volume. 
*/ - int (*read) (struct labeller * l, struct device * dev, - void *label_buf, struct label ** label); + int (*read) (struct labeller *l, struct device *dev, + void *label_buf, uint64_t label_sector, int *is_duplicate); /* * Populate label_type etc. diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index c76e644be..027494c7c 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -181,7 +181,6 @@ #define MIRROR_SKIP_INIT_SYNC 0x00000010U /* skip initial sync */ /* vg_read and vg_read_for_update flags */ -#define READ_ALLOW_INCONSISTENT 0x00010000U #define READ_ALLOW_EXPORTED 0x00020000U #define READ_OK_NOTFOUND 0x00040000U #define READ_WARN_INCONSISTENT 0x00080000U @@ -189,8 +188,8 @@ #define PROCESS_SKIP_SCAN 0x00200000U /* skip lvmcache_label_scan in process_each_pv */ #define PROCESS_SKIP_ORPHAN_LOCK 0x00400000U /* skip lock_vol(VG_ORPHAN) in vg_read */ -/* vg's "read_status" field */ -#define FAILED_INCONSISTENT 0x00000001U +/* vg_read returns these in error_flags */ +#define FAILED_NOT_ENABLED 0x00000001U #define FAILED_LOCKING 0x00000002U #define FAILED_NOTFOUND 0x00000004U #define FAILED_READ_ONLY 0x00000008U @@ -203,6 +202,7 @@ #define FAILED_SYSTEMID 0x00000400U #define FAILED_LOCK_TYPE 0x00000800U #define FAILED_LOCK_MODE 0x00001000U +#define FAILED_INTERNAL_ERROR 0x00002000U #define SUCCESS 0x00000000U #define VGMETADATACOPIES_ALL UINT32_MAX @@ -715,24 +715,14 @@ int lv_resize(struct logical_volume *lv, struct lvresize_params *lp, struct dm_list *pvh); -/* - * Return a handle to VG metadata. 
- */ -struct volume_group *vg_read_internal(struct cmd_context *cmd, - const char *vgname, const char *vgid, - uint32_t lockd_state, uint32_t warn_flags, - int enable_repair, - int *mdas_consistent); -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state); +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, + uint32_t *error_flags, struct volume_group **error_vg); struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, const char *vgid, uint32_t read_flags, uint32_t lockd_state); -struct volume_group *vg_read_orphans(struct cmd_context *cmd, - uint32_t warn_flags, - const char *orphan_vgname); -/* - * Test validity of a VG handle. - */ +struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname); + +/* this is historical and being removed, don't use */ uint32_t vg_read_error(struct volume_group *vg_handle); /* pe_start and pe_end relate to any existing data so that new metadata @@ -755,7 +745,7 @@ uint32_t pv_list_extents_free(const struct dm_list *pvh); int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name); int vg_validate(struct volume_group *vg); struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name); -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name); +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, int *exists); int vg_remove_mdas(struct volume_group *vg); int vg_remove_check(struct volume_group *vg); void vg_remove_pvs(struct volume_group *vg); @@ -1377,4 +1367,6 @@ int vg_strip_outdated_historical_lvs(struct volume_group *vg); int lv_on_pmem(struct logical_volume *lv); +void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg); + #endif diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c 
index 9efc35592..f31b4b979 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -28,11 +28,14 @@ #include "lib/display/display.h" #include "lib/locking/locking.h" #include "lib/format_text/archiver.h" +#include "lib/format_text/format-text.h" +#include "lib/format_text/layout.h" +#include "lib/format_text/import-export.h" #include "lib/config/defaults.h" #include "lib/locking/lvmlockd.h" -#include "time.h" #include "lib/notify/lvmnotify.h" +#include <time.h> #include <math.h> static struct physical_volume *_pv_read(struct cmd_context *cmd, @@ -222,6 +225,75 @@ out: (unsigned long long)pv->pe_align_offset, dev_name(pv->dev)); } +/* + * FIXME: we only want to print the warnings when this is called from + * vg_read, not from import_vg_from_metadata, so do the warnings elsewhere + * or avoid calling this from import_vg_from. + */ +static void _set_pv_device(struct format_instance *fid, + struct volume_group *vg, + struct physical_volume *pv) +{ + char buffer[64] __attribute__((aligned(8))); + uint64_t size; + + if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, &pv->label_sector))) { + if (!id_write_format(&pv->id, buffer, sizeof(buffer))) + buffer[0] = '\0'; + + if (fid->fmt->cmd && !fid->fmt->cmd->pvscan_cache_single) + log_error_once("Couldn't find device with uuid %s.", buffer); + else + log_debug_metadata("Couldn't find device with uuid %s.", buffer); + } + + /* + * A previous command wrote the VG while this dev was missing, so + * the MISSING flag was included in the PV. + */ + if ((pv->status & MISSING_PV) && pv->dev) + log_warn("WARNING: VG %s was previously updated while PV %s was missing.", vg->name, dev_name(pv->dev)); + + /* + * If this command writes the VG, we want the MISSING flag to be + * written for this PV with no device. + */ + if (!pv->dev) + pv->status |= MISSING_PV; + + /* is this correct? 
*/ + if ((pv->status & MISSING_PV) && pv->dev && (pv_mda_used_count(pv) == 0)) { + pv->status &= ~MISSING_PV; + log_info("Found a previously MISSING PV %s with no MDAs.", pv_dev_name(pv)); + } + + /* Fix up pv size if missing or impossibly large */ + if ((!pv->size || pv->size > (1ULL << 62)) && pv->dev) { + if (!dev_get_size(pv->dev, &pv->size)) { + log_error("%s: Couldn't get size.", pv_dev_name(pv)); + return; + } + log_verbose("Fixing up missing size (%s) for PV %s", display_size(fid->fmt->cmd, pv->size), + pv_dev_name(pv)); + size = pv->pe_count * (uint64_t) vg->extent_size + pv->pe_start; + if (size > pv->size) + log_warn("WARNING: Physical Volume %s is too large " + "for underlying device", pv_dev_name(pv)); + } +} + +/* + * Finds the 'struct device' that corresponds to each PV in the metadata, + * and may make some adjustments to vg fields based on the dev properties. + */ +void set_pv_devices(struct format_instance *fid, struct volume_group *vg) +{ + struct pv_list *pvl; + + dm_list_iterate_items(pvl, &vg->pvs) + _set_pv_device(fid, vg, pvl->pv); +} + void add_pvl_to_vgs(struct volume_group *vg, struct pv_list *pvl) { dm_list_add(&vg->pvs, &pvl->list); @@ -370,48 +442,6 @@ int add_pv_to_vg(struct volume_group *vg, const char *pv_name, return 1; } -static int _copy_pv(struct dm_pool *pvmem, - struct physical_volume *pv_to, - struct physical_volume *pv_from) -{ - memcpy(pv_to, pv_from, sizeof(*pv_to)); - - /* We must use pv_set_fid here to update the reference counter! 
*/ - pv_to->fid = NULL; - pv_set_fid(pv_to, pv_from->fid); - - if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name))) - return_0; - - if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags)) - return_0; - - if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments)) - return_0; - - return 1; -} - -static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from) -{ - struct pv_list *pvl_to = NULL; - - if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to)))) - return_NULL; - - if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv)))) - goto_bad; - - if (!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv)) - goto_bad; - - return pvl_to; - -bad: - dm_pool_free(pvmem, pvl_to); - return NULL; -} - static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to, const char *pv_name, int enforce_pv_from_source) { @@ -584,7 +614,7 @@ int vg_remove_check(struct volume_group *vg) { unsigned lv_count; - if (vg_read_error(vg) || vg_missing_pv_count(vg)) { + if (vg_missing_pv_count(vg)) { log_error("Volume group \"%s\" not found, is inconsistent " "or has PVs missing.", vg ? vg->name : ""); log_error("Consider vgreduce --removemissing if metadata " @@ -963,36 +993,6 @@ static int _vg_update_embedded_copy(struct volume_group *vg, struct volume_group return 1; } -/* - * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a - * possible failure code or zero for success. 
- */ -static struct volume_group *_vg_make_handle(struct cmd_context *cmd, - struct volume_group *vg, - uint32_t failure) -{ - /* Never return a cached VG structure for a failure */ - if (vg && vg->vginfo && failure != SUCCESS) { - release_vg(vg); - vg = NULL; - } - - if (!vg && !(vg = alloc_vg("vg_make_handle", cmd, NULL))) - return_NULL; - - vg->read_status = failure; - - /* - * If we hold a write lock and might be changing the VG contents, embed a pristine - * copy of the VG metadata for the activation code to use later - */ - if (vg->fid && !dm_pool_locked(vg->vgmem) && !vg->vg_committed && !is_orphan_vg(vg->name)) - if (vg_write_lock_held() && !_vg_update_embedded_copy(vg, &vg->vg_committed)) - vg->read_status |= FAILED_ALLOCATION; - - return vg; -} - int lv_has_unknown_segments(const struct logical_volume *lv) { struct lv_segment *seg; @@ -1014,24 +1014,24 @@ int vg_has_unknown_segments(const struct volume_group *vg) return 0; } -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name) +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, int *exists) { uint32_t rc; struct volume_group *vg; if (!validate_name(vg_name)) { log_error("Invalid vg name %s", vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ return NULL; } rc = vg_lock_newname(cmd, vg_name); + if (rc == FAILED_EXIST) + *exists = 1; if (rc != SUCCESS) - /* NOTE: let caller decide - this may be check for existence */ - return _vg_make_handle(cmd, NULL, rc); + return NULL; vg = vg_create(cmd, vg_name); - if (!vg || vg_read_error(vg)) + if (!vg) unlock_vg(cmd, NULL, vg_name); return vg; @@ -1039,12 +1039,8 @@ struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_ /* * Create a VG with default parameters. 
- * Returns: - * - struct volume_group* with SUCCESS code: VG structure created - * - NULL or struct volume_group* with FAILED_* code: error creating VG structure - * Use vg_read_error() to determine success or failure. - * FIXME: cleanup usage of _vg_make_handle() */ + struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) { struct volume_group *vg; @@ -1084,11 +1080,10 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) vg_name); goto bad; } - return _vg_make_handle(cmd, vg, SUCCESS); + return vg; bad: unlock_and_release_vg(cmd, vg, vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ return NULL; } @@ -2807,57 +2802,6 @@ static int _pv_in_pv_list(struct physical_volume *pv, struct dm_list *head) return 0; } -/* - * Check if any of the PVs in VG still contain old PV headers - * and if yes, schedule them for PV header update. - */ -static int _vg_update_old_pv_ext_if_needed(struct volume_group *vg) -{ - struct pv_list *pvl, *new_pvl; - int pv_needs_rewrite; - - if (!(vg->fid->fmt->features & FMT_PV_FLAGS)) - return 1; - - dm_list_iterate_items(pvl, &vg->pvs) { - if (is_missing_pv(pvl->pv) || - !pvl->pv->fmt->ops->pv_needs_rewrite) - continue; - - if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) - continue; - - if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv, - &pv_needs_rewrite)) - return_0; - - if (pv_needs_rewrite) { - /* - * Schedule PV for writing only once! 
- */ - if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) - continue; - - if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl)))) { - log_error("pv_to_write allocation for '%s' failed", pv_dev_name(pvl->pv)); - return 0; - } - new_pvl->pv = pvl->pv; - dm_list_add(&vg->pv_write_list, &new_pvl->list); - log_debug("PV %s has old extension header, updating to newest version.", - pv_dev_name(pvl->pv)); - } - } - - if (!dm_list_empty(&vg->pv_write_list) && - (!vg_write(vg) || !vg_commit(vg))) { - log_error("Failed to update old PV extension headers in VG %s.", vg->name); - return 0; - } - - return 1; -} - static int _check_historical_lv_is_valid(struct historical_logical_volume *hlv) { struct glv_list *glvl; @@ -2922,6 +2866,69 @@ static int _handle_historical_lvs(struct volume_group *vg) return 1; } +static void _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg) +{ + struct dm_list devs; + struct dm_list *mdas = NULL; + struct device_list *devl; + struct device *dev; + struct metadata_area *mda; + struct label *label; + struct lvmcache_info *info; + uint32_t ext_flags; + + dm_list_init(&devs); + + /* + * When vg_read selected a good copy of the metadata, it used it to + * update the lvmcache representation of the VG (lvmcache_update_vg). + * At that point outdated PVs were recognized and moved into the + * vginfo->outdated_infos list. Here we clear the PVs on that list. 
+ */ + + lvmcache_get_outdated_devs(cmd, vg->name, (const char *)&vg->id, &devs); + + dm_list_iterate_items(devl, &devs) { + dev = devl->dev; + + lvmcache_get_outdated_mdas(cmd, vg->name, (const char *)&vg->id, dev, &mdas); + + if (mdas) { + dm_list_iterate_items(mda, mdas) { + log_warn("WARNING: wiping mda on outdated PV %s", dev_name(dev)); + + if (!text_wipe_outdated_pv_mda(cmd, dev, mda)) + log_warn("WARNING: failed to wipe mda on outdated PV %s", dev_name(dev)); + } + } + + if (!(label = lvmcache_get_dev_label(dev))) { + log_error("_wipe_outdated_pvs no label for %s", dev_name(dev)); + continue; + } + + info = label->info; + ext_flags = lvmcache_ext_flags(info); + ext_flags &= ~PV_EXT_USED; + lvmcache_set_ext_version(info, PV_HEADER_EXTENSION_VSN); + lvmcache_set_ext_flags(info, ext_flags); + + log_warn("WARNING: wiping header on outdated PV %s", dev_name(dev)); + + if (!label_write(dev, label)) + log_warn("WARNING: failed to wipe header on outdated PV %s", dev_name(dev)); + + lvmcache_del(info); + } + + /* + * A vgremove will involve many vg_write() calls (one for each lv + * removed) but we only need to wipe pvs once, so clear the outdated + * list so it won't be wiped again. 
+ */ + lvmcache_del_outdated_devs(cmd, vg->name, (const char *)&vg->id); +} + /* * After vg_write() returns success, * caller MUST call either vg_commit() or vg_revert() @@ -2929,9 +2936,10 @@ static int _handle_historical_lvs(struct volume_group *vg) int vg_write(struct volume_group *vg) { struct dm_list *mdah; - struct pv_list *pvl, *pvl_safe; + struct pv_list *pvl, *pvl_safe, *new_pvl; struct metadata_area *mda; struct lv_list *lvl; + struct device *mda_dev; int revert = 0, wrote = 0; if (vg_is_shared(vg)) { @@ -2986,6 +2994,9 @@ int vg_write(struct volume_group *vg) return 0; } + if (vg->cmd->wipe_outdated_pvs) + _wipe_outdated_pvs(vg->cmd, vg); + if (critical_section()) log_error(INTERNAL_ERROR "Writing metadata in critical section."); @@ -2994,6 +3005,26 @@ int vg_write(struct volume_group *vg) memlock_unlock(vg->cmd); vg->seqno++; + dm_list_iterate_items(pvl, &vg->pvs) { + int update_pv_header = 0; + + if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) + continue; + + if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv, &update_pv_header)) + continue; + + if (!update_pv_header) + continue; + + if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl)))) + continue; + + new_pvl->pv = pvl->pv; + dm_list_add(&vg->pv_write_list, &new_pvl->list); + log_warn("WARNING: updating PV header on %s for VG %s.", pv_dev_name(pvl->pv), vg->name); + } + dm_list_iterate_items_safe(pvl, pvl_safe, &vg->pv_write_list) { if (!pv_write(vg->cmd, pvl->pv, 1)) return_0; @@ -3002,8 +3033,27 @@ int vg_write(struct volume_group *vg) /* Write to each copy of the metadata area */ dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + if (mda->status & MDA_FAILED) continue; + + /* + * When the scan and vg_read find old metadata in an mda, they + * leave the info struct in lvmcache, and leave the mda in + * info->mdas. That means we use the mda here to write new + * metadata into. 
This means that a command writing a VG will + * automatically update old metadata to the latest. + * + * This can also happen if the metadata was ignored on this + * dev, and then it's later changed to not ignored, and + * we see the old metadata. + */ + if (lvmcache_has_old_metadata(vg->cmd, vg->name, (const char *)&vg->id, mda_dev)) { + log_warn("WARNING: updating old metadata to %u on %s for VG %s.", + vg->seqno, dev_name(mda_dev), vg->name); + } + if (!mda->ops->vg_write) { log_error("Format does not support writing volume" "group metadata areas"); @@ -3072,6 +3122,7 @@ static int _vg_commit_mdas(struct volume_group *vg) struct metadata_area *mda, *tmda; struct dm_list ignored; int failed = 0; + int good = 0; int cache_updated = 0; /* Rearrange the metadata_areas_in_use so ignored mdas come first. */ @@ -3092,27 +3143,31 @@ static int _vg_commit_mdas(struct volume_group *vg) !mda->ops->vg_commit(vg->fid, vg, mda)) { stack; failed = 1; - } + } else + good++; + /* Update cache first time we succeed */ if (!failed && !cache_updated) { - lvmcache_update_vg(vg, 0); + lvmcache_update_vg_from_write(vg); cache_updated = 1; } } - return cache_updated; + if (good) + return 1; + return 0; } /* Commit pending changes */ int vg_commit(struct volume_group *vg) { - int cache_updated = 0; struct pv_list *pvl; + int ret; - cache_updated = _vg_commit_mdas(vg); + ret = _vg_commit_mdas(vg); set_vg_notify(vg->cmd); - if (cache_updated) { + if (ret) { /* * We need to clear old_name after a successful commit. * The volume_group structure could be reused later. 
@@ -3126,7 +3181,7 @@ int vg_commit(struct volume_group *vg) } /* If at least one mda commit succeeded, it was committed */ - return cache_updated; + return ret; } /* Don't commit any pending changes */ @@ -3152,14 +3207,6 @@ void vg_revert(struct volume_group *vg) } } -static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) -{ - int *in_use = _in_use; - if (!mda_is_ignored(mda)) - *in_use = 1; - return 1; -} - struct _vg_read_orphan_baton { struct cmd_context *cmd; struct volume_group *vg; @@ -3197,6 +3244,14 @@ struct _vg_read_orphan_baton { */ #if 0 +static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) +{ + int *in_use = _in_use; + if (!mda_is_ignored(mda)) + *in_use = 1; + return 1; +} + static int _check_or_repair_orphan_pv_ext(struct physical_volume *pv, struct lvmcache_info *info, struct _vg_read_orphan_baton *b) @@ -3331,9 +3386,7 @@ static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton) } /* Make orphan PVs look like a VG. */ -struct volume_group *vg_read_orphans(struct cmd_context *cmd, - uint32_t warn_flags, - const char *orphan_vgname) +struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname) { const struct format_type *fmt; struct lvmcache_vginfo *vginfo; @@ -3394,40 +3447,6 @@ struct volume_group *vg_read_orphans(struct cmd_context *cmd, return vg; } -static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg) -{ - struct pv_list *pvl, *pvl2; - - dm_list_iterate_items(pvl, &vg->pvs) { - dm_list_iterate_items(pvl2, all_pvs) { - if (pvl->pv->dev == pvl2->pv->dev) - goto next_pv; - } - - /* - * PV is not on list so add it. 
- */ - if (!(pvl2 = _copy_pvl(pvmem, pvl))) { - log_error("pv_list allocation for '%s' failed", - pv_dev_name(pvl->pv)); - return 0; - } - dm_list_add(all_pvs, &pvl2->list); - next_pv: - ; - } - - return 1; -} - -static void _free_pv_list(struct dm_list *all_pvs) -{ - struct pv_list *pvl; - - dm_list_iterate_items(pvl, all_pvs) - pvl->pv->fid->fmt->ops->destroy_instance(pvl->pv->fid); -} - static void _destroy_fid(struct format_instance **fid) { if (*fid) { @@ -3447,812 +3466,49 @@ int vg_missing_pv_count(const struct volume_group *vg) return ret; } -static int _check_reappeared_pv(struct volume_group *correct_vg, - struct physical_volume *pv, int act) -{ - struct pv_list *pvl; - int rv = 0; - - /* - * Skip these checks in case the tool is going to deal with missing - * PVs, especially since the resulting messages can be pretty - * confusing. - */ - if (correct_vg->cmd->handles_missing_pvs) - return rv; - - /* - * Skip this if there is no underlying device present for this PV. - */ - if (!pv->dev) - return rv; - - dm_list_iterate_items(pvl, &correct_vg->pvs) - if (pv->dev == pvl->pv->dev && is_missing_pv(pvl->pv)) { - if (act) - log_warn("WARNING: Missing device %s reappeared, updating " - "metadata for VG %s to version %u.", - pv_dev_name(pvl->pv), pv_vg_name(pvl->pv), - correct_vg->seqno); - if (pvl->pv->pe_alloc_count == 0) { - if (act) { - pv->status &= ~MISSING_PV; - pvl->pv->status &= ~MISSING_PV; - } - ++ rv; - } else if (act) - log_warn("WARNING: Device %s still marked missing because of allocated data " - "on it, remove volumes and consider vgreduce --removemissing.", - pv_dev_name(pvl->pv)); - } - - return rv; -} - static int _is_foreign_vg(struct volume_group *vg) { return vg->cmd->system_id && strcmp(vg->system_id, vg->cmd->system_id); } -static int _repair_inconsistent_vg(struct volume_group *vg, uint32_t lockd_state) -{ - unsigned saved_handles_missing_pvs = vg->cmd->handles_missing_pvs; - - if (lvmcache_found_duplicate_pvs()) { - 
log_debug_metadata("Skip metadata repair with duplicates."); - return 0; - } - - /* Cannot write foreign VGs, the owner will repair it. */ - if (_is_foreign_vg(vg)) { - log_verbose("Skip metadata repair for foreign VG."); - return 0; - } - - if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_verbose("Skip metadata repair for shared VG without exclusive lock."); - return 0; - } - - log_warn("WARNING: Inconsistent metadata found for VG %s - updating to use version %u", vg->name, vg->seqno); - - vg->cmd->handles_missing_pvs = 1; - if (!vg_write(vg)) { - log_error("Automatic metadata correction failed"); - vg->cmd->handles_missing_pvs = saved_handles_missing_pvs; - return 0; - } - - vg->cmd->handles_missing_pvs = saved_handles_missing_pvs; - - if (!vg_commit(vg)) { - log_error("Automatic metadata correction commit failed"); - return 0; - } - - return 1; -} - -static int _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg, struct dm_list *to_check, uint32_t lockd_state) -{ - struct pv_list *pvl, *pvl2; - char uuid[64] __attribute__((aligned(8))); - - if (lvmcache_found_duplicate_pvs()) { - log_debug_metadata("Skip wiping outdated PVs with duplicates."); - return 0; - } - - /* - * Cannot write foreign VGs, the owner will repair it. - * Also, if another host is updating its VG, we may read - * the PVs while some are written but not others, making - * some PVs look outdated to us just because we're reading - * the VG while it's only partially written out. 
- */ - if (_is_foreign_vg(vg)) { - log_debug_metadata("Skip wiping outdated PVs for foreign VG."); - return 0; - } - - if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_verbose("Skip wiping outdated PVs for shared VG without exclusive lock."); - return 0; - } - - dm_list_iterate_items(pvl, to_check) { - dm_list_iterate_items(pvl2, &vg->pvs) { - if (pvl->pv->dev == pvl2->pv->dev) - goto next_pv; - } - - - if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) - return_0; - log_warn("WARNING: Removing PV %s (%s) that no longer belongs to VG %s", - pv_dev_name(pvl->pv), uuid, vg->name); - if (!pv_write_orphan(cmd, pvl->pv)) - return_0; -next_pv: - ; - } - return 1; -} - -static int _check_or_repair_pv_ext(struct cmd_context *cmd, - struct volume_group *vg, - uint32_t lockd_state, - int repair, int *inconsistent_pvs) +static int _check_pv_ext(struct cmd_context *cmd, struct volume_group *vg) { - char uuid[64] __attribute__((aligned(8))); struct lvmcache_info *info; uint32_t ext_version, ext_flags; struct pv_list *pvl; - unsigned pvs_fixed = 0; - int r = 0; - *inconsistent_pvs = 0; + if (_is_foreign_vg(vg)) + return 1; + + if (vg_is_shared(vg)) + return 1; dm_list_iterate_items(pvl, &vg->pvs) { - /* Missing PV - nothing to do. 
*/ if (is_missing_pv(pvl->pv)) continue; - if (!pvl->pv->dev) { - /* is_missing_pv doesn't catch NULL dev */ - memset(&uuid, 0, sizeof(uuid)); - if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) - goto_out; - log_warn("WARNING: Not repairing PV %s with missing device.", uuid); + /* is_missing_pv doesn't catch NULL dev */ + if (!pvl->pv->dev) continue; - } - if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0))) { - log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv)); - goto out; - } + if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0))) + continue; ext_version = lvmcache_ext_version(info); - if (ext_version < 2) + if (ext_version < PV_HEADER_EXTENSION_VSN) { + log_warn("WARNING: PV %s in VG %s is using an old PV header, modify the VG to update.", + dev_name(pvl->pv->dev), vg->name); continue; + } ext_flags = lvmcache_ext_flags(info); if (!(ext_flags & PV_EXT_USED)) { - if (!repair) { - *inconsistent_pvs = 1; - /* we're not repairing now, so no need to - * check further PVs - inconsistent_pvs is already - * set and that will trigger the repair next time */ - return 1; - } - - if (_is_foreign_vg(vg)) { - log_verbose("Skip repair of PV %s that is in foreign " - "VG %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - *inconsistent_pvs = 1; - } else if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_warn("Skip repair of PV %s that is in shared " - "VG %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - *inconsistent_pvs = 1; - } else { - log_warn("WARNING: Repairing Physical Volume %s that is " - "in Volume Group %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - - /* pv write will set correct ext_flags */ - if (!pv_write(cmd, pvl->pv, 1)) { - *inconsistent_pvs = 1; - log_error("Failed to repair physical volume \"%s\".", - pv_dev_name(pvl->pv)); - goto out; - } - pvs_fixed++; - } - } - } - - r = 1; -out: - if ((pvs_fixed > 0) && 
!_repair_inconsistent_vg(vg, lockd_state)) - return_0; - - return r; -} - -/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct - * inconsistent metadata on disk (i.e. the VG write lock is held). - * This guarantees only consistent metadata is returned. - * If consistent is 0, caller must check whether consistent == 1 on return - * and take appropriate action if it isn't (e.g. abort; get write lock - * and call vg_read_internal again). - * - * If precommitted is set, use precommitted metadata if present. - * - * Either of vgname or vgid may be NULL. - * - * Note: vginfo structs must not be held or used as parameters - * across the call to this function. - */ -static struct volume_group *_vg_read(struct cmd_context *cmd, - const char *vgname, - const char *vgid, - uint32_t lockd_state, - uint32_t warn_flags, - int enable_repair, - int *mdas_consistent, - unsigned precommitted) -{ - struct format_instance *fid = NULL; - struct format_instance_ctx fic; - const struct format_type *fmt; - struct volume_group *vg, *correct_vg = NULL; - struct metadata_area *mda; - struct lvmcache_info *info; - int inconsistent = 0; - int inconsistent_vgid = 0; - int inconsistent_pvs = 0; - int inconsistent_mdas = 0; - int inconsistent_mda_count = 0; - int strip_historical_lvs = enable_repair; - int update_old_pv_ext = enable_repair; - unsigned use_precommitted = precommitted; - struct dm_list *pvids; - struct pv_list *pvl; - struct dm_list all_pvs; - char uuid[64] __attribute__((aligned(8))); - int skipped_rescan = 0; - struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ - unsigned use_previous_vg; - - *mdas_consistent = 1; - - if (is_orphan_vg(vgname)) { - log_very_verbose("Reading VG %s", vgname); - - if (use_precommitted) { - log_error(INTERNAL_ERROR "vg_read_internal requires vgname " - "with pre-commit."); - return NULL; - } - return vg_read_orphans(cmd, warn_flags, vgname); - } - - uuid[0] = '\0'; - if (vgid && 
!id_write_format((const struct id*)vgid, uuid, sizeof(uuid))) - stack; - - log_very_verbose("Reading VG %s %s", vgname ?: "<no name>", vgid ? uuid : "<no vgid>"); - - /* - * Rescan the devices that are associated with this vg in lvmcache. - * This repeats what was done by the command's initial label scan, - * but only the devices associated with this VG. - * - * The lvmcache info about these devs is from the initial label scan - * performed by the command before the vg lock was held. Now the VG - * lock is held, so we rescan all the info from the devs in case - * something changed between the initial scan and now that the lock - * is held. - * - * Some commands (e.g. reporting) are fine reporting data read by - * the label scan. It doesn't matter if the devs changed between - * the label scan and here, we can report what was seen in the - * scan, even though it is the old state, since we will not be - * making any modifications. If the VG was being modified during - * the scan, and caused us to see inconsistent metadata on the - * different PVs in the VG, then we do want to rescan the devs - * here to get a consistent view of the VG. Note that we don't - * know if the scan found all the PVs in the VG at this point. - * We don't know that until vg_read looks at the list of PVs in - * the metadata and compares that to the devices found by the scan. - * - * It's possible that a change made to the VG during scan was - * adding or removing a PV from the VG. In this case, the list - * of devices associated with the VG in lvmcache would change - * due to the rescan. - * - * The devs in the VG may be persistently inconsistent due to some - * previous problem. In this case, rescanning the labels here will - * find the same inconsistency. The VG repair (mistakenly done by - * vg_read below) is supposed to fix that. 
- * - * FIXME: sort out the usage of the global lock (which is mixed up - * with the orphan lock), and when we can tell that the global - * lock is taken prior to the label scan, and still held here, - * we can also skip the rescan in that case. - */ - if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { - /* the skip rescan special case is for clvmd vg_read_by_vgid */ - /* FIXME: this is not a warn flag, pass this differently */ - if (warn_flags & SKIP_RESCAN) - goto find_vg; - skipped_rescan = 0; - log_debug_metadata("Rescanning devices for %s", vgname); - lvmcache_label_rescan_vg(cmd, vgname, vgid); - } else { - log_debug_metadata("Skipped rescanning devices for %s", vgname); - skipped_rescan = 1; - } - - find_vg: - - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { - log_debug_metadata("Cache did not find fmt for vgname %s", vgname); - return_NULL; - } - - /* Now determine the correct vgname if none was supplied */ - if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { - log_debug_metadata("Cache did not find VG name from vgid %s", uuid); - return_NULL; - } - - /* Determine the correct vgid if none was supplied */ - if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) { - log_debug_metadata("Cache did not find VG vgid from name %s", vgname); - return_NULL; - } - - if (use_precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; - - /* - * A "format instance" is an abstraction for a VG location, - * i.e. where a VG's metadata exists on disk. - * - * An fic (format_instance_ctx) is a temporary struct used - * to create an fid (format_instance). The fid hangs around - * and is used to create a 'vg' to which it connected (vg->fid). - * - * The 'fic' describes a VG in terms of fmt/name/id. - * - * The 'fid' describes a VG in more detail than the fic, - * holding information about where to find the VG metadata. 
- * - * The 'vg' describes the VG in the most detail representing - * all the VG metadata. - * - * The fic and fid are set up by create_instance() to describe - * the VG location. This happens before the VG metadata is - * assembled into the more familiar struct volume_group "vg". - * - * The fid has one main purpose: to keep track of the metadata - * locations for a given VG. It does this by putting 'mda' - * structs on fid->metadata_areas_in_use, which specify where - * metadata is located on disk. It gets this information - * (metadata locations for a specific VG) from the command's - * initial label scan. The info is passed indirectly via - * lvmcache info/vginfo structs, which are created by the - * label scan and then copied into fid by create_instance(). - */ - - /* create format instance with appropriate metadata area */ - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); - return NULL; - } - - /* Store pvids for later so we can check if any are missing */ - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - _destroy_fid(&fid); - return_NULL; - } - - /* - * We use the fid globally here so prevent the release_vg - * call to destroy the fid - we may want to reuse it! 
- */ - fid->ref_count++; - /* Ensure contents of all metadata areas match - else do recovery */ - inconsistent_mda_count=0; - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - struct device *mda_dev = mda_get_device(mda); - - use_previous_vg = 0; - - log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev)); - - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; - continue; - } - - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; - continue; - } - - if (!correct_vg) { - correct_vg = vg; - continue; - } - - /* FIXME Also ensure contents same - checksum compare? */ - if (correct_vg->seqno != vg->seqno) { - if (cmd->metadata_read_only || skipped_rescan) - log_warn("Not repairing metadata for VG %s.", vgname); - else - inconsistent = 1; - - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } - - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } - } - fid->ref_count--; - - /* Ensure every PV in the VG was in the cache */ - if (correct_vg) { - /* - * Update the seqno from the cache, for the benefit of - * retro-style metadata formats like LVM1. - */ - // correct_vg->seqno = seqno > correct_vg->seqno ? seqno : correct_vg->seqno; - - /* - * If the VG has PVs without mdas, or ignored mdas, they may - * still be orphans in the cache: update the cache state here, - * and update the metadata lists in the vg. 
- */ - if (!inconsistent && - dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) { - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (!pvl->pv->dev) { - inconsistent_pvs = 1; - break; - } - - if (str_list_match_item(pvids, pvl->pv->dev->pvid)) - continue; - - /* - * PV not marked as belonging to this VG in cache. - * Check it's an orphan without metadata area - * not ignored. - */ - if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 1)) || - !lvmcache_is_orphan(info)) { - inconsistent_pvs = 1; - break; - } - - if (lvmcache_mda_count(info)) { - if (!lvmcache_fid_add_mdas_pv(info, fid)) { - release_vg(correct_vg); - return_NULL; - } - - log_debug_metadata("Empty mda found for VG %s on %s.", - vgname, dev_name(pvl->pv->dev)); - -#if 0 - /* - * If we are going to do any repair we have to be using - * the latest metadata on disk, so we have to rescan devs - * if we skipped that at the start of the vg_read. We'll - * likely come back through here, but without having - * skipped_rescan. - * - * FIXME: in some cases we don't want to do this. - */ - if (skipped_rescan && cmd->can_use_one_scan) { - log_debug_metadata("Restarting read to rescan devs."); - cmd->can_use_one_scan = 0; - release_vg(correct_vg); - correct_vg = NULL; - lvmcache_del(info); - label_read(pvl->pv->dev); - goto restart_scan; - } -#endif - - if (inconsistent_mdas) - continue; - - /* - * If any newly-added mdas are in-use then their - * metadata needs updating. 
- */ - lvmcache_foreach_mda(info, _check_mda_in_use, - &inconsistent_mdas); - } - } - - /* If the check passed, let's update VG and recalculate pvids */ - if (!inconsistent_pvs) { - log_debug_metadata("Updating cache for PVs without mdas " - "in VG %s.", vgname); - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED); - - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - release_vg(correct_vg); - return_NULL; - } - } - } - - fid->ref_count++; - if (dm_list_size(&correct_vg->pvs) != - dm_list_size(pvids) + vg_missing_pv_count(correct_vg)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - - if (prioritized_section()) - inconsistent = 1; - else { - release_vg(correct_vg); - correct_vg = NULL; - } - } else dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (is_missing_pv(pvl->pv)) - continue; - if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - release_vg(correct_vg); - correct_vg = NULL; - break; - } - } - - if (correct_vg && inconsistent_mdas) { - release_vg(correct_vg); - correct_vg = NULL; - } - fid->ref_count--; - } - - dm_list_init(&all_pvs); - - /* Failed to find VG where we expected it - full scan and retry */ - if (!correct_vg) { - /* - * Free outstanding format instance that remained unassigned - * from previous step where we tried to get the "correct_vg", - * but we failed to do so (so there's a dangling fid now). 
- */ - _destroy_fid(&fid); - vg_fmtdata = NULL; - - inconsistent = 0; - - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) - return_NULL; - - if (precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; - - /* create format instance with appropriate metadata area */ - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); - return NULL; - } - - /* - * We use the fid globally here so prevent the release_vg - * call to destroy the fid - we may want to reuse it! - */ - fid->ref_count++; - /* Ensure contents of all metadata areas match - else recover */ - inconsistent_mda_count=0; - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - use_previous_vg = 0; - - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; - continue; - } - - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; - continue; - } - - if (!correct_vg) { - correct_vg = vg; - if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - return_NULL; - } - continue; - } - - if (!id_equal(&vg->id, &correct_vg->id)) { - inconsistent = 1; - inconsistent_vgid = 1; - } - - /* FIXME Also ensure contents same - checksums same? 
*/ - if (correct_vg->seqno != vg->seqno) { - /* Ignore inconsistent seqno if told to skip repair logic */ - if (cmd->metadata_read_only || skipped_rescan) - log_warn("Not repairing metadata for VG %s.", vgname); - else - inconsistent = 1; - - if (!_update_pv_list(cmd->mem, &all_pvs, vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - release_vg(correct_vg); - return_NULL; - } - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } - - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } - } - fid->ref_count--; - - /* Give up looking */ - if (!correct_vg) { - _free_pv_list(&all_pvs); - _destroy_fid(&fid); - return_NULL; - } - } - - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg(correct_vg, (correct_vg->status & PRECOMMITTED)); - - if (inconsistent) { - /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */ - if (use_precommitted) { - log_error("Inconsistent pre-commit metadata copies " - "for volume group %s", vgname); - - /* - * Check whether all of the inconsistent MDAs were on - * MISSING PVs -- in that case, we should be safe. 
- */ - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - if (mda->status & MDA_INCONSISTENT) { - log_debug_metadata("Checking inconsistent MDA: %s", dev_name(mda_get_device(mda))); - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (mda_get_device(mda) == pvl->pv->dev && - (pvl->pv->status & MISSING_PV)) - --inconsistent_mda_count; - } - } - } - - if (inconsistent_mda_count < 0) - log_error(INTERNAL_ERROR "Too many inconsistent MDAs."); - - if (!inconsistent_mda_count) { - _free_pv_list(&all_pvs); - return correct_vg; - } - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; - } - - if (!enable_repair) { - _free_pv_list(&all_pvs); - *mdas_consistent = 0; - return correct_vg; - } - - if (skipped_rescan) { - log_warn("Not repairing metadata for VG %s.", vgname); - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return_NULL; - } - - /* Don't touch if vgids didn't match */ - if (inconsistent_vgid) { - log_warn("WARNING: Inconsistent metadata UUIDs found for volume group %s.", vgname); - _free_pv_list(&all_pvs); - *mdas_consistent = 0; - return correct_vg; - } - - /* - * If PV is marked missing but we found it, - * update metadata and remove MISSING flag - */ - dm_list_iterate_items(pvl, &all_pvs) - _check_reappeared_pv(correct_vg, pvl->pv, 1); - - if (!_repair_inconsistent_vg(correct_vg, lockd_state)) { - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; - } - - if (!_wipe_outdated_pvs(cmd, correct_vg, &all_pvs, lockd_state)) { - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return_NULL; - } - } - - _free_pv_list(&all_pvs); - - if (vg_missing_pv_count(correct_vg)) { - log_verbose("There are %d physical volumes missing.", - vg_missing_pv_count(correct_vg)); - vg_mark_partial_lvs(correct_vg, 1); - } - - if ((correct_vg->status & PVMOVE) && !pvmove_mode()) { - log_error("Interrupted pvmove detected in volume group %s.", - correct_vg->name); - log_print("Please restore the metadata by running vgcfgrestore."); - 
release_vg(correct_vg); - return NULL; - } - - /* We have the VG now finally, check if PV ext info is in sync with VG metadata. */ - if (!_check_or_repair_pv_ext(cmd, correct_vg, lockd_state, skipped_rescan ? 0 : enable_repair, - &inconsistent_pvs)) { - release_vg(correct_vg); - return_NULL; - } - - if (correct_vg && enable_repair && !skipped_rescan) { - if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } - - if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) { - release_vg(correct_vg); - return_NULL; + log_warn("WARNING: PV %s in VG %s is missing the used flag in PV header.", + dev_name(pvl->pv->dev), vg->name); } } - if (inconsistent_pvs) - *mdas_consistent = 0; - - return correct_vg; + return 1; } #define DEV_LIST_DELIM ", " @@ -4333,7 +3589,7 @@ static int _check_devs_used_correspond_with_lv(struct dm_pool *mem, struct dm_li return 1; } -static int _check_devs_used_correspond_with_vg(struct volume_group *vg) +static void _check_devs_used_correspond_with_vg(struct volume_group *vg) { struct dm_pool *mem; char vgid[ID_LEN + 1]; @@ -4343,9 +3599,6 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) struct device_list *dl; int found_inconsistent = 0; - if (is_orphan_vg(vg->name)) - return 1; - strncpy(vgid, (const char *) vg->id.uuid, sizeof(vgid)); vgid[ID_LEN] = '\0'; @@ -4366,7 +3619,7 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) } if (!(list = dev_cache_get_dev_list_for_vgid(vgid))) - return 1; + return; dm_list_iterate_items(dl, list) { if (!(dl->dev->flags & DEV_OPEN_FAILURE) && @@ -4378,79 +3631,19 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) if (found_inconsistent) { if (!(mem = dm_pool_create("vg_devs_check", 1024))) - return_0; + return; dm_list_iterate_items(lvl, &vg->lvs) { if (!_check_devs_used_correspond_with_lv(mem, list, lvl->lv)) { dm_pool_destroy(mem); - return_0; + return; } } 
dm_pool_destroy(mem); } - return 1; -} - -struct volume_group *vg_read_internal(struct cmd_context *cmd, - const char *vgname, const char *vgid, - uint32_t lockd_state, uint32_t warn_flags, - int enable_repair, - int *mdas_consistent) -{ - struct volume_group *vg; - struct lv_list *lvl; - - if (!(vg = _vg_read(cmd, vgname, vgid, lockd_state, - warn_flags, enable_repair, mdas_consistent, 0))) - goto_out; - - if (!check_pv_dev_sizes(vg)) - log_warn("One or more devices used as PVs in VG %s " - "have changed sizes.", vg->name); - - if (!check_pv_segments(vg)) { - log_error(INTERNAL_ERROR "PV segments corrupted in %s.", - vg->name); - release_vg(vg); - vg = NULL; - goto out; - } - - dm_list_iterate_items(lvl, &vg->lvs) { - if (!check_lv_segments(lvl->lv, 0)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - dm_list_iterate_items(lvl, &vg->lvs) { - /* - * Checks that cross-reference other LVs. - */ - if (!check_lv_segments(lvl->lv, 1)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - (void) _check_devs_used_correspond_with_vg(vg); -out: - if (!*mdas_consistent && (warn_flags & WARN_INCONSISTENT)) { - if (is_orphan_vg(vgname)) - log_warn("WARNING: Found inconsistent standalone Physical Volumes."); - else - log_warn("WARNING: Volume Group %s is not consistent.", vgname); - } - - return vg; + return; } void free_pv_fid(struct physical_volume *pv) @@ -4699,10 +3892,6 @@ uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status) { uint32_t failure = 0; - if ((status & CLUSTERED) && !_access_vg_clustered(vg->cmd, vg)) - /* Return because other flags are considered undefined. 
*/ - return FAILED_CLUSTERED; - if ((status & EXPORTED_VG) && vg_is_exported(vg)) { log_error("Volume group %s is exported", vg->name); @@ -4734,48 +3923,6 @@ int vg_check_status(const struct volume_group *vg, uint64_t status) return !vg_bad_status_bits(vg, status); } -/* - * VG is left unlocked on failure - */ -static struct volume_group *_recover_vg(struct cmd_context *cmd, - const char *vg_name, const char *vgid, - int is_shared, uint32_t lockd_state) -{ - int mdas_consistent = 0; - struct volume_group *vg; - uint32_t state = 0; - - unlock_vg(cmd, NULL, vg_name); - - if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL)) - return_NULL; - - /* - * Convert vg lock in lvmlockd from sh to ex. - */ - if (is_shared && !(lockd_state & LDST_FAIL) && !(lockd_state & LDST_EX)) { - log_debug("Upgrade lvmlockd lock to repair vg %s.", vg_name); - if (!lockd_vg(cmd, vg_name, "ex", 0, &state)) { - log_warn("Skip repair for shared VG without exclusive lock."); - return NULL; - } - lockd_state |= LDST_EX; - } - - if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, 0, 1, &mdas_consistent))) { - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - if (!mdas_consistent) { - release_vg(vg); - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - return (struct volume_group *)vg; -} - static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id) { const struct dm_config_node *cn; @@ -4805,9 +3952,6 @@ static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg, uint32_t lockd_state, uint32_t *failure) { - if (!is_real_vg(vg->name)) - return 1; - if (cmd->lockd_vg_disable) return 1; @@ -4954,225 +4098,15 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg) } /* - * FIXME: move vg_bad_status_bits() checks in here. 
- */ -static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg, - uint32_t lockd_state, uint32_t *failure) -{ - if (!is_real_vg(vg->name)) { - return 1; - } - - if (!_access_vg_clustered(cmd, vg)) { - *failure |= FAILED_CLUSTERED; - return 0; - } - - if (!_access_vg_lock_type(cmd, vg, lockd_state, failure)) { - /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */ - return 0; - } - - if (!_access_vg_systemid(cmd, vg)) { - *failure |= FAILED_SYSTEMID; - return 0; - } - - return 1; -} - -/* - * Consolidated locking, reading, and status flag checking. - * - * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in - * read_flags will return it with FAILED_INCONSISTENT set instead of - * giving you nothing. - * - * Use vg_read_error(vg) to determine the result. Nonzero means there were - * problems reading the volume group. - * Zero value means that the VG is open and appropriate locks are held. - */ -static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, - uint32_t lock_flags, - uint64_t status_flags, - uint32_t read_flags, - uint32_t lockd_state) -{ - struct volume_group *vg = NULL; - uint32_t failure = 0; - uint32_t warn_flags = 0; - int mdas_consistent = 1; - int enable_repair = 1; - int is_shared = 0; - int skip_lock = is_orphan_vg(vg_name) && (read_flags & PROCESS_SKIP_ORPHAN_LOCK); - - if ((read_flags & READ_ALLOW_INCONSISTENT) || (lock_flags != LCK_VG_WRITE)) { - enable_repair = 0; - warn_flags |= WARN_INCONSISTENT; - } - - if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) { - log_error("Volume group name \"%s\" has invalid characters.", - vg_name); - return NULL; - } - - if (!skip_lock && - !lock_vol(cmd, vg_name, lock_flags, NULL)) { - log_error("Can't get lock for %s", vg_name); - return _vg_make_handle(cmd, vg, FAILED_LOCKING); - } - - if (skip_lock) - log_very_verbose("Locking %s already done", vg_name); - - if (is_orphan_vg(vg_name)) - status_flags &= 
~LVM_WRITE; - - if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, warn_flags, enable_repair, &mdas_consistent))) { - if (!(read_flags & READ_OK_NOTFOUND)) - log_error("Volume group \"%s\" not found", vg_name); - failure |= FAILED_NOTFOUND; - goto bad; - } - - if (!_vg_access_permitted(cmd, vg, lockd_state, &failure)) - goto bad; - - /* - * If we called vg_read_internal above without repair enabled, - * and the read found inconsistent mdas, then then get a write/ex - * lock and call it again with repair enabled so it will fix - * the inconsistent mdas. - * - * FIXME: factor vg repair out of vg_read. The vg_read caller - * should get an error about the vg have problems and then call - * a repair-specific function if it wants to. (NB there are - * other kinds of repairs hidden in _vg_read that should be - * pulled out in addition to _recover_vg). - */ - if (!mdas_consistent && !enable_repair) { - is_shared = vg_is_shared(vg); - release_vg(vg); - - if (!(vg = _recover_vg(cmd, vg_name, vgid, is_shared, lockd_state))) { - if (is_orphan_vg(vg_name)) - log_error("Recovery of standalone physical volumes failed."); - else - log_error("Recovery of volume group \"%s\" failed.", vg_name); - failure |= FAILED_RECOVERY; - goto bad_no_unlock; - } - } - - /* - * Check that the tool can handle tricky cases -- missing PVs and - * unknown segment types. - */ - - if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s while PVs are missing.", vg->name); - log_error("Consider vgreduce --removemissing."); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */ - goto bad; - } - - if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s with unknown segments in it!", - vg->name); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? 
*/ - goto bad; - } - - failure |= vg_bad_status_bits(vg, status_flags); - if (failure) - goto_bad; - - if (!(vg = _vg_make_handle(cmd, vg, failure)) || vg_read_error(vg)) - if (!skip_lock) - unlock_vg(cmd, vg, vg_name); - - return vg; - -bad: - if (!skip_lock) - unlock_vg(cmd, vg, vg_name); - -bad_no_unlock: - return _vg_make_handle(cmd, vg, failure); -} - -/* - * vg_read: High-level volume group metadata read function. - * - * vg_read_error() must be used on any handle returned to check for errors. - * - * - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT - * - VG is read-only: FAILED_READ_ONLY - * - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED - * - VG is not RESIZEABLE: FAILED_RESIZEABLE - * - locking failed: FAILED_LOCKING - * - * On failures, all locks are released, unless one of the following applies: - * - vgname_is_locked(lock_name) is true - * FIXME: remove the above 2 conditions if possible and make an error always - * release the lock. - * - * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE. - * - * Checking for VG existence: - * - * FIXME: We want vg_read to attempt automatic recovery after acquiring a - * temporary write lock: if that fails, we bail out as usual, with failed & - * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in - * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with - * *consistent = 1. 
- */ -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - uint64_t status_flags = UINT64_C(0); - uint32_t lock_flags = LCK_VG_READ; - - if (read_flags & READ_FOR_UPDATE) { - status_flags |= EXPORTED_VG | LVM_WRITE; - lock_flags = LCK_VG_WRITE; - } - - if (read_flags & READ_ALLOW_EXPORTED) - status_flags &= ~EXPORTED_VG; - - return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status_flags, read_flags, lockd_state); -} - -/* - * A high-level volume group metadata reading function. Open a volume group for - * later update (this means the user code can change the metadata and later - * request the new metadata to be written and committed). - */ -struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - struct volume_group *vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state); - - if (!vg || vg_read_error(vg)) - stack; - - return vg; -} - -/* * Test the validity of a VG handle returned by vg_read() or vg_read_for_update(). 
+ * FIXME: drop this function */ uint32_t vg_read_error(struct volume_group *vg_handle) { if (!vg_handle) return FAILED_ALLOCATION; - return vg_handle->read_status; + return SUCCESS; } /* @@ -5668,3 +4602,631 @@ int lv_on_pmem(struct logical_volume *lv) return 0; } +static struct volume_group *_vg_read(struct cmd_context *cmd, + const char *vgname, + const char *vgid, + unsigned precommitted) +{ + const struct format_type *fmt = cmd->fmt; + struct format_instance *fid = NULL; + struct format_instance_ctx fic; + struct volume_group *vg, *vg_ret = NULL; + struct metadata_area *mda, *mda2; + unsigned use_precommitted = precommitted; + struct device *mda_dev, *dev_ret; + struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ + int found_old_metadata = 0; + unsigned use_previous_vg; + + log_debug_metadata("Reading VG %s %s", vgname ?: "<no name>", vgid ?: "<no vgid>"); + + /* + * Rescan the devices that are associated with this vg in lvmcache. + * This repeats what was done by the command's initial label scan, + * but only the devices associated with this VG. + * + * The lvmcache info about these devs is from the initial label scan + * performed by the command before the vg lock was held. Now the VG + * lock is held, so we rescan all the info from the devs in case + * something changed between the initial scan and now that the lock + * is held. + * + * Some commands (e.g. reporting) are fine reporting data read by + * the label scan. It doesn't matter if the devs changed between + * the label scan and here, we can report what was seen in the + * scan, even though it is the old state, since we will not be + * making any modifications. If the VG was being modified during + * the scan, and caused us to see inconsistent metadata on the + * different PVs in the VG, then we do want to rescan the devs + * here to get a consistent view of the VG. Note that we don't + * know if the scan found all the PVs in the VG at this point. 
+ * We don't know that until vg_read looks at the list of PVs in + * the metadata and compares that to the devices found by the scan. + * + * It's possible that a change made to the VG during scan was + * adding or removing a PV from the VG. In this case, the list + * of devices associated with the VG in lvmcache would change + * due to the rescan. + * + * The devs in the VG may be persistently inconsistent due to some + * previous problem. In this case, rescanning the labels here will + * find the same inconsistency. The VG repair (mistakenly done by + * vg_read below) is supposed to fix that. + * + * FIXME: sort out the usage of the global lock (which is mixed up + * with the orphan lock), and when we can tell that the global + * lock is taken prior to the label scan, and still held here, + * we can also skip the rescan in that case. + */ + if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { + log_debug_metadata("Rescanning devices for %s", vgname); + lvmcache_label_rescan_vg(cmd, vgname, vgid); + } else { + log_debug_metadata("Skipped rescanning devices for %s", vgname); + } + + /* Now determine the correct vgname if none was supplied */ + if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { + log_debug_metadata("Cache did not find VG name from vgid %s", vgid); + return NULL; + } + + /* Determine the correct vgid if none was supplied */ + if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) { + log_debug_metadata("Cache did not find VG vgid from name %s", vgname); + return NULL; + } + + /* + * A "format instance" is an abstraction for a VG location, + * i.e. where a VG's metadata exists on disk. + * + * An fic (format_instance_ctx) is a temporary struct used + * to create an fid (format_instance). The fid hangs around + * and is used to create a 'vg' to which it is connected (vg->fid). + * + * The 'fic' describes a VG in terms of fmt/name/id.
+ * + * The 'fid' describes a VG in more detail than the fic, + * holding information about where to find the VG metadata. + * + * The 'vg' describes the VG in the most detail representing + * all the VG metadata. + * + * The fic and fid are set up by create_instance() to describe + * the VG location. This happens before the VG metadata is + * assembled into the more familiar struct volume_group "vg". + * + * The fid has one main purpose: to keep track of the metadata + * locations for a given VG. It does this by putting 'mda' + * structs on fid->metadata_areas_in_use, which specify where + * metadata is located on disk. It gets this information + * (metadata locations for a specific VG) from the command's + * initial label scan. The info is passed indirectly via + * lvmcache info/vginfo structs, which are created by the + * label scan and then copied into fid by create_instance(). + * + * FIXME: just use the vginfo/info->mdas lists directly instead + * of copying them into the fid list. + */ + + fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; + fic.context.vg_ref.vg_name = vgname; + fic.context.vg_ref.vg_id = vgid; + + /* + * Sets up the metadata areas that we need to read below. + * For each info in vginfo->infos, for each mda in info->mdas, + * (found during label_scan), copy the mda to fid->metadata_areas_in_use + */ + if (!(fid = fmt->ops->create_instance(fmt, &fic))) { + log_error("Failed to create format instance"); + return NULL; + } + + /* + * We use the fid globally here so prevent the release_vg + * call to destroy the fid - we may want to reuse it! + */ + fid->ref_count++; + + + /* + * label_scan found PVs for this VG and set up lvmcache to describe the + * VG/PVs that we use here to read the VG. It created 'vginfo' for the + * VG, and created an 'info' attached to vginfo for each PV. It also + * added a metadata_area struct to info->mdas for each metadata area it + * found on the PV. 
The info->mdas structs are copied to + * fid->metadata_areas_in_use by create_instance above, and here we + * read VG metadata from each of those mdas. + */ + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + + /* I don't think this can happen */ + if (!mda_dev) { + log_warn("Ignoring metadata for VG %s from missing dev.", vgname); + continue; + } + + use_previous_vg = 0; + + if (use_precommitted) { + log_debug_metadata("Reading VG %s precommit metadata from %s %llu", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + + vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg); + + if (!vg && !use_previous_vg) { + log_warn("WARNING: Reading VG %s precommit on %s failed.", vgname, dev_name(mda_dev)); + vg_fmtdata = NULL; + continue; + } + } else { + log_debug_metadata("Reading VG %s metadata from %s %llu", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + + vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg); + + if (!vg && !use_previous_vg) { + log_warn("WARNING: Reading VG %s on %s failed.", vgname, dev_name(mda_dev)); + vg_fmtdata = NULL; + continue; + } + } + + if (!vg) + continue; + + if (vg && !vg_ret) { + vg_ret = vg; + dev_ret = mda_dev; + continue; + } + + /* + * Use the newest copy of the metadata found on any mdas. + * Above, We could check if the scan found an old metadata + * seqno in this mda and just skip reading it again; then these + * seqno checks would just be sanity checks. 
+ */ + + if (vg->seqno == vg_ret->seqno) { + release_vg(vg); + continue; + } + + if (vg->seqno > vg_ret->seqno) { + log_warn("WARNING: ignoring old metadata seqno %u on %s vs new metadata seqno %u on %s for VG %s.", + vg_ret->seqno, dev_name(dev_ret), + vg->seqno, dev_name(mda_dev), vg->name); + found_old_metadata = 1; + release_vg(vg_ret); + vg_ret = vg; + dev_ret = mda_dev; + vg_fmtdata = NULL; + continue; + } + + if (vg_ret->seqno > vg->seqno) { + log_warn("WARNING: ignoring old metadata seqno %u on %s vs new metadata seqno %u on %s for VG %s.", + vg->seqno, dev_name(mda_dev), + vg_ret->seqno, dev_name(dev_ret), vg->name); + found_old_metadata = 1; + release_vg(vg); + vg_fmtdata = NULL; + continue; + } + } + + if (found_old_metadata) + log_warn("WARNING: Inconsistent metadata found for VG %s", vgname); + + vg = NULL; + + if (vg_ret) + set_pv_devices(fid, vg_ret); + + fid->ref_count--; + + if (!vg_ret) { + _destroy_fid(&fid); + goto_out; + } + + /* + * Correct the lvmcache representation of the VG using the metadata + * that we have chosen above (vg_ret). + * + * The vginfo/info representation created by label_scan was not + * entirely correct since it did not use the full or final metadata. + * + * In lvmcache, PVs with no mdas were not attached to the vginfo during + * label_scan because label_scan didn't know where they should go. Now + * that we have the VG metadata we can tell, so use that to attach those + * info's to the vginfo. + * + * Also, outdated PVs that have been removed from the VG were incorrectly + * attached to the vginfo during label_scan, and now need to be detached. + */ + lvmcache_update_vg_from_read(vg_ret, vg_ret->status & PRECOMMITTED); + + /* + * lvmcache_update_vg identified outdated mdas that we read above that + * are not actually part of the VG. Remove those outdated mdas from + * the fid's list of mdas. 
+ */ + dm_list_iterate_items_safe(mda, mda2, &fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + if (lvmcache_is_outdated_dev(cmd, vg_ret->name, (const char *)&vg_ret->id, mda_dev)) { + log_debug_metadata("vg_read %s ignore mda for outdated dev %s", + vg_ret->name, dev_name(mda_dev)); + /* FIXME: use _del_mda */ + dm_list_del(&mda->list); + } + } + +out: + return vg_ret; +} + +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, + uint32_t *error_flags, struct volume_group **error_vg) +{ + struct volume_group *vg = NULL; + struct lv_list *lvl; + struct pv_list *pvl; + int missing_pv_dev = 0; + int missing_pv_flag = 0; + uint32_t failure = 0; + int writing = (read_flags & READ_FOR_UPDATE); + + /* + * FIXME: is this function still used to read orphans? + * If so, replace any callers with vg_read_orphans. + */ + if (is_orphan_vg(vg_name)) { + int skip_lock = read_flags & PROCESS_SKIP_ORPHAN_LOCK; + log_very_verbose("Reading orphan VG %s", vg_name); + + if (!skip_lock && !lock_vol(cmd, vg_name, LCK_VG_READ, NULL)) + return_NULL; + + vg = vg_read_orphans(cmd, vg_name); + + if (!skip_lock) + unlock_vg(cmd, vg, vg_name); + + *error_flags = 0; + *error_vg = NULL; + return vg; + } + + if (!validate_name(vg_name)) { + log_error("Volume group name \"%s\" has invalid characters.", vg_name); + return NULL; + } + + if (!lock_vol(cmd, vg_name, writing ? LCK_VG_WRITE : LCK_VG_READ, NULL)) { + log_error("Can't get lock for %s", vg_name); + failure |= FAILED_LOCKING; + goto_bad; + } + + if (!(vg = _vg_read(cmd, vg_name, vgid, 0))) { + /* Some callers don't care if the VG doesn't exist and don't want an error message. */ + if (!(read_flags & READ_OK_NOTFOUND)) + log_error("Volume group \"%s\" not found", vg_name); + failure |= FAILED_NOTFOUND; + goto_bad; + } + + /* + * Check and warn if PV ext info is not in sync with VG metadata + * (vg_write fixes.) 
+ */ + _check_pv_ext(cmd, vg); + + if (!vg_strip_outdated_historical_lvs(vg)) + log_warn("WARNING: failed to strip outdated historical lvs."); + + /* + * Check for missing devices in the VG. In most cases a VG cannot be + * changed while it's missing devices. This restriction is implemented + * here in vg_read. Below we return an error from vg_read if the + * vg_read flag indicates that the command is going to modify the VG. + * (We should probably implement this restriction elsewhere instead of + * returning an error from vg_read.) + * + * The PV's device may be present while the PV for the device has the + * MISSING_PV flag set in the metadata. This happened because the VG + * was written while this dev was missing, so the MISSING flag was + * written in the metadata for PV. Now the device has reappeared. + * However, the VG has changed since the device was last present, and + * if the device has outdated data it may not be safe to just start + * using it again. + * + * If there were no PE's used on the PV, we can just clear the MISSING + * flag, but if there were PE's used we need to continue to treat the + * PV as if the device is missing, limiting operations like the VG has + * a missing device, and requiring the user to remove the reappeared + * device from the VG, like a missing device, with vgreduce + * --removemissing. + */ + dm_list_iterate_items(pvl, &vg->pvs) { + if (!pvl->pv->dev) { + /* The obvious and common case of a missing device. */ + + log_warn("WARNING: VG %s is missing PVID %s.", vg_name, (const char *)&pvl->pv->id); + missing_pv_dev++; + + } else if (pvl->pv->status & MISSING_PV) { + /* A device that was missing but has reappeared. 
*/ + + if (pvl->pv->pe_alloc_count == 0) { + log_warn("WARNING: VG %s has unused reappeared PV %s.", vg_name, dev_name(pvl->pv->dev)); + pvl->pv->status &= ~MISSING_PV; + /* tell vgextend restoremissing that MISSING flag was cleared here */ + pvl->pv->unused_missing_cleared = 1; + } else { + log_warn("WARNING: VG %s was missing PV %s.", vg_name, dev_name(pvl->pv->dev)); + missing_pv_flag++; + } + } + } + + if (missing_pv_dev || missing_pv_flag) + vg_mark_partial_lvs(vg, 1); + + if (!check_pv_segments(vg)) { + log_error(INTERNAL_ERROR "PV segments corrupted in %s.", vg->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + + dm_list_iterate_items(lvl, &vg->lvs) { + if (!check_lv_segments(lvl->lv, 0)) { + log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + } + + dm_list_iterate_items(lvl, &vg->lvs) { + /* Checks that cross-reference other LVs. */ + if (!check_lv_segments(lvl->lv, 1)) { + log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + } + + if (!check_pv_dev_sizes(vg)) + log_warn("WARNING: One or more devices used as PVs in VG %s have changed sizes.", vg->name); + + _check_devs_used_correspond_with_vg(vg); + + if (!_access_vg_lock_type(cmd, vg, lockd_state, &failure)) { + /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. 
*/ + goto_bad; + } + + if (!_access_vg_systemid(cmd, vg)) { + failure |= FAILED_SYSTEMID; + goto_bad; + } + + if (!_access_vg_clustered(cmd, vg)) { + failure |= FAILED_CLUSTERED; + goto_bad; + } + + if (writing && !(read_flags & READ_ALLOW_EXPORTED) && vg_is_exported(vg)) { + log_error("Volume group %s is exported", vg->name); + failure |= FAILED_EXPORTED; + goto_bad; + } + + if (writing && !(vg->status & LVM_WRITE)) { + log_error("Volume group %s is read-only", vg->name); + failure |= FAILED_READ_ONLY; + goto_bad; + } + + if (!cmd->handles_missing_pvs && (missing_pv_dev || missing_pv_flag) && writing) { + log_error("Cannot change VG %s while PVs are missing.", vg->name); + log_error("See vgreduce --removemissing and vgextend --restoremissing."); + failure |= FAILED_NOT_ENABLED; + goto_bad; + } + + if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && writing) { + log_error("Cannot change VG %s with unknown segments in it!", vg->name); + failure |= FAILED_NOT_ENABLED; /* FIXME new failure code here? */ + goto_bad; + } + + /* + * When we are reading the VG with the intention of writing it, + * we save a second copy of the VG in vg->vg_committed. This + * copy remains unmodified by the command operation, and is used + * later if there is an error and we want to reactivate LVs. + * FIXME: be specific about exactly when this works correctly. + */ + if (writing) { + struct dm_config_tree *cft; + + if (dm_pool_locked(vg->vgmem)) { + /* FIXME: can this happen? */ + log_warn("WARNING: vg_read no vg copy: pool locked"); + goto out; + } + + if (vg->vg_committed) { + /* FIXME: can this happen? */ + log_warn("WARNING: vg_read no vg copy: copy exists"); + release_vg(vg->vg_committed); + vg->vg_committed = NULL; + } + + if (vg->vg_precommitted) { + /* FIXME: can this happen? 
*/ + log_warn("WARNING: vg_read no vg copy: pre copy exists"); + release_vg(vg->vg_precommitted); + vg->vg_precommitted = NULL; + } + + if (!(cft = export_vg_to_config_tree(vg))) { + log_warn("WARNING: vg_read no vg copy: copy export failed"); + goto out; + } + + if (!(vg->vg_committed = import_vg_from_config_tree(cft, vg->fid))) + log_warn("WARNING: vg_read no vg copy: copy import failed"); + + dm_config_destroy(cft); + } else { + if (vg->vg_precommitted) + log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted); + if (vg->vg_committed) + log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed); + } +out: + /* We return with the VG lock held when read is successful. */ + *error_flags = SUCCESS; + if (error_vg) + *error_vg = NULL; + return vg; + +bad: + *error_flags = failure; + + /* + * FIXME: get rid of this case so we don't have to return the vg when + * there's an error. It is here for process_each_pv() which wants to + * eliminate the VG's devs from the list of devs it is processing, even + * when it can't access the VG because of wrong system id or similar. + * This could be done by looking at lvmcache info structs instead of 'vg'. + * It's also used by process_each_vg/process_each_lv which want to + * include error_vg values (like system_id) in error messages. + * These values could also be found from lvmcache vginfo.
+ */ + if (error_vg && vg) { + if (vg->vg_precommitted) + log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted); + if (vg->vg_committed) + log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed); + + /* caller must unlock_vg and release_vg */ + *error_vg = vg; + return_NULL; + } + + if (vg) { + unlock_vg(cmd, vg, vg_name); + release_vg(vg); + } + if (error_vg) + *error_vg = NULL; + return_NULL; +} + +/* + * Simply a version of vg_read() that automatically sets the READ_FOR_UPDATE + * flag, which means the caller intends to write the VG after reading it, + * so vg_read should acquire an exclusive file lock on the vg. + */ +struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, + const char *vgid, uint32_t read_flags, uint32_t lockd_state) +{ + struct volume_group *vg; + uint32_t error_flags = 0; + + vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state, &error_flags, NULL); + + return vg; +} + +void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg) +{ + struct dm_list bad_mdas; + struct metadata_area *mda; + struct device *dev; + + dm_list_init(&bad_mdas); + + lvmcache_get_bad_mdas(cmd, vg->name, (const char *)&vg->id, &bad_mdas); + + dm_list_iterate_items(mda, &bad_mdas) { + dev = mda_get_device(mda); + + /* + * bad_fields: + * + * 0: shouldn't happen + * + * READ|INTERNAL: there's probably nothing wrong on disk + * + * MAGIC|START: there's a good chance that we were + * reading the mda_header from the wrong location; maybe + * the pv_header location was wrong. We don't want to + * write new metadata to the wrong location. To handle + * this we would want to do some further verification that + * we have the mda location correct. + * + * VERSION|CHECKSUM: when the others are correct these + * look safe to repair. + * + * HEADER: general error related to header, covered by fields + * above. 
+ * + * TEXT: general error related to text metadata, we can repair. + */ + if (!mda->bad_fields || + (mda->bad_fields & BAD_MDA_READ) || + (mda->bad_fields & BAD_MDA_INTERNAL) || + (mda->bad_fields & BAD_MDA_MAGIC) || + (mda->bad_fields & BAD_MDA_START)) { + log_warn("WARNING: not repairing bad metadata (0x%x) for mda%d on %s", + mda->bad_fields, mda->mda_num, dev_name(dev)); + continue; + } + + /* + * vg_write/vg_commit reread the mda_header which checks the + * mda header fields and fails if any are bad, which stops + * vg_write/vg_commit from continuing. Suppress these header + * field checks when we know the field is bad and we are going + * to replace it. FIXME: do vg_write/vg_commit really need to + * reread and recheck the mda_header again (probably not)? + */ + + if (mda->bad_fields & BAD_MDA_CHECKSUM) + mda->ignore_bad_fields |= BAD_MDA_CHECKSUM; + if (mda->bad_fields & BAD_MDA_VERSION) + mda->ignore_bad_fields |= BAD_MDA_VERSION; + + log_warn("WARNING: repairing bad metadata (0x%x) in mda%d at %llu on %s.", + mda->bad_fields, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + + if (!mda->ops->vg_write(vg->fid, vg, mda)) { + log_warn("WARNING: failed to write VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + + if (!mda->ops->vg_precommit(vg->fid, vg, mda)) { + log_warn("WARNING: failed to precommit VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + + if (!mda->ops->vg_commit(vg->fid, vg, mda)) { + log_warn("WARNING: failed to commit VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + } +} + diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index f57832516..ea7c1def3 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -168,11 
+168,24 @@ struct metadata_area_ops { #define MDA_CONTENT_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_CONTENT : DEV_IO_MDA_EXTRA_CONTENT) #define MDA_HEADER_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_HEADER : DEV_IO_MDA_EXTRA_HEADER) +#define BAD_MDA_INTERNAL 0x00000001 /* internal lvm error */ +#define BAD_MDA_READ 0x00000002 /* read io failed */ +#define BAD_MDA_HEADER 0x00000004 /* general problem with header */ +#define BAD_MDA_TEXT 0x00000008 /* general problem with text */ +#define BAD_MDA_CHECKSUM 0x00000010 +#define BAD_MDA_MAGIC 0x00000020 +#define BAD_MDA_VERSION 0x00000040 +#define BAD_MDA_START 0x00000080 + struct metadata_area { struct dm_list list; struct metadata_area_ops *ops; void *metadata_locn; uint32_t status; + uint64_t header_start; /* mda_header.start */ + int mda_num; + uint32_t bad_fields; /* BAD_MDA_ flags are set to indicate errors found when reading */ + uint32_t ignore_bad_fields; /* BAD_MDA_ flags are set to indicate errors to ignore */ }; struct metadata_area *mda_copy(struct dm_pool *mem, struct metadata_area *mda); @@ -501,4 +514,6 @@ struct id pv_vgid(const struct physical_volume *pv); uint64_t find_min_mda_size(struct dm_list *mdas); char *tags_format_and_copy(struct dm_pool *mem, const struct dm_list *tagsl); +void set_pv_devices(struct format_instance *fid, struct volume_group *vg); + #endif diff --git a/lib/metadata/pv.h b/lib/metadata/pv.h index c162acd34..3430c2e1f 100644 --- a/lib/metadata/pv.h +++ b/lib/metadata/pv.h @@ -59,6 +59,7 @@ struct physical_volume { /* This is true whenever the represented PV has a label associated. */ uint64_t is_labelled:1; + uint64_t unused_missing_cleared:1; /* NB. 
label_sector is valid whenever is_labelled is true */ uint64_t label_sector; diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c index aca4fe783..beddf73de 100644 --- a/lib/metadata/vg.c +++ b/lib/metadata/vg.c @@ -84,7 +84,7 @@ static void _free_vg(struct volume_group *vg) void release_vg(struct volume_group *vg) { - if (!vg || (vg->fid && vg == vg->fid->fmt->orphan_vg)) + if (!vg || is_orphan_vg(vg->name)) return; release_vg(vg->vg_committed); @@ -711,9 +711,9 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg, vg->extent_count -= pv_pe_count(pv); /* FIXME: we don't need to vg_read the orphan vg here */ - orphan_vg = vg_read_orphans(cmd, 0, vg->fid->fmt->orphan_vg_name); + orphan_vg = vg_read_orphans(cmd, vg->fid->fmt->orphan_vg_name); - if (vg_read_error(orphan_vg)) + if (!orphan_vg) goto bad; if (!vg_split_mdas(cmd, vg, orphan_vg) || !vg->pv_count) { diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h index 3fd47569d..6e89b3375 100644 --- a/lib/metadata/vg.h +++ b/lib/metadata/vg.h @@ -122,11 +122,6 @@ struct volume_group { struct dm_list removed_pvs; uint32_t open_mode; /* FIXME: read or write - check lock type? */ - /* - * Store result of the last vg_read(). - * 0 for success else appropriate FAILURE_* bits set. 
- */ - uint32_t read_status; uint32_t mda_copies; /* target number of mdas for this VG */ struct dm_hash_table *hostnames; /* map of creation hostnames */ diff --git a/test/shell/inconsistent-metadata.sh b/test/shell/inconsistent-metadata.sh index b42715de4..eb1508455 100644 --- a/test/shell/inconsistent-metadata.sh +++ b/test/shell/inconsistent-metadata.sh @@ -24,53 +24,48 @@ lvchange -a n $vg/mirror aux backup_dev "${DEVICES[@]}" -init() { +makeold() { + # reset metadata on all devs to starting condition aux restore_dev "${DEVICES[@]}" not check lv_field $vg/resized lv_size "8.00m" + # change the metadata on all devs lvresize -L 8192K $vg/resized + # reset metadata on just dev1 to the previous version aux restore_dev "$dev1" } -init -vgscan 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgscan 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out -check lv_field $vg/resized lv_size "8.00m" +# create old metadata +makeold -# vgdisplay fixes -init -vgdisplay $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgdisplay $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out +# reports old metadata +vgs $vg 2>&1 | tee cmd.out +grep "ignoring old metadata" cmd.out check lv_field $vg/resized lv_size "8.00m" -# lvs fixes up -init -lvs $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgdisplay $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out -check lv_field $vg/resized lv_size "8.00m" +# corrects old metadata +lvcreate -l1 -an $vg -# vgs fixes up as well -init +# no old report vgs $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgs $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out +not grep "ignoring old metadata" cmd.out check lv_field $vg/resized lv_size "8.00m" -echo Check auto-repair of failed vgextend - metadata written to original pv but 
not new pv + +echo Check auto-repair of failed vgextend +echo - metadata written to original pv but not new pv + vgremove -f $vg pvremove -ff "${DEVICES[@]}" pvcreate "${DEVICES[@]}" + aux backup_dev "$dev2" vgcreate $SHARED $vg "$dev1" vgextend $vg "$dev2" aux restore_dev "$dev2" -vgscan + +vgs -o+vg_mda_count $vg +pvs -o+vg_mda_count + should check compare_fields vgs $vg vg_mda_count pvs "$dev2" vg_mda_count vgremove -ff $vg diff --git a/test/shell/lvconvert-repair-cache.sh b/test/shell/lvconvert-repair-cache.sh index 348dbaf31..760d32900 100644 --- a/test/shell/lvconvert-repair-cache.sh +++ b/test/shell/lvconvert-repair-cache.sh @@ -57,6 +57,9 @@ should not dmsetup remove ${vg}-cpool_cdata-missing_0_0 aux enable_dev "$dev1" +# vg was changed while dev was missing +vgextend --restoremissing $vg "$dev1" + ################## lvcreate --type cache-pool -L10 $vg/cpool "$dev1" @@ -93,6 +96,9 @@ lvconvert --yes --uncache $vg/$lv1 aux enable_dev "$dev2" +# vg was changed while dev was missing +vgextend --restoremissing $vg "$dev2" + # FIXME: temporary workaround lvcreate -L1 -n $lv5 $vg lvremove -ff $vg diff --git a/test/shell/lvconvert-repair-policy.sh b/test/shell/lvconvert-repair-policy.sh index f9fca0028..b69658ea6 100644 --- a/test/shell/lvconvert-repair-policy.sh +++ b/test/shell/lvconvert-repair-policy.sh @@ -24,6 +24,8 @@ aux lvmconf 'allocation/maximise_cling = 0' \ cleanup_() { vgreduce --removemissing $vg for d in "$@"; do aux enable_dev "$d"; done + # clear the outdated metadata on enabled devs before we can reuse them + vgck --updatemetadata $vg for d in "$@"; do vgextend $vg "$d"; done lvremove -ff $vg/mirror lvcreate -aey --type mirror -m 1 --ignoremonitoring -l 2 -n mirror $vg "$dev1" "$dev2" "$dev3:0" diff --git a/test/shell/lvconvert-repair.sh b/test/shell/lvconvert-repair.sh index ae8fa7e98..0d0231e30 100644 --- a/test/shell/lvconvert-repair.sh +++ b/test/shell/lvconvert-repair.sh @@ -106,17 +106,23 @@ lvconvert -y --repair $vg/mirror vgreduce 
--removemissing $vg aux enable_dev "$dev1" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev1" aux disable_dev "$dev2" lvconvert -y --repair $vg/mirror vgreduce --removemissing $vg aux enable_dev "$dev2" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev2" aux disable_dev "$dev3" lvconvert -y --repair $vg/mirror vgreduce --removemissing $vg aux enable_dev "$dev3" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev3" vgremove -ff $vg diff --git a/test/shell/lvmcache-exercise.sh b/test/shell/lvmcache-exercise.sh index 908cdf813..fcb5a2e0e 100644 --- a/test/shell/lvmcache-exercise.sh +++ b/test/shell/lvmcache-exercise.sh @@ -46,11 +46,12 @@ aux disable_dev "$dev3" lvconvert --yes --repair $vg2/$lv1 aux enable_dev "$dev3" -# here it should fix any reappeared devices -lvs +# put back the dev that was missing during repair +# the vg was written by repair with dev3 having the missing flag +vgextend --restoremissing $vg2 "$dev3" lvs -a $vg2 -o+devices 2>&1 | tee out -not grep reappeared out +not grep missing out # This removes the first "vg1" using its uuid vgremove -ff -S vg_uuid=$UUID1 diff --git a/test/shell/mirror-vgreduce-removemissing.sh b/test/shell/mirror-vgreduce-removemissing.sh index d95a0ac7b..08275c2bd 100644 --- a/test/shell/mirror-vgreduce-removemissing.sh +++ b/test/shell/mirror-vgreduce-removemissing.sh @@ -123,6 +123,8 @@ check_and_cleanup_lvs_() recover_vg_() { aux enable_dev "$@" + # clear outdated metadata on PVs so they can be used again + vgck --updatemetadata $vg pvcreate -ff "$@" vgextend $vg "$@" check_and_cleanup_lvs_ diff --git a/test/shell/pv-ext-flags.sh b/test/shell/pv-ext-flags.sh index bae16df11..22e9b3aac 100644 --- a/test/shell/pv-ext-flags.sh +++ b/test/shell/pv-ext-flags.sh @@ -39,7 +39,6 @@ check pv_field "$dev2" pv_in_use "used" # disable $dev2 and dev1 with 0 MDAs remains, but still # marked as used, so 
pvcreate/vgcreate/pvremove should fail aux disable_dev "$dev2" -pvscan --cache check pv_field "$dev1" pv_in_use "used" not pvcreate "$dev1" 2>err @@ -71,20 +70,14 @@ vgcreate $vg1 "$dev1" "$dev2" # disable $dev1, then repair the VG - $dev1 is removed from VG aux disable_dev "$dev1" vgreduce --removemissing $vg1 -# now, enable $dev1, automatic repair will happen on pvs call -# (or any other lvm command that does vg_read with repair inside) -aux enable_dev "$dev1" -# FIXME: once persistent cache does not cause races with timestamps -# causing LVM tools to not see the VG inconsistency and once -# VG repair is always done, delete this line which removes -# persistent .cache as a workaround -rm -f "$TESTDIR/etc/.cache" +# now, enable $dev1 and clear the old metadata from it +aux enable_dev "$dev1" +vgck --updatemetadata $vg1 vgck $vg1 -# check $dev1 does not contain the PV_EXT_FLAG anymore - it -# should be removed as part of the repaid during vg_read since -# $dev1 is not part of $vg1 anymore + +# check $dev1 does not contain the PV_EXT_FLAG anymore check pv_field "$dev1" pv_in_use "" ############################################# @@ -105,7 +98,6 @@ check pv_field "$dev2" pv_in_use "used" pvchange --metadataignore y "$dev1" aux disable_dev "$dev2" -pvscan --cache check pv_field "$dev1" pv_in_use "used" not pvcreate "$dev1" 2>err @@ -136,20 +128,14 @@ vgcreate $vg1 "$dev1" "$dev2" # disable $dev1, then repair the VG - $dev1 is removed from VG aux disable_dev "$dev1" vgreduce --removemissing $vg1 -# now, enable $dev1, automatic repair will happen on pvs call -# (or any other lvm command that does vg_read with repair inside) -aux enable_dev "$dev1" -# FIXME: once persistent cache does not cause races with timestamps -# causing LVM tools to not see the VG inconsistency and once -# VG repair is always done, delete this line which removes -# persistent .cache as a workaround -rm -f "$TESTDIR/etc/.cache" +# now, enable $dev1 and clear the old metadata from it +aux enable_dev 
"$dev1" +vgck --updatemetadata $vg1 vgck $vg1 -# check $dev1 does not contain the PV_EXT_FLAG anymore - it -# should be removed as part of the repaid during vg_read since -# $dev1 is not part of $vg1 anymore + +# check $dev1 does not contain the PV_EXT_FLAG anymore check pv_field "$dev1" pv_in_use "" ########################### diff --git a/test/shell/unlost-pv.sh b/test/shell/unlost-pv.sh index edf7f31e2..50f89287e 100644 --- a/test/shell/unlost-pv.sh +++ b/test/shell/unlost-pv.sh @@ -15,47 +15,59 @@ SKIP_WITH_LVMPOLLD=1 . lib/inittest -check_() { - local cache="" - # vgscan needs --cache option for direct scan if lvmetad is used - test -e LOCAL_LVMETAD && cache="--cache" - vgscan $cache 2>&1 | tee vgscan.out - "$@" grep "Inconsistent metadata found for VG $vg" vgscan.out -} - aux prepare_vg 3 lvcreate -an -Zn --type mirror -m 1 -l 1 -n mirror $vg -#lvchange -a n $vg # try orphaning a missing PV (bz45867) aux disable_dev "$dev1" vgreduce --removemissing --force $vg aux enable_dev "$dev1" -check_ -test -e LOCAL_LVMETAD && pvcreate -f "$dev1" -check_ not +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# erase outdated dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out + -# try to just change metadata; we expect the new version (with MISSING_PV set -# on the reappeared volume) to be written out to the previously missing PV vgextend $vg "$dev1" + lvcreate -l 1 -n boo -a n --zero n $vg + aux disable_dev "$dev1" + lvremove $vg/mirror + aux enable_dev "$dev1" -check_ -test -e LOCAL_LVMETAD && lvremove $vg/boo # FIXME trigger a write :-( -check_ not + +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# write the vg to update the metadata on dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out aux disable_dev "$dev1" + vgreduce --removemissing 
--force $vg + aux enable_dev "$dev1" vgscan 2>&1 | tee out -grep 'Removing PV' out -vgs 2>&1 | tee out -not grep 'Removing PV' out +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# erase outdated dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out vgremove -ff $vg diff --git a/test/shell/vgck.sh b/test/shell/vgck.sh index 3288d1ba6..b6c2cba08 100644 --- a/test/shell/vgck.sh +++ b/test/shell/vgck.sh @@ -24,11 +24,11 @@ dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" vgscan 2>&1 | tee vgscan.out || true -grep "Failed" vgscan.out +grep "checksum" vgscan.out dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" vgck $vg 2>&1 | tee vgck.out || true -grep Incorrect vgck.out +grep "checksum" vgck.out vgremove -ff $vg diff --git a/tools/args.h b/tools/args.h index b3ba99ecc..3bda9a08c 100644 --- a/tools/args.h +++ b/tools/args.h @@ -1383,6 +1383,9 @@ arg(thin_ARG, 'T', "thin", 0, 0, 0, "See --type thin, --type thin-pool, and --virtualsize.\n" "See \\fBlvmthin\\fP(7) for more information about LVM thin provisioning.\n") +arg(updatemetadata_ARG, '\0', "updatemetadata", 0, 0, 0, + "Update VG metadata to correct problems.\n") + arg(uuid_ARG, 'u', "uuid", 0, 0, 0, "#pvchange\n" "Generate new random UUID for specified PVs.\n" diff --git a/tools/command-lines.in b/tools/command-lines.in index eaa71ea52..8768c6501 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -1610,6 +1610,11 @@ vgck OO: --reportformat ReportFmt OP: VG|Tag ... ID: vgck_general +DESC: Read and display information about a VG. + +vgck --updatemetadata VG +ID: vgck_update_metadata +DESC: Rewrite VG metadata to correct problems. 
--- diff --git a/tools/polldaemon.c b/tools/polldaemon.c index 877247671..1f5e64dbd 100644 --- a/tools/polldaemon.c +++ b/tools/polldaemon.c @@ -148,6 +148,7 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, struct logical_volume *lv; int finished = 0; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret; if (!parms->wait_before_testing) @@ -168,12 +169,10 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, } /* Locks the (possibly renamed) VG again */ - vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state); - if (vg_read_error(vg)) { + vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state, &error_flags, NULL); + if (!vg) { /* What more could we do here? */ - log_error("ABORTING: Can't reread VG for %s.", id->display_name); - release_vg(vg); - vg = NULL; + log_error("ABORTING: Can't reread VG for %s error flags %x.", id->display_name, error_flags); ret = 0; goto out; } @@ -394,6 +393,7 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i struct volume_group *vg; struct logical_volume *lv; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret; /* @@ -406,10 +406,9 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i * change done locally. 
*/ - vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state); - if (vg_read_error(vg)) { - release_vg(vg); - log_error("Can't reread VG for %s", id->display_name); + vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state, &error_flags, NULL); + if (!vg) { + log_error("Can't reread VG for %s error flags %x", id->display_name, error_flags); ret = 0; goto out_ret; } diff --git a/tools/pvscan.c b/tools/pvscan.c index 098c50261..d517a4d9d 100644 --- a/tools/pvscan.c +++ b/tools/pvscan.c @@ -513,6 +513,8 @@ static int _online_pvscan_one(struct cmd_context *cmd, struct device *dev, if (pvid_without_metadata) *pvid_without_metadata = dm_pool_strdup(cmd->mem, dev->pvid); fmt->ops->destroy_instance(baton.fid); + } else { + set_pv_devices(baton.fid, baton.vg); } ret = _online_pv_found(cmd, dev, dev_args, baton.vg, found_vgnames); diff --git a/tools/toollib.c b/tools/toollib.c index 5206e26d3..eb268e4dd 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -189,11 +189,12 @@ static int _printed_clustered_vg_advice = 0; * Case c covers the other errors returned when reading the VG. * If *skip is 1, it's OK for the caller to read the list of PVs in the VG. 
*/ -static int _ignore_vg(struct volume_group *vg, const char *vg_name, - struct dm_list *arg_vgnames, uint32_t read_flags, - int *skip, int *notfound) +static int _ignore_vg(struct cmd_context *cmd, + uint32_t error_flags, struct volume_group *error_vg, + const char *vg_name, struct dm_list *arg_vgnames, + uint32_t read_flags, int *skip, int *notfound) { - uint32_t read_error = vg_read_error(vg); + uint32_t read_error = error_flags; *skip = 0; *notfound = 0; @@ -203,12 +204,9 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, return 0; } - if ((read_error & FAILED_INCONSISTENT) && (read_flags & READ_ALLOW_INCONSISTENT)) - read_error &= ~FAILED_INCONSISTENT; /* Check for other errors */ - if (read_error & FAILED_CLUSTERED) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { - log_error("Cannot access clustered VG %s.", vg->name); + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { + log_error("Cannot access clustered VG %s.", vg_name); if (!_printed_clustered_vg_advice) { _printed_clustered_vg_advice = 1; log_error("See lvmlockd(8) for changing a clvm/clustered VG to a shared VG."); @@ -233,10 +231,13 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * would expect to fail. */ if (read_error & FAILED_SYSTEMID) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.", - vg->name, vg->system_id, vg->cmd->system_id ? "" : "unknown ", - vg->cmd->system_id ? " " : "", vg->cmd->system_id ? vg->cmd->system_id : ""); + vg_name, + error_vg ? error_vg->system_id : "unknown ", + cmd->system_id ? "" : "unknown ", + cmd->system_id ? " " : "", + cmd->system_id ? 
cmd->system_id : ""); return 1; } else { read_error &= ~FAILED_SYSTEMID; /* Check for other errors */ @@ -255,10 +256,11 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * command failed to acquire the necessary lock.) */ if (read_error & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { if (read_error & FAILED_LOCK_TYPE) log_error("Cannot access VG %s with lock type %s that requires lvmlockd.", - vg->name, vg->lock_type); + vg_name, + error_vg ? error_vg->lock_type : "unknown"); /* For FAILED_LOCK_MODE, the error is printed in vg_read. */ return 1; } else { @@ -1924,10 +1926,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg = NULL; struct vgnameid_list *vgnl; const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int whole_selected = 0; int ret_max = ECMD_PROCESSED; int ret; @@ -1977,13 +1981,18 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, continue; } - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); goto endvg; } + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); + if (skip || notfound) goto endvg; @@ -2004,8 +2013,7 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, ret_max = ret; } - if (!vg_read_error(vg)) - 
unlock_vg(cmd, vg, vg_name); + unlock_vg(cmd, vg, vg_name); endvg: release_vg(vg); if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) @@ -3589,11 +3597,13 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg = NULL; struct vgnameid_list *vgnl; struct dm_str_list *sl; struct dm_list *tags_arg; struct dm_list lvnames; uint32_t lockd_state = 0; + uint32_t error_flags = 0; const char *vg_name; const char *vg_uuid; const char *vgn; @@ -3662,13 +3672,18 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag continue; } - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); goto endvg; } + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); + if (skip || notfound) goto endvg; @@ -4208,12 +4223,16 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, struct physical_volume *pv; struct pv_list *pvl; struct device_id_list *dil; + struct device_list *devl; + struct dm_list outdated_devs; const char *pv_name; int process_pv; int do_report_ret_code = 1; int ret_max = ECMD_PROCESSED; int ret = 0; + dm_list_init(&outdated_devs); + log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_PV); vg_uuid[0] = '\0'; @@ -4299,6 +4318,12 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, break; log_set_report_object_name_and_id(NULL, NULL); } + + if (!is_orphan_vg(vg->name)) + lvmcache_get_outdated_devs(cmd, vg->name, (const char *)&vg->id, &outdated_devs); + 
dm_list_iterate_items(devl, &outdated_devs) + _device_list_remove(all_devices, devl->dev); + do_report_ret_code = 0; out: if (do_report_ret_code) @@ -4336,10 +4361,12 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg; struct vgnameid_list *vgnl; const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret_max = ECMD_PROCESSED; int ret; int skip; @@ -4380,8 +4407,8 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, skip_lock = is_orphan_vg(vg_name) && (read_flags & PROCESS_SKIP_ORPHAN_LOCK); - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, NULL, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, NULL, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); @@ -4393,22 +4420,26 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, goto endvg; /* - * Don't continue when skip is set, because we need to remove - * vg->pvs entries from devices list. + * Don't call "continue" when skip is set, because we need to remove + * error_vg->pvs entries from devices list. */ - ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags, + ret = _process_pvs_in_vg(cmd, vg ? 
vg : error_vg, all_devices, arg_devices, arg_tags, process_all_pvs, process_all_devices, skip, handle, process_single_pv); if (ret != ECMD_PROCESSED) stack; + report_log_ret_code(ret); + if (ret > ret_max) ret_max = ret; if (!skip && !skip_lock) unlock_vg(cmd, vg, vg->name); endvg: + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); release_vg(vg); if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) stack; @@ -4601,7 +4632,7 @@ int process_each_pv(struct cmd_context *cmd, dm_list_init(&arg_missed_orig); _device_list_copy(cmd, &arg_missed, &arg_missed_orig); - log_verbose("Some PVs were not found in first search, retrying."); + log_warn("WARNING: some PVs were not found in first search, retrying."); lvmcache_label_scan(cmd); @@ -5692,7 +5723,7 @@ do_command: if (pp->preserve_existing && pp->orphan_vg_name) { log_debug("Using existing orphan PVs in %s.", pp->orphan_vg_name); - if (!(orphan_vg = vg_read_orphans(cmd, 0, pp->orphan_vg_name))) { + if (!(orphan_vg = vg_read_orphans(cmd, pp->orphan_vg_name))) { log_error("Cannot read orphans VG %s.", pp->orphan_vg_name); goto bad; } diff --git a/tools/vgcfgbackup.c b/tools/vgcfgbackup.c index 7d061d517..49779cabe 100644 --- a/tools/vgcfgbackup.c +++ b/tools/vgcfgbackup.c @@ -67,9 +67,12 @@ static int _vg_backup_single(struct cmd_context *cmd, const char *vg_name, if (!backup_to_file(filename, vg->cmd->cmd_line, vg)) return_ECMD_FAILED; } else { - if (vg_read_error(vg) == FAILED_INCONSISTENT) { - log_error("No backup taken: specify filename with -f " - "to backup an inconsistent VG"); + if (vg_missing_pv_count(vg)) { + log_error("No backup taken: specify filename with -f to backup with missing PVs."); + return ECMD_FAILED; + } + if (vg_has_unknown_segments(vg)) { + log_error("No backup taken: specify filename with -f to backup with unknown segments."); return ECMD_FAILED; } @@ -97,9 +100,17 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv) handle->custom_handle = &last_filename; + /* + * Just 
set so that we can do the check ourselves above and + * report a helpful error message in place of the error message + * that would be generated from vg_read. + */ + cmd->handles_missing_pvs = 1; + cmd->handles_unknown_segments = 1; + init_pvmove(1); - ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_ALLOW_INCONSISTENT, 0, + ret = process_each_vg(cmd, argc, argv, NULL, NULL, 0, 0, handle, &_vg_backup_single); free(last_filename); diff --git a/tools/vgck.c b/tools/vgck.c index a126c2924..90fc5a3aa 100644 --- a/tools/vgck.c +++ b/tools/vgck.c @@ -15,6 +15,57 @@ #include "tools.h" +/* + * TODO: we cannot yet repair corruption in label_header, pv_header/locations, + * or corruption of some mda_header fields. + */ + +static int _update_metadata_single(struct cmd_context *cmd __attribute__((unused)), + const char *vg_name, + struct volume_group *vg, + struct processing_handle *handle __attribute__((unused))) +{ + + /* + * Simply calling vg_write can correct or clean up various things: + * . some mda's have old versions of metdadata + * . wipe outdated PVs + * . fix pv_header used flag and version + * . strip historical lvs + * . clear missing pv flag on unused PV + */ + if (!vg_write(vg)) { + log_error("Failed to write VG."); + return 0; + } + + if (!vg_commit(vg)) { + log_error("Failed to commit VG."); + return 0; + } + + /* + * vg_write does not write to "bad" mdas (where "bad" is corrupt, can't + * be processed when reading). bad mdas are not kept in + * fid->metadata_areas_in_use so vg_read and vg_write ignore them, but + * they are saved in lvmcache. this gets them from lvmcache and tries + * to write this metadata to them. 
+ */ + vg_write_commit_bad_mdas(cmd, vg); + + return 1; +} + +static int _update_metadata(struct cmd_context *cmd, int argc, char **argv) +{ + cmd->handles_missing_pvs = 1; + cmd->wipe_outdated_pvs = 1; + cmd->handles_unknown_segments = 1; + + return process_each_vg(cmd, argc, argv, NULL, NULL, READ_FOR_UPDATE, 0, NULL, + &_update_metadata_single); +} + static int vgck_single(struct cmd_context *cmd __attribute__((unused)), const char *vg_name, struct volume_group *vg, @@ -37,6 +88,9 @@ static int vgck_single(struct cmd_context *cmd __attribute__((unused)), int vgck(struct cmd_context *cmd, int argc, char **argv) { + if (arg_is_set(cmd, updatemetadata_ARG)) + return _update_metadata(cmd, argc, argv); + return process_each_vg(cmd, argc, argv, NULL, NULL, 0, 0, NULL, &vgck_single); } diff --git a/tools/vgextend.c b/tools/vgextend.c index c727d75bb..1b674d0f7 100644 --- a/tools/vgextend.c +++ b/tools/vgextend.c @@ -28,16 +28,25 @@ static int _restore_pv(struct volume_group *vg, const char *pv_name) return 0; } - if (!(pvl->pv->status & MISSING_PV)) { - log_warn("WARNING: PV %s was not missing in VG %s", pv_name, vg->name); - return 0; - } - if (!pvl->pv->dev) { log_warn("WARNING: The PV %s is still missing.", pv_name); return 0; } + if (pvl->pv->status & MISSING_PV) + goto clear_flag; + + /* + * when the PV has no used PE's vg_read clears the MISSING_PV flag + * and sets this so we know. 
+ */ + if (pvl->pv->unused_missing_cleared) + goto clear_flag; + + log_warn("WARNING: PV %s was not missing in VG %s", pv_name, vg->name); + return 0; + +clear_flag: pvl->pv->status &= ~MISSING_PV; return 1; } diff --git a/tools/vgremove.c b/tools/vgremove.c index 212085869..3b6b2610a 100644 --- a/tools/vgremove.c +++ b/tools/vgremove.c @@ -115,6 +115,8 @@ int vgremove(struct cmd_context *cmd, int argc, char **argv) return ECMD_FAILED; } + cmd->wipe_outdated_pvs = 1; + cmd->handles_missing_pvs = 1; ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_FOR_UPDATE, 0, diff --git a/tools/vgsplit.c b/tools/vgsplit.c index 87f48df8a..05797aee7 100644 --- a/tools/vgsplit.c +++ b/tools/vgsplit.c @@ -467,6 +467,7 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, int *existing_vg) { struct volume_group *vg_to = NULL; + int exists = 0; log_verbose("Checking for new volume group \"%s\"", vg_name_to); /* @@ -479,13 +480,13 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, * we obtained a WRITE lock and could not find the vgname in the * system. Thus, the split will be into a new VG. */ - vg_to = vg_lock_and_create(cmd, vg_name_to); - if (vg_read_error(vg_to) == FAILED_LOCKING) { + vg_to = vg_lock_and_create(cmd, vg_name_to, &exists); + if (!vg_to && !exists) { log_error("Can't get lock for %s", vg_name_to); release_vg(vg_to); return NULL; } - if (vg_read_error(vg_to) == FAILED_EXIST) { + if (!vg_to && exists) { *existing_vg = 1; release_vg(vg_to); vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0, 0); |