1 files changed, 861 insertions, 1299 deletions
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
index 9efc35592..f31b4b979 100644
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@@ -28,11 +28,14 @@
 #include "lib/display/display.h"
 #include "lib/locking/locking.h"
 #include "lib/format_text/archiver.h"
+#include "lib/format_text/format-text.h"
+#include "lib/format_text/layout.h"
+#include "lib/format_text/import-export.h"
 #include "lib/config/defaults.h"
 #include "lib/locking/lvmlockd.h"
-#include "time.h"
 #include "lib/notify/lvmnotify.h"
 
+#include <time.h>
 #include <math.h>
 
 static struct physical_volume *_pv_read(struct cmd_context *cmd,
@@ -222,6 +225,75 @@ out:
 		  (unsigned long long)pv->pe_align_offset, dev_name(pv->dev));
 }
 
+/*
+ * Find the device for one PV from the VG metadata, then bring the PV's
+ * MISSING flag and size in line with what was found.
+ *
+ * FIXME: the warnings are only wanted when this is called from vg_read,
+ * not from import_vg_from_metadata; warn elsewhere or don't call it there.
+ */
+static void _set_pv_device(struct format_instance *fid,
+			   struct volume_group *vg,
+			   struct physical_volume *pv)
+{
+	struct cmd_context *cmd = fid->fmt->cmd;
+	char uuid[64] __attribute__((aligned(8)));
+	uint64_t needed;
+
+	pv->dev = lvmcache_device_from_pvid(cmd, &pv->id, &pv->label_sector);
+	if (!pv->dev) {
+		if (!id_write_format(&pv->id, uuid, sizeof(uuid)))
+			uuid[0] = '\0';
+
+		if (!cmd || cmd->pvscan_cache_single)
+			log_debug_metadata("Couldn't find device with uuid %s.", uuid);
+		else
+			log_error_once("Couldn't find device with uuid %s.", uuid);
+
+		/* No device: keep MISSING set so it is written if this command writes the VG. */
+		pv->status |= MISSING_PV;
+		return;
+	}
+
+	if (pv->status & MISSING_PV) {
+		/* A previous command wrote the VG while this dev was missing. */
+		log_warn("WARNING: VG %s was previously updated while PV %s was missing.", vg->name, dev_name(pv->dev));
+		/* FIXME: is it correct to clear the flag here? */
+		if (!pv_mda_used_count(pv)) {
+			pv->status &= ~MISSING_PV;
+			log_info("Found a previously MISSING PV %s with no MDAs.", pv_dev_name(pv));
+		}
+	}
+
+	/* Only a missing or impossibly large size needs fixing up. */
+	if (pv->size && pv->size <= (1ULL << 62))
+		return;
+
+	if (!dev_get_size(pv->dev, &pv->size)) {
+		log_error("%s: Couldn't get size.", pv_dev_name(pv));
+		return;
+	}
+
+	log_verbose("Fixing up missing size (%s) for PV %s", display_size(cmd, pv->size),
+		    pv_dev_name(pv));
+	needed = pv->pe_count * (uint64_t) vg->extent_size + pv->pe_start;
+	if (needed > pv->size)
+		log_warn("WARNING: Physical Volume %s is too large "
+			 "for underlying device", pv_dev_name(pv));
+}
+
+/*
+ * Set pv->dev for each PV in the metadata; PV status/size may be adjusted.
+ */
+void set_pv_devices(struct format_instance *fid, struct volume_group *vg)
+{
+	struct pv_list *entry;
+
+	dm_list_iterate_items(entry, &vg->pvs) {
+		_set_pv_device(fid, vg, entry->pv);
+	}
+}
+
 void add_pvl_to_vgs(struct volume_group *vg, struct pv_list *pvl)
 {
 	dm_list_add(&vg->pvs, &pvl->list);
@@ -370,48 +442,6 @@ int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
 	return 1;
 }
 
-static int _copy_pv(struct dm_pool *pvmem,
-		    struct physical_volume *pv_to,
-		    struct physical_volume *pv_from)
-{
-	memcpy(pv_to, pv_from, sizeof(*pv_to));
-
-	/* We must use pv_set_fid here to update the reference counter! */
-	pv_to->fid = NULL;
-	pv_set_fid(pv_to, pv_from->fid);
-
-	if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
-		return_0;
-
-	if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
-		return_0;
-
-	if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
-		return_0;
-
-	return 1;
-}
-
-static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
-{
-	struct pv_list *pvl_to = NULL;
-
-	if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
-		return_NULL;
-
-	if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
-		goto_bad;
-
-	if (!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
-		goto_bad;
-
-	return pvl_to;
-
-bad:
-	dm_pool_free(pvmem, pvl_to);
-	return NULL;
-}
-
 static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
 		    const char *pv_name, int enforce_pv_from_source)
 {
@@ -584,7 +614,7 @@ int vg_remove_check(struct volume_group *vg)
 {
 	unsigned lv_count;
 
-	if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
+	if (vg_missing_pv_count(vg)) {
 		log_error("Volume group \"%s\" not found, is inconsistent "
 			  "or has PVs missing.", vg ? vg->name : "");
 		log_error("Consider vgreduce --removemissing if metadata "
@@ -963,36 +993,6 @@ static int _vg_update_embedded_copy(struct volume_group *vg, struct volume_group
 	return 1;
 }
 
-/*
- * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a
- * possible failure code or zero for success.
- */
-static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
-					    struct volume_group *vg,
-					    uint32_t failure)
-{
-	/* Never return a cached VG structure for a failure */
-	if (vg && vg->vginfo && failure != SUCCESS) {
-		release_vg(vg);
-		vg = NULL;
-	}
-
-	if (!vg && !(vg = alloc_vg("vg_make_handle", cmd, NULL)))
-		return_NULL;
-
-	vg->read_status = failure;
-
-	/*
-	 * If we hold a write lock and might be changing the VG contents, embed a pristine 
-	 * copy of the VG metadata for the activation code to use later
-	 */
-	if (vg->fid && !dm_pool_locked(vg->vgmem) && !vg->vg_committed && !is_orphan_vg(vg->name))
-		if (vg_write_lock_held() && !_vg_update_embedded_copy(vg, &vg->vg_committed))
-			vg->read_status |= FAILED_ALLOCATION;
-
-	return vg;
-}
-
 int lv_has_unknown_segments(const struct logical_volume *lv)
 {
 	struct lv_segment *seg;
@@ -1014,24 +1014,24 @@ int vg_has_unknown_segments(const struct volume_group *vg)
 	return 0;
 }
 
-struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name)
+struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, int *exists)
 {
 	uint32_t rc;
 	struct volume_group *vg;
 
 	if (!validate_name(vg_name)) {
 		log_error("Invalid vg name %s", vg_name);
-		/* FIXME: use _vg_make_handle() w/proper error code */
 		return NULL;
 	}
 
 	rc = vg_lock_newname(cmd, vg_name);
+	if (rc == FAILED_EXIST)
+		*exists = 1;
 	if (rc != SUCCESS)
-		/* NOTE: let caller decide - this may be check for existence */
-		return _vg_make_handle(cmd, NULL, rc);
+		return NULL;
 
 	vg = vg_create(cmd, vg_name);
-	if (!vg || vg_read_error(vg))
+	if (!vg)
 		unlock_vg(cmd, NULL, vg_name);
 
 	return vg;
@@ -1039,12 +1039,8 @@ struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_
 
 /*
  * Create a VG with default parameters.
- * Returns:
- * - struct volume_group* with SUCCESS code: VG structure created
- * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
- * Use vg_read_error() to determine success or failure.
- * FIXME: cleanup usage of _vg_make_handle()
  */
+
 struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
 {
 	struct volume_group *vg;
@@ -1084,11 +1080,10 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
 			  vg_name);
 		goto bad;
 	}
-	return _vg_make_handle(cmd, vg, SUCCESS);
+	return vg;
 
 bad:
 	unlock_and_release_vg(cmd, vg, vg_name);
-	/* FIXME: use _vg_make_handle() w/proper error code */
 	return NULL;
 }
 
@@ -2807,57 +2802,6 @@ static int _pv_in_pv_list(struct physical_volume *pv, struct dm_list *head)
 	return 0;
 }
 
-/*
- * Check if any of the PVs in VG still contain old PV headers
- * and if yes, schedule them for PV header update.
- */
-static int _vg_update_old_pv_ext_if_needed(struct volume_group *vg)
-{
-	struct pv_list *pvl, *new_pvl;
-	int pv_needs_rewrite;
-
-	if (!(vg->fid->fmt->features & FMT_PV_FLAGS))
-		return 1;
-
-	dm_list_iterate_items(pvl, &vg->pvs) {
-		if (is_missing_pv(pvl->pv) ||
-		    !pvl->pv->fmt->ops->pv_needs_rewrite)
-			continue;
-
-		if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list))
-			continue;
-
-		if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv,
-							 &pv_needs_rewrite))
-			return_0;
-
-		if (pv_needs_rewrite) {
-			/*
-			 * Schedule PV for writing only once!
-			 */
-			if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list))
-				continue;
-
-			if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl)))) {
-				log_error("pv_to_write allocation for '%s' failed", pv_dev_name(pvl->pv));
-				return 0;
-			}
-			new_pvl->pv = pvl->pv;
-			dm_list_add(&vg->pv_write_list, &new_pvl->list);
-			log_debug("PV %s has old extension header, updating to newest version.",
-				  pv_dev_name(pvl->pv));
-		}
-	}
-
-	if (!dm_list_empty(&vg->pv_write_list) &&
-	    (!vg_write(vg) || !vg_commit(vg))) {
-		log_error("Failed to update old PV extension headers in VG %s.", vg->name);
-		return 0;
-	}
-
-	return 1;
-}
-
 static int _check_historical_lv_is_valid(struct historical_logical_volume *hlv)
 {
 	struct glv_list *glvl;
@@ -2922,6 +2866,69 @@ static int _handle_historical_lvs(struct volume_group *vg)
 	return 1;
 }
 
+/* Wipe the mdas and rewrite the PV header on every outdated PV lvmcache lists for this VG. */
+static void _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg)
+{
+	struct dm_list devs;
+	struct dm_list *mdas;
+	struct device_list *devl;
+	struct device *dev;
+	struct metadata_area *mda;
+	struct label *label;
+	struct lvmcache_info *info;
+	uint32_t ext_flags;
+
+	dm_list_init(&devs);
+
+	/*
+	 * When vg_read selected a good copy of the metadata, it used it to
+	 * update the lvmcache representation of the VG (lvmcache_update_vg).
+	 * At that point outdated PVs were recognized and moved into the
+	 * vginfo->outdated_infos list.  Here we clear the PVs on that list.
+	 */
+	lvmcache_get_outdated_devs(cmd, vg->name, (const char *)&vg->id, &devs);
+
+	dm_list_iterate_items(devl, &devs) {
+		dev = devl->dev;
+
+		/* Reset per device so a lookup that sets nothing cannot reuse the previous list. */
+		mdas = NULL;
+		lvmcache_get_outdated_mdas(cmd, vg->name, (const char *)&vg->id, dev, &mdas);
+
+		if (mdas) {
+			dm_list_iterate_items(mda, mdas) {
+				log_warn("WARNING: wiping mda on outdated PV %s", dev_name(dev));
+				if (!text_wipe_outdated_pv_mda(cmd, dev, mda))
+					log_warn("WARNING: failed to wipe mda on outdated PV %s", dev_name(dev));
+			}
+		}
+
+		if (!(label = lvmcache_get_dev_label(dev))) {
+			log_error("_wipe_outdated_pvs no label for %s", dev_name(dev));
+			continue;
+		}
+
+		/* Drop PV_EXT_USED from the cached ext flags before the label is rewritten. */
+		info = label->info;
+		ext_flags = lvmcache_ext_flags(info) & ~PV_EXT_USED;
+		lvmcache_set_ext_version(info, PV_HEADER_EXTENSION_VSN);
+		lvmcache_set_ext_flags(info, ext_flags);
+
+		log_warn("WARNING: wiping header on outdated PV %s", dev_name(dev));
+		if (!label_write(dev, label))
+			log_warn("WARNING: failed to wipe header on outdated PV %s", dev_name(dev));
+
+		lvmcache_del(info);
+	}
+
+	/*
+	 * A vgremove will involve many vg_write() calls (one for each lv
+	 * removed) but we only need to wipe pvs once, so clear the outdated
+	 * list so it won't be wiped again.
+	 */
+	lvmcache_del_outdated_devs(cmd, vg->name, (const char *)&vg->id);
+}
+
 /*
  * After vg_write() returns success,
  * caller MUST call either vg_commit() or vg_revert()
@@ -2929,9 +2936,10 @@ static int _handle_historical_lvs(struct volume_group *vg)
 int vg_write(struct volume_group *vg)
 {
 	struct dm_list *mdah;
-	struct pv_list *pvl, *pvl_safe;
+	struct pv_list *pvl, *pvl_safe, *new_pvl;
 	struct metadata_area *mda;
 	struct lv_list *lvl;
+	struct device *mda_dev;
 	int revert = 0, wrote = 0;
 
 	if (vg_is_shared(vg)) {
@@ -2986,6 +2994,9 @@ int vg_write(struct volume_group *vg)
 		return 0;
 	}
 
+	if (vg->cmd->wipe_outdated_pvs)
+		_wipe_outdated_pvs(vg->cmd, vg);
+
 	if (critical_section())
 		log_error(INTERNAL_ERROR
 			  "Writing metadata in critical section.");
@@ -2994,6 +3005,26 @@ int vg_write(struct volume_group *vg)
 	memlock_unlock(vg->cmd);
 	vg->seqno++;
 
+	dm_list_iterate_items(pvl, &vg->pvs) {
+		int update_pv_header = 0;
+
+		if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list))
+			continue;
+
+		if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv, &update_pv_header))
+			continue;
+
+		if (!update_pv_header)
+			continue;
+
+		if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl))))
+			continue;
+
+		new_pvl->pv = pvl->pv;
+		dm_list_add(&vg->pv_write_list, &new_pvl->list);
+		log_warn("WARNING: updating PV header on %s for VG %s.", pv_dev_name(pvl->pv), vg->name);
+	}
+
 	dm_list_iterate_items_safe(pvl, pvl_safe, &vg->pv_write_list) {
 		if (!pv_write(vg->cmd, pvl->pv, 1))
 			return_0;
@@ -3002,8 +3033,27 @@ int vg_write(struct volume_group *vg)
 
 	/* Write to each copy of the metadata area */
 	dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
+		mda_dev = mda_get_device(mda);
+
 		if (mda->status & MDA_FAILED)
 			continue;
+
+		/*
+		 * When the scan and vg_read find old metadata in an mda, they
+		 * leave the info struct in lvmcache, and leave the mda in
+		 * info->mdas.  That means we use the mda here to write new
+		 * metadata into.  This means that a command writing a VG will
+		 * automatically update old metadata to the latest.
+		 *
+		 * This can also happen if the metadata was ignored on this
+		 * dev, and then it's later changed to not ignored, and
+		 * we see the old metadata.
+		 */
+		if (lvmcache_has_old_metadata(vg->cmd, vg->name, (const char *)&vg->id, mda_dev)) {
+			log_warn("WARNING: updating old metadata to %u on %s for VG %s.",
+				 vg->seqno, dev_name(mda_dev), vg->name);
+		}
+
 		if (!mda->ops->vg_write) {
 			log_error("Format does not support writing volume"
 				  "group metadata areas");
@@ -3072,6 +3122,7 @@ static int _vg_commit_mdas(struct volume_group *vg)
 	struct metadata_area *mda, *tmda;
 	struct dm_list ignored;
 	int failed = 0;
+	int good = 0;
 	int cache_updated = 0;
 
 	/* Rearrange the metadata_areas_in_use so ignored mdas come first. */
@@ -3092,27 +3143,31 @@ static int _vg_commit_mdas(struct volume_group *vg)
 		    !mda->ops->vg_commit(vg->fid, vg, mda)) {
 			stack;
 			failed = 1;
-		}
+		} else
+			good++;
+
 		/* Update cache first time we succeed */
 		if (!failed && !cache_updated) {
-			lvmcache_update_vg(vg, 0);
+			lvmcache_update_vg_from_write(vg);
 			cache_updated = 1;
 		}
 	}
-	return cache_updated;
+	if (good)
+		return 1;
+	return 0;
 }
 
 /* Commit pending changes */
 int vg_commit(struct volume_group *vg)
 {
-	int cache_updated = 0;
 	struct pv_list *pvl;
+	int ret;
 
-	cache_updated = _vg_commit_mdas(vg);
+	ret = _vg_commit_mdas(vg);
 
 	set_vg_notify(vg->cmd);
 
-	if (cache_updated) {
+	if (ret) {
 		/*
 		 * We need to clear old_name after a successful commit.
 		 * The volume_group structure could be reused later.
@@ -3126,7 +3181,7 @@ int vg_commit(struct volume_group *vg)
 	}
 
 	/* If at least one mda commit succeeded, it was committed */
-	return cache_updated;
+	return ret;
 }
 
 /* Don't commit any pending changes */
@@ -3152,14 +3207,6 @@ void vg_revert(struct volume_group *vg)
 	}
 }
 
-static int _check_mda_in_use(struct metadata_area *mda, void *_in_use)
-{
-	int *in_use = _in_use;
-	if (!mda_is_ignored(mda))
-		*in_use = 1;
-	return 1;
-}
-
 struct _vg_read_orphan_baton {
 	struct cmd_context *cmd;
 	struct volume_group *vg;
@@ -3197,6 +3244,14 @@ struct _vg_read_orphan_baton {
  */
 
 #if 0
+static int _check_mda_in_use(struct metadata_area *mda, void *_in_use)
+{
+	/* Set the int behind _in_use when this mda is not ignored. */
+	if (!mda_is_ignored(mda))
+		*(int *) _in_use = 1;
+	return 1;
+}
+
 static int _check_or_repair_orphan_pv_ext(struct physical_volume *pv,
 					  struct lvmcache_info *info,
 					  struct _vg_read_orphan_baton *b)
@@ -3331,9 +3386,7 @@ static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton)
 }
 
 /* Make orphan PVs look like a VG. */
-struct volume_group *vg_read_orphans(struct cmd_context *cmd,
-					     uint32_t warn_flags,
-					     const char *orphan_vgname)
+struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname)
 {
 	const struct format_type *fmt;
 	struct lvmcache_vginfo *vginfo;
@@ -3394,40 +3447,6 @@ struct volume_group *vg_read_orphans(struct cmd_context *cmd,
 	return vg;
 }
 
-static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
-{
-	struct pv_list *pvl, *pvl2;
-
-	dm_list_iterate_items(pvl, &vg->pvs) {
-		dm_list_iterate_items(pvl2, all_pvs) {
-			if (pvl->pv->dev == pvl2->pv->dev)
-				goto next_pv;
-		}
-
-		/*
-		 * PV is not on list so add it.
-		 */
-		if (!(pvl2 = _copy_pvl(pvmem, pvl))) {
-			log_error("pv_list allocation for '%s' failed",
-				  pv_dev_name(pvl->pv));
-			return 0;
-		}
-		dm_list_add(all_pvs, &pvl2->list);
-  next_pv:
-		;
-	}
-
-	return 1;
-}
-
-static void _free_pv_list(struct dm_list *all_pvs)
-{
-	struct pv_list *pvl;
-
-	dm_list_iterate_items(pvl, all_pvs)
-		pvl->pv->fid->fmt->ops->destroy_instance(pvl->pv->fid);
-}
-
 static void _destroy_fid(struct format_instance **fid)
 {
 	if (*fid) {
@@ -3447,812 +3466,49 @@ int vg_missing_pv_count(const struct volume_group *vg)
 	return ret;
 }
 
-static int _check_reappeared_pv(struct volume_group *correct_vg,
-				struct physical_volume *pv, int act)
-{
-	struct pv_list *pvl;
-	int rv = 0;
-
-        /*
-         * Skip these checks in case the tool is going to deal with missing
-         * PVs, especially since the resulting messages can be pretty
-         * confusing.
-         */
-        if (correct_vg->cmd->handles_missing_pvs)
-		return rv;
-
-	/*
-	 * Skip this if there is no underlying device present for this PV.
-	 */
-	if (!pv->dev)
-		return rv;
-
-	dm_list_iterate_items(pvl, &correct_vg->pvs)
-		if (pv->dev == pvl->pv->dev && is_missing_pv(pvl->pv)) {
-			if (act)
-				log_warn("WARNING: Missing device %s reappeared, updating "
-					 "metadata for VG %s to version %u.",
-					 pv_dev_name(pvl->pv),  pv_vg_name(pvl->pv), 
-					 correct_vg->seqno);
-			if (pvl->pv->pe_alloc_count == 0) {
-				if (act) {
-					pv->status &= ~MISSING_PV;
-					pvl->pv->status &= ~MISSING_PV;
-				}
-				++ rv;
-			} else if (act)
-				log_warn("WARNING: Device %s still marked missing because of allocated data "
-					 "on it, remove volumes and consider vgreduce --removemissing.",
-					 pv_dev_name(pvl->pv));
-		}
-
-	return rv;
-}
-
 static int _is_foreign_vg(struct volume_group *vg)
 {
 	return vg->cmd->system_id && strcmp(vg->system_id, vg->cmd->system_id);
 }
 
-static int _repair_inconsistent_vg(struct volume_group *vg, uint32_t lockd_state)
-{
-	unsigned saved_handles_missing_pvs = vg->cmd->handles_missing_pvs;
-
-	if (lvmcache_found_duplicate_pvs()) {
-		log_debug_metadata("Skip metadata repair with duplicates.");
-		return 0;
-	}
-
-	/* Cannot write foreign VGs, the owner will repair it. */
-	if (_is_foreign_vg(vg)) {
-		log_verbose("Skip metadata repair for foreign VG.");
-		return 0;
-	}
-
-	if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) {
-		log_verbose("Skip metadata repair for shared VG without exclusive lock.");
-		return 0;
-	}
-
-	log_warn("WARNING: Inconsistent metadata found for VG %s - updating to use version %u", vg->name, vg->seqno);
-
-	vg->cmd->handles_missing_pvs = 1;
-	if (!vg_write(vg)) {
-		log_error("Automatic metadata correction failed");
-		vg->cmd->handles_missing_pvs = saved_handles_missing_pvs;
-		return 0;
-	}
-
-	vg->cmd->handles_missing_pvs = saved_handles_missing_pvs;
-
-	if (!vg_commit(vg)) {
-		log_error("Automatic metadata correction commit failed");
-		return 0;
-	}
-
-	return 1;
-}
-
-static int _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg, struct dm_list *to_check, uint32_t lockd_state)
-{
-	struct pv_list *pvl, *pvl2;
-	char uuid[64] __attribute__((aligned(8)));
-
-	if (lvmcache_found_duplicate_pvs()) {
-		log_debug_metadata("Skip wiping outdated PVs with duplicates.");
-		return 0;
-	}
-
-	/*
-	 * Cannot write foreign VGs, the owner will repair it.
-	 * Also, if another host is updating its VG, we may read
-	 * the PVs while some are written but not others, making
-	 * some PVs look outdated to us just because we're reading
-	 * the VG while it's only partially written out.
-	 */
-	if (_is_foreign_vg(vg)) {
-		log_debug_metadata("Skip wiping outdated PVs for foreign VG.");
-		return 0;
-	}
-
-	if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) {
-		log_verbose("Skip wiping outdated PVs for shared VG without exclusive lock.");
-		return 0;
-	}
-
-	dm_list_iterate_items(pvl, to_check) {
-		dm_list_iterate_items(pvl2, &vg->pvs) {
-			if (pvl->pv->dev == pvl2->pv->dev)
-				goto next_pv;
-		}
-
-
-		if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid)))
-			return_0;
-		log_warn("WARNING: Removing PV %s (%s) that no longer belongs to VG %s",
-			 pv_dev_name(pvl->pv), uuid, vg->name);
-		if (!pv_write_orphan(cmd, pvl->pv))
-			return_0;
-next_pv:
-		;
-	}
-	return 1;
-}
-
-static int _check_or_repair_pv_ext(struct cmd_context *cmd,
-				   struct volume_group *vg,
-				   uint32_t lockd_state,
-				   int repair, int *inconsistent_pvs)
+static int _check_pv_ext(struct cmd_context *cmd, struct volume_group *vg)
 {
-	char uuid[64] __attribute__((aligned(8)));
 	struct lvmcache_info *info;
 	uint32_t ext_version, ext_flags;
 	struct pv_list *pvl;
-	unsigned pvs_fixed = 0;
-	int r = 0;
 
-	*inconsistent_pvs = 0;
+	if (_is_foreign_vg(vg))
+		return 1;
+
+	if (vg_is_shared(vg))
+		return 1;
 
 	dm_list_iterate_items(pvl, &vg->pvs) {
-		/* Missing PV - nothing to do. */
 		if (is_missing_pv(pvl->pv))
 			continue;
 
-		if (!pvl->pv->dev) {
-			/* is_missing_pv doesn't catch NULL dev */
-			memset(&uuid, 0, sizeof(uuid));
-			if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid)))
-				goto_out;
-			log_warn("WARNING: Not repairing PV %s with missing device.", uuid);
+		/* is_missing_pv doesn't catch NULL dev */
+		if (!pvl->pv->dev)
 			continue;
-		}
 
-		if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0))) {
-			log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv));
-			goto out;
-		}
+		if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0)))
+			continue;
 
 		ext_version = lvmcache_ext_version(info);
-		if (ext_version < 2)
+		if (ext_version < PV_HEADER_EXTENSION_VSN) {
+			log_warn("WARNING: PV %s in VG %s is using an old PV header, modify the VG to update.",
+				 dev_name(pvl->pv->dev), vg->name);
 			continue;
+		}
 
 		ext_flags = lvmcache_ext_flags(info);
 		if (!(ext_flags & PV_EXT_USED)) {
-			if (!repair) {
-				*inconsistent_pvs = 1;
-				/* we're not repairing now, so no need to
-				 * check further PVs - inconsistent_pvs is already
-				 * set and that will trigger the repair next time */
-				return 1;
-			}
-
-			if (_is_foreign_vg(vg)) {
-				log_verbose("Skip repair of PV %s that is in foreign "
-					    "VG %s but not marked as used.",
-					    pv_dev_name(pvl->pv), vg->name);
-				*inconsistent_pvs = 1;
-			} else if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) {
-				log_warn("Skip repair of PV %s that is in shared "
-					    "VG %s but not marked as used.",
-					    pv_dev_name(pvl->pv), vg->name);
-				*inconsistent_pvs = 1;
-			} else {
-				log_warn("WARNING: Repairing Physical Volume %s that is "
-					 "in Volume Group %s but not marked as used.",
-					  pv_dev_name(pvl->pv), vg->name);
-
-				/* pv write will set correct ext_flags */
-				if (!pv_write(cmd, pvl->pv, 1)) {
-					*inconsistent_pvs = 1;
-					log_error("Failed to repair physical volume \"%s\".",
-						  pv_dev_name(pvl->pv));
-					goto out;
-				}
-				pvs_fixed++;
-			}
-		}
-	}
-
-	r = 1;
-out:
-	if ((pvs_fixed > 0) && !_repair_inconsistent_vg(vg, lockd_state))
-		return_0;
-
-	return r;
-}
-
-/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
- * inconsistent metadata on disk (i.e. the VG write lock is held).
- * This guarantees only consistent metadata is returned.
- * If consistent is 0, caller must check whether consistent == 1 on return
- * and take appropriate action if it isn't (e.g. abort; get write lock
- * and call vg_read_internal again).
- *
- * If precommitted is set, use precommitted metadata if present.
- *
- * Either of vgname or vgid may be NULL.
- *
- * Note: vginfo structs must not be held or used as parameters
- *       across the call to this function.
- */
-static struct volume_group *_vg_read(struct cmd_context *cmd,
-				     const char *vgname,
-				     const char *vgid,
-				     uint32_t lockd_state, 
-				     uint32_t warn_flags, 
-				     int enable_repair,
-				     int *mdas_consistent,
-				     unsigned precommitted)
-{
-	struct format_instance *fid = NULL;
-	struct format_instance_ctx fic;
-	const struct format_type *fmt;
-	struct volume_group *vg, *correct_vg = NULL;
-	struct metadata_area *mda;
-	struct lvmcache_info *info;
-	int inconsistent = 0;
-	int inconsistent_vgid = 0;
-	int inconsistent_pvs = 0;
-	int inconsistent_mdas = 0;
-	int inconsistent_mda_count = 0;
-	int strip_historical_lvs = enable_repair;
-	int update_old_pv_ext = enable_repair;
-	unsigned use_precommitted = precommitted;
-	struct dm_list *pvids;
-	struct pv_list *pvl;
-	struct dm_list all_pvs;
-	char uuid[64] __attribute__((aligned(8)));
-	int skipped_rescan = 0;
-	struct cached_vg_fmtdata *vg_fmtdata = NULL;	/* Additional format-specific data about the vg */
-	unsigned use_previous_vg;
-
-	*mdas_consistent = 1;
-
-	if (is_orphan_vg(vgname)) {
-		log_very_verbose("Reading VG %s", vgname);
-
-		if (use_precommitted) {
-			log_error(INTERNAL_ERROR "vg_read_internal requires vgname "
-				  "with pre-commit.");
-			return NULL;
-		}
-		return vg_read_orphans(cmd, warn_flags, vgname);
-	}
-
-	uuid[0] = '\0';
-	if (vgid && !id_write_format((const struct id*)vgid, uuid, sizeof(uuid)))
-		stack;
-
-	log_very_verbose("Reading VG %s %s", vgname ?: "<no name>", vgid ? uuid : "<no vgid>");
-
-	/*
-	 * Rescan the devices that are associated with this vg in lvmcache.
-	 * This repeats what was done by the command's initial label scan,
-	 * but only the devices associated with this VG.
-	 *
-	 * The lvmcache info about these devs is from the initial label scan
-	 * performed by the command before the vg lock was held.  Now the VG
-	 * lock is held, so we rescan all the info from the devs in case
-	 * something changed between the initial scan and now that the lock
-	 * is held.
-	 *
-	 * Some commands (e.g. reporting) are fine reporting data read by
-	 * the label scan.  It doesn't matter if the devs changed between
-	 * the label scan and here, we can report what was seen in the
-	 * scan, even though it is the old state, since we will not be
-	 * making any modifications.  If the VG was being modified during
-	 * the scan, and caused us to see inconsistent metadata on the
-	 * different PVs in the VG, then we do want to rescan the devs
-	 * here to get a consistent view of the VG.  Note that we don't
-	 * know if the scan found all the PVs in the VG at this point.
-	 * We don't know that until vg_read looks at the list of PVs in
-	 * the metadata and compares that to the devices found by the scan.
-	 *
-	 * It's possible that a change made to the VG during scan was
-	 * adding or removing a PV from the VG.  In this case, the list
-	 * of devices associated with the VG in lvmcache would change
-	 * due to the rescan.
-	 *
-	 * The devs in the VG may be persistently inconsistent due to some
-	 * previous problem.  In this case, rescanning the labels here will
-	 * find the same inconsistency.  The VG repair (mistakenly done by
-	 * vg_read below) is supposed to fix that.
-	 *
-	 * FIXME: sort out the usage of the global lock (which is mixed up
-	 * with the orphan lock), and when we can tell that the global
-	 * lock is taken prior to the label scan, and still held here,
-	 * we can also skip the rescan in that case.
-	 */
-	if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) {
-		/* the skip rescan special case is for clvmd vg_read_by_vgid */
-		/* FIXME: this is not a warn flag, pass this differently */
-		if (warn_flags & SKIP_RESCAN)
-			goto find_vg;
-		skipped_rescan = 0;
-		log_debug_metadata("Rescanning devices for %s", vgname);
-		lvmcache_label_rescan_vg(cmd, vgname, vgid);
-	} else {
-		log_debug_metadata("Skipped rescanning devices for %s", vgname);
-		skipped_rescan = 1;
-	}
-
- find_vg:
-
-	if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) {
-		log_debug_metadata("Cache did not find fmt for vgname %s", vgname);
-		return_NULL;
-	}
-
-	/* Now determine the correct vgname if none was supplied */
-	if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) {
-		log_debug_metadata("Cache did not find VG name from vgid %s", uuid);
-		return_NULL;
-	}
-
-	/* Determine the correct vgid if none was supplied */
-	if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) {
-		log_debug_metadata("Cache did not find VG vgid from name %s", vgname);
-		return_NULL;
-	}
-
-	if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
-		use_precommitted = 0;
-
-	/*
-	 * A "format instance" is an abstraction for a VG location,
-	 * i.e. where a VG's metadata exists on disk.
-	 *
-	 * An fic (format_instance_ctx) is a temporary struct used
-	 * to create an fid (format_instance).  The fid hangs around
-	 * and is used to create a 'vg' to which it connected (vg->fid).
-	 *
-	 * The 'fic' describes a VG in terms of fmt/name/id.
-	 *
-	 * The 'fid' describes a VG in more detail than the fic,
-	 * holding information about where to find the VG metadata.
-	 *
-	 * The 'vg' describes the VG in the most detail representing
-	 * all the VG metadata.
-	 *
-	 * The fic and fid are set up by create_instance() to describe
-	 * the VG location.  This happens before the VG metadata is
-	 * assembled into the more familiar struct volume_group "vg".
-	 *
-	 * The fid has one main purpose: to keep track of the metadata
-	 * locations for a given VG.  It does this by putting 'mda'
-	 * structs on fid->metadata_areas_in_use, which specify where
-	 * metadata is located on disk.  It gets this information
-	 * (metadata locations for a specific VG) from the command's
-	 * initial label scan.  The info is passed indirectly via
-	 * lvmcache info/vginfo structs, which are created by the
-	 * label scan and then copied into fid by create_instance().
-	 */
-
-	/* create format instance with appropriate metadata area */
-	fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS;
-	fic.context.vg_ref.vg_name = vgname;
-	fic.context.vg_ref.vg_id = vgid;
-	if (!(fid = fmt->ops->create_instance(fmt, &fic))) {
-		log_error("Failed to create format instance");
-		return NULL;
-	}
-
-	/* Store pvids for later so we can check if any are missing */
-	if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) {
-		_destroy_fid(&fid);
-		return_NULL;
-	}
-
-	/*
-	 * We use the fid globally here so prevent the release_vg
-	 * call to destroy the fid - we may want to reuse it!
-	 */
-	fid->ref_count++;
-	/* Ensure contents of all metadata areas match - else do recovery */
-	inconsistent_mda_count=0;
-	dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
-		struct device *mda_dev = mda_get_device(mda);
-
-		use_previous_vg = 0;
-
-		log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev));
-
-		if ((use_precommitted &&
-		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
-		    (!use_precommitted &&
-		     !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) {
-			inconsistent = 1;
-			vg_fmtdata = NULL;
-			continue;
-		}
-
-		/* Use previous VG because checksum matches */
-		if (!vg) {
-			vg = correct_vg;
-			continue;
-		}
-
-		if (!correct_vg) {
-			correct_vg = vg;
-			continue;
-		}
-
-		/* FIXME Also ensure contents same - checksum compare? */
-		if (correct_vg->seqno != vg->seqno) {
-			if (cmd->metadata_read_only || skipped_rescan)
-				log_warn("Not repairing metadata for VG %s.", vgname);
-			else
-				inconsistent = 1;
-
-			if (vg->seqno > correct_vg->seqno) {
-				release_vg(correct_vg);
-				correct_vg = vg;
-			} else {
-				mda->status |= MDA_INCONSISTENT;
-				++inconsistent_mda_count;
-			}
-		}
-
-		if (vg != correct_vg) {
-			release_vg(vg);
-			vg_fmtdata = NULL;
-		}
-	}
-	fid->ref_count--;
-
-	/* Ensure every PV in the VG was in the cache */
-	if (correct_vg) {
-		/*
-		 * Update the seqno from the cache, for the benefit of
-		 * retro-style metadata formats like LVM1.
-		 */
-		// correct_vg->seqno = seqno > correct_vg->seqno ? seqno : correct_vg->seqno;
-
-		/*
-		 * If the VG has PVs without mdas, or ignored mdas, they may
-		 * still be orphans in the cache: update the cache state here,
-		 * and update the metadata lists in the vg.
-		 */
-		if (!inconsistent &&
-		    dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
-			dm_list_iterate_items(pvl, &correct_vg->pvs) {
-				if (!pvl->pv->dev) {
-					inconsistent_pvs = 1;
-					break;
-				}
-
-				if (str_list_match_item(pvids, pvl->pv->dev->pvid))
-					continue;
-
-				/*
-				 * PV not marked as belonging to this VG in cache.
-				 * Check it's an orphan without metadata area
-				 * not ignored.
-				 */
-				if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 1)) ||
-				    !lvmcache_is_orphan(info)) {
-					inconsistent_pvs = 1;
-					break;
-				}
-
-				if (lvmcache_mda_count(info)) {
-					if (!lvmcache_fid_add_mdas_pv(info, fid)) {
-						release_vg(correct_vg);
-						return_NULL;
-					}
-
-					log_debug_metadata("Empty mda found for VG %s on %s.",
-							   vgname, dev_name(pvl->pv->dev));
-
-#if 0
-					/*
-					 * If we are going to do any repair we have to be using 
-					 * the latest metadata on disk, so we have to rescan devs
-					 * if we skipped that at the start of the vg_read.  We'll
-					 * likely come back through here, but without having
-					 * skipped_rescan.
-					 *
-					 * FIXME: in some cases we don't want to do this.
-					 */
-					if (skipped_rescan && cmd->can_use_one_scan) {
-						log_debug_metadata("Restarting read to rescan devs.");
-						cmd->can_use_one_scan = 0;
-						release_vg(correct_vg);
-						correct_vg = NULL;
-						lvmcache_del(info);
-						label_read(pvl->pv->dev);
-						goto restart_scan;
-					}
-#endif
-
-					if (inconsistent_mdas)
-						continue;
-
-					/*
-					 * If any newly-added mdas are in-use then their
-					 * metadata needs updating.
-					 */
-					lvmcache_foreach_mda(info, _check_mda_in_use,
-							     &inconsistent_mdas);
-				}
-			}
-
-			/* If the check passed, let's update VG and recalculate pvids */
-			if (!inconsistent_pvs) {
-				log_debug_metadata("Updating cache for PVs without mdas "
-						   "in VG %s.", vgname);
-				/*
-				 * If there is no precommitted metadata, committed metadata
-				 * is read and stored in the cache even if use_precommitted is set
-				 */
-				lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED);
-
-				if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) {
-					release_vg(correct_vg);
-					return_NULL;
-				}
-			}
-		}
-
-		fid->ref_count++;
-		if (dm_list_size(&correct_vg->pvs) !=
-		    dm_list_size(pvids) + vg_missing_pv_count(correct_vg)) {
-			log_debug_metadata("Cached VG %s had incorrect PV list",
-					   vgname);
-
-			if (prioritized_section())
-				inconsistent = 1;
-			else {
-				release_vg(correct_vg);
-				correct_vg = NULL;
-			}
-		} else dm_list_iterate_items(pvl, &correct_vg->pvs) {
-			if (is_missing_pv(pvl->pv))
-				continue;
-			if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
-				log_debug_metadata("Cached VG %s had incorrect PV list",
-						   vgname);
-				release_vg(correct_vg);
-				correct_vg = NULL;
-				break;
-			}
-		}
-
-		if (correct_vg && inconsistent_mdas) {
-			release_vg(correct_vg);
-			correct_vg = NULL;
-		}
-		fid->ref_count--;
-	}
-
-	dm_list_init(&all_pvs);
-
-	/* Failed to find VG where we expected it - full scan and retry */
-	if (!correct_vg) {
-		/*
-		 * Free outstanding format instance that remained unassigned
-		 * from previous step where we tried to get the "correct_vg",
-		 * but we failed to do so (so there's a dangling fid now).
-		 */
-		_destroy_fid(&fid);
-		vg_fmtdata = NULL;
-
-		inconsistent = 0;
-
-		if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0)))
-			return_NULL;
-
-		if (precommitted && !(fmt->features & FMT_PRECOMMIT))
-			use_precommitted = 0;
-
-		/* create format instance with appropriate metadata area */
-		fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS;
-		fic.context.vg_ref.vg_name = vgname;
-		fic.context.vg_ref.vg_id = vgid;
-		if (!(fid = fmt->ops->create_instance(fmt, &fic))) {
-			log_error("Failed to create format instance");
-			return NULL;
-		}
-
-		/*
-		 * We use the fid globally here so prevent the release_vg
-		 * call to destroy the fid - we may want to reuse it!
-		*/
-		fid->ref_count++;
-		/* Ensure contents of all metadata areas match - else recover */
-		inconsistent_mda_count=0;
-		dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
-			use_previous_vg = 0;
-
-			if ((use_precommitted &&
-			     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
-			    (!use_precommitted &&
-			     !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) {
-				inconsistent = 1;
-				vg_fmtdata = NULL;
-				continue;
-			}
-
-			/* Use previous VG because checksum matches */
-			if (!vg) {
-				vg = correct_vg;
-				continue;
-			}
-
-			if (!correct_vg) {
-				correct_vg = vg;
-				if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
-					_free_pv_list(&all_pvs);
-					fid->ref_count--;
-					release_vg(vg);
-					return_NULL;
-				}
-				continue;
-			}
-
-			if (!id_equal(&vg->id, &correct_vg->id)) {
-				inconsistent = 1;
-				inconsistent_vgid = 1;
-			}
-
-			/* FIXME Also ensure contents same - checksums same? */
-			if (correct_vg->seqno != vg->seqno) {
-				/* Ignore inconsistent seqno if told to skip repair logic */
-				if (cmd->metadata_read_only || skipped_rescan)
-					log_warn("Not repairing metadata for VG %s.", vgname);
-				else
-					inconsistent = 1;
-
-				if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
-					_free_pv_list(&all_pvs);
-					fid->ref_count--;
-					release_vg(vg);
-					release_vg(correct_vg);
-					return_NULL;
-				}
-				if (vg->seqno > correct_vg->seqno) {
-					release_vg(correct_vg);
-					correct_vg = vg;
-				} else {
-					mda->status |= MDA_INCONSISTENT;
-					++inconsistent_mda_count;
-				}
-			}
-
-			if (vg != correct_vg) {
-				release_vg(vg);
-				vg_fmtdata = NULL;
-			}
-		}
-		fid->ref_count--;
-
-		/* Give up looking */
-		if (!correct_vg) {
-			_free_pv_list(&all_pvs);
-			_destroy_fid(&fid);
-			return_NULL;
-		}
-	}
-
-	/*
-	 * If there is no precommitted metadata, committed metadata
-	 * is read and stored in the cache even if use_precommitted is set
-	 */
-	lvmcache_update_vg(correct_vg, (correct_vg->status & PRECOMMITTED));
-
-	if (inconsistent) {
-		/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
-		if (use_precommitted) {
-			log_error("Inconsistent pre-commit metadata copies "
-				  "for volume group %s", vgname);
-
-			/*
-			 * Check whether all of the inconsistent MDAs were on
-			 * MISSING PVs -- in that case, we should be safe.
-			 */
-			dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
-				if (mda->status & MDA_INCONSISTENT) {
-					log_debug_metadata("Checking inconsistent MDA: %s", dev_name(mda_get_device(mda)));
-					dm_list_iterate_items(pvl, &correct_vg->pvs) {
-						if (mda_get_device(mda) == pvl->pv->dev &&
-						    (pvl->pv->status & MISSING_PV))
-							--inconsistent_mda_count;
-					}
-				}
-			}
-
-			if (inconsistent_mda_count < 0)
-				log_error(INTERNAL_ERROR "Too many inconsistent MDAs.");
-
-			if (!inconsistent_mda_count) {
-				_free_pv_list(&all_pvs);
-				return correct_vg;
-			}
-			_free_pv_list(&all_pvs);
-			release_vg(correct_vg);
-			return NULL;
-		}
-
-		if (!enable_repair) {
-			_free_pv_list(&all_pvs);
-			*mdas_consistent = 0;
-			return correct_vg;
-		}
-
-		if (skipped_rescan) {
-			log_warn("Not repairing metadata for VG %s.", vgname);
-			_free_pv_list(&all_pvs);
-			release_vg(correct_vg);
-			return_NULL;
-		}
-
-		/* Don't touch if vgids didn't match */
-		if (inconsistent_vgid) {
-			log_warn("WARNING: Inconsistent metadata UUIDs found for volume group %s.", vgname);
-			_free_pv_list(&all_pvs);
-			*mdas_consistent = 0;
-			return correct_vg;
-		}
-
-		/*
-		 * If PV is marked missing but we found it,
-		 * update metadata and remove MISSING flag
-		 */
-		dm_list_iterate_items(pvl, &all_pvs)
-			_check_reappeared_pv(correct_vg, pvl->pv, 1);
-
-		if (!_repair_inconsistent_vg(correct_vg, lockd_state)) {
-			_free_pv_list(&all_pvs);
-			release_vg(correct_vg);
-			return NULL;
-		}
-
-		if (!_wipe_outdated_pvs(cmd, correct_vg, &all_pvs, lockd_state)) {
-			_free_pv_list(&all_pvs);
-			release_vg(correct_vg);
-			return_NULL;
-		}
-	}
-
-	_free_pv_list(&all_pvs);
-
-	if (vg_missing_pv_count(correct_vg)) {
-		log_verbose("There are %d physical volumes missing.",
-			    vg_missing_pv_count(correct_vg));
-		vg_mark_partial_lvs(correct_vg, 1);
-	}
-
-	if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
-		log_error("Interrupted pvmove detected in volume group %s.",
-			  correct_vg->name);
-		log_print("Please restore the metadata by running vgcfgrestore.");
-		release_vg(correct_vg);
-		return NULL;
-	}
-
-	/* We have the VG now finally, check if PV ext info is in sync with VG metadata. */
-	if (!_check_or_repair_pv_ext(cmd, correct_vg, lockd_state, skipped_rescan ? 0 : enable_repair,
-				     &inconsistent_pvs)) {
-		release_vg(correct_vg);
-		return_NULL;
-	}
-
-	if (correct_vg && enable_repair && !skipped_rescan) {
-		if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) {
-			release_vg(correct_vg);
-			return_NULL;
-		}
-
-		if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) {
-			release_vg(correct_vg);
-			return_NULL;
+			log_warn("WARNING: PV %s in VG %s is missing the used flag in PV header.",
+				 dev_name(pvl->pv->dev), vg->name);
 		}
 	}
 
-	if (inconsistent_pvs)
-		*mdas_consistent = 0;
-
-	return correct_vg;
+	return 1;
 }
 
 #define DEV_LIST_DELIM ", "
@@ -4333,7 +3589,7 @@ static int _check_devs_used_correspond_with_lv(struct dm_pool *mem, struct dm_li
 	return 1;
 }
 
-static int _check_devs_used_correspond_with_vg(struct volume_group *vg)
+static void _check_devs_used_correspond_with_vg(struct volume_group *vg)
 {
 	struct dm_pool *mem;
 	char vgid[ID_LEN + 1];
@@ -4343,9 +3599,6 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg)
 	struct device_list *dl;
 	int found_inconsistent = 0;
 
-	if (is_orphan_vg(vg->name))
-		return 1;
-
 	strncpy(vgid, (const char *) vg->id.uuid, sizeof(vgid));
 	vgid[ID_LEN] = '\0';
 
@@ -4366,7 +3619,7 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg)
 	}
 
 	if (!(list = dev_cache_get_dev_list_for_vgid(vgid)))
-		return 1;
+		return;
 
 	dm_list_iterate_items(dl, list) {
 		if (!(dl->dev->flags & DEV_OPEN_FAILURE) &&
@@ -4378,79 +3631,19 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg)
 
 	if (found_inconsistent) {
 		if (!(mem = dm_pool_create("vg_devs_check", 1024)))
-			return_0;
+			return;
 
 		dm_list_iterate_items(lvl, &vg->lvs) {
 			if (!_check_devs_used_correspond_with_lv(mem, list, lvl->lv)) {
 				dm_pool_destroy(mem);
-				return_0;
+				return;
 			}
 		}
 
 		dm_pool_destroy(mem);
 	}
 
-	return 1;
-}
-
-struct volume_group *vg_read_internal(struct cmd_context *cmd,
-				      const char *vgname, const char *vgid,
-				      uint32_t lockd_state, uint32_t warn_flags,
-				      int enable_repair,
-				      int *mdas_consistent)
-{
-	struct volume_group *vg;
-	struct lv_list *lvl;
-
-	if (!(vg = _vg_read(cmd, vgname, vgid, lockd_state,
-			    warn_flags, enable_repair, mdas_consistent, 0)))
-		goto_out;
-
-	if (!check_pv_dev_sizes(vg))
-		log_warn("One or more devices used as PVs in VG %s "
-			 "have changed sizes.", vg->name);
-
-	if (!check_pv_segments(vg)) {
-		log_error(INTERNAL_ERROR "PV segments corrupted in %s.",
-			  vg->name);
-		release_vg(vg);
-		vg = NULL;
-		goto out;
-	}
-
-	dm_list_iterate_items(lvl, &vg->lvs) {
-		if (!check_lv_segments(lvl->lv, 0)) {
-			log_error(INTERNAL_ERROR "LV segments corrupted in %s.",
-				  lvl->lv->name);
-			release_vg(vg);
-			vg = NULL;
-			goto out;
-		}
-	}
-
-	dm_list_iterate_items(lvl, &vg->lvs) {
-		/*
-		 * Checks that cross-reference other LVs.
-		 */
-		if (!check_lv_segments(lvl->lv, 1)) {
-			log_error(INTERNAL_ERROR "LV segments corrupted in %s.",
-				  lvl->lv->name);
-			release_vg(vg);
-			vg = NULL;
-			goto out;
-		}
-	}
-
-	(void) _check_devs_used_correspond_with_vg(vg);
-out:
-	if (!*mdas_consistent && (warn_flags & WARN_INCONSISTENT)) {
-		if (is_orphan_vg(vgname))
-			log_warn("WARNING: Found inconsistent standalone Physical Volumes.");
-		else
-			log_warn("WARNING: Volume Group %s is not consistent.", vgname);
-	}
-
-	return vg;
+	return;
 }
 
 void free_pv_fid(struct physical_volume *pv)
@@ -4699,10 +3892,6 @@ uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status)
 {
 	uint32_t failure = 0;
 
-	if ((status & CLUSTERED) && !_access_vg_clustered(vg->cmd, vg))
-		/* Return because other flags are considered undefined. */
-		return FAILED_CLUSTERED;
-
 	if ((status & EXPORTED_VG) &&
 	    vg_is_exported(vg)) {
 		log_error("Volume group %s is exported", vg->name);
@@ -4734,48 +3923,6 @@ int vg_check_status(const struct volume_group *vg, uint64_t status)
 	return !vg_bad_status_bits(vg, status);
 }
 
-/*
- * VG is left unlocked on failure
- */
-static struct volume_group *_recover_vg(struct cmd_context *cmd,
-			 const char *vg_name, const char *vgid,
-			 int is_shared, uint32_t lockd_state)
-{
-	int mdas_consistent = 0;
-	struct volume_group *vg;
-	uint32_t state = 0;
-
-	unlock_vg(cmd, NULL, vg_name);
-
-	if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL))
-		return_NULL;
-
-	/*
-	 * Convert vg lock in lvmlockd from sh to ex.
-	 */
-	if (is_shared && !(lockd_state & LDST_FAIL) && !(lockd_state & LDST_EX)) {
-		log_debug("Upgrade lvmlockd lock to repair vg %s.", vg_name);
-		if (!lockd_vg(cmd, vg_name, "ex", 0, &state)) {
-			log_warn("Skip repair for shared VG without exclusive lock.");
-			return NULL;
-		}
-		lockd_state |= LDST_EX;
-	}
-
-	if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, 0, 1, &mdas_consistent))) {
-		unlock_vg(cmd, NULL, vg_name);
-		return_NULL;
-	}
-
-	if (!mdas_consistent) {
-		release_vg(vg);
-		unlock_vg(cmd, NULL, vg_name);
-		return_NULL;
-	}
-
-	return (struct volume_group *)vg;
-}
-
 static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id)
 {
 	const struct dm_config_node *cn;
@@ -4805,9 +3952,6 @@ static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id
 static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg,
 				uint32_t lockd_state, uint32_t *failure)
 {
-	if (!is_real_vg(vg->name))
-		return 1;
-
 	if (cmd->lockd_vg_disable)
 		return 1;
 
@@ -4954,225 +4098,15 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg)
 }
 
 /*
- * FIXME: move vg_bad_status_bits() checks in here.
- */
-static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg,
-				uint32_t lockd_state, uint32_t *failure)
-{
-	if (!is_real_vg(vg->name)) {
-		return 1;
-	}
-
-	if (!_access_vg_clustered(cmd, vg)) {
-		*failure |= FAILED_CLUSTERED;
-		return 0;
-	}
-
-	if (!_access_vg_lock_type(cmd, vg, lockd_state, failure)) {
-		/* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */
-		return 0;
-	}
-
-	if (!_access_vg_systemid(cmd, vg)) {
-		*failure |= FAILED_SYSTEMID;
-		return 0;
-	}
-
-	return 1;
-}
-
-/*
- * Consolidated locking, reading, and status flag checking.
- *
- * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
- * read_flags will return it with FAILED_INCONSISTENT set instead of 
- * giving you nothing.
- *
- * Use vg_read_error(vg) to determine the result.  Nonzero means there were
- * problems reading the volume group.
- * Zero value means that the VG is open and appropriate locks are held.
- */
-static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
-			       const char *vgid,
-			       uint32_t lock_flags,
-			       uint64_t status_flags,
-			       uint32_t read_flags,
-			       uint32_t lockd_state)
-{
-	struct volume_group *vg = NULL;
-	uint32_t failure = 0;
-	uint32_t warn_flags = 0;
-	int mdas_consistent = 1;
-	int enable_repair = 1;
-	int is_shared = 0;
-	int skip_lock = is_orphan_vg(vg_name) && (read_flags & PROCESS_SKIP_ORPHAN_LOCK);
-
-	if ((read_flags & READ_ALLOW_INCONSISTENT) || (lock_flags != LCK_VG_WRITE)) {
-		enable_repair = 0;
-		warn_flags |= WARN_INCONSISTENT;
-	}
-
-	if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
-		log_error("Volume group name \"%s\" has invalid characters.",
-			  vg_name);
-		return NULL;
-	}
-
-	if (!skip_lock &&
-	    !lock_vol(cmd, vg_name, lock_flags, NULL)) {
-		log_error("Can't get lock for %s", vg_name);
-		return _vg_make_handle(cmd, vg, FAILED_LOCKING);
-	}
-
-	if (skip_lock)
-		log_very_verbose("Locking %s already done", vg_name);
-
-	if (is_orphan_vg(vg_name))
-		status_flags &= ~LVM_WRITE;
-
-	if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, warn_flags, enable_repair, &mdas_consistent))) {
-		if (!(read_flags & READ_OK_NOTFOUND))
-			log_error("Volume group \"%s\" not found", vg_name);
-		failure |= FAILED_NOTFOUND;
-		goto bad;
-	}
-
-	if (!_vg_access_permitted(cmd, vg, lockd_state, &failure))
-		goto bad;
-
-	/*
-	 * If we called vg_read_internal above without repair enabled,
-	 * and the read found inconsistent mdas, then then get a write/ex
-	 * lock and call it again with repair enabled so it will fix
-	 * the inconsistent mdas.
-	 *
-	 * FIXME: factor vg repair out of vg_read.  The vg_read caller
-	 * should get an error about the vg have problems and then call
-	 * a repair-specific function if it wants to.  (NB there are
-	 * other kinds of repairs hidden in _vg_read that should be
-	 * pulled out in addition to _recover_vg).
-	 */
-	if (!mdas_consistent && !enable_repair) {
-		is_shared = vg_is_shared(vg);
-		release_vg(vg);
-
-		if (!(vg = _recover_vg(cmd, vg_name, vgid, is_shared, lockd_state))) {
-			if (is_orphan_vg(vg_name))
-				log_error("Recovery of standalone physical volumes failed.");
-			else
-				log_error("Recovery of volume group \"%s\" failed.", vg_name);
-			failure |= FAILED_RECOVERY;
-			goto bad_no_unlock;
-		}
-	}
-
-	/*
-	 * Check that the tool can handle tricky cases -- missing PVs and
-	 * unknown segment types.
-	 */
-
-	if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
-	    lock_flags == LCK_VG_WRITE) {
-		log_error("Cannot change VG %s while PVs are missing.", vg->name);
-		log_error("Consider vgreduce --removemissing.");
-		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
-		goto bad;
-	}
-
-	if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
-	    lock_flags == LCK_VG_WRITE) {
-		log_error("Cannot change VG %s with unknown segments in it!",
-			  vg->name);
-		failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
-		goto bad;
-	}
-
-	failure |= vg_bad_status_bits(vg, status_flags);
-	if (failure)
-		goto_bad;
-
-	if (!(vg = _vg_make_handle(cmd, vg, failure)) || vg_read_error(vg))
-		if (!skip_lock)
-			unlock_vg(cmd, vg, vg_name);
-
-	return vg;
-
-bad:
-	if (!skip_lock)
-		unlock_vg(cmd, vg, vg_name);
-
-bad_no_unlock:
-	return _vg_make_handle(cmd, vg, failure);
-}
-
-/*
- * vg_read: High-level volume group metadata read function.
- *
- * vg_read_error() must be used on any handle returned to check for errors.
- *
- *  - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
- *  - VG is read-only: FAILED_READ_ONLY
- *  - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED
- *  - VG is not RESIZEABLE: FAILED_RESIZEABLE
- *  - locking failed: FAILED_LOCKING
- *
- * On failures, all locks are released, unless one of the following applies:
- *  - vgname_is_locked(lock_name) is true
- * FIXME: remove the above 2 conditions if possible and make an error always
- * release the lock.
- *
- * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE.
- *
- * Checking for VG existence:
- *
- * FIXME: We want vg_read to attempt automatic recovery after acquiring a
- * temporary write lock: if that fails, we bail out as usual, with failed &
- * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
- * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
- * *consistent = 1.
- */
-struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
-			     const char *vgid, uint32_t read_flags, uint32_t lockd_state)
-{
-	uint64_t status_flags = UINT64_C(0);
-	uint32_t lock_flags = LCK_VG_READ;
-
-	if (read_flags & READ_FOR_UPDATE) {
-		status_flags |= EXPORTED_VG | LVM_WRITE;
-		lock_flags = LCK_VG_WRITE;
-	}
-
-	if (read_flags & READ_ALLOW_EXPORTED)
-		status_flags &= ~EXPORTED_VG;
-
-	return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status_flags, read_flags, lockd_state);
-}
-
-/*
- * A high-level volume group metadata reading function. Open a volume group for
- * later update (this means the user code can change the metadata and later
- * request the new metadata to be written and committed).
- */
-struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
-			 const char *vgid, uint32_t read_flags, uint32_t lockd_state)
-{
-	struct volume_group *vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state);
-
-	if (!vg || vg_read_error(vg))
-		stack;
-
-	return vg;
-}
-
-/*
  * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
+ * FIXME: drop this function
  */
 uint32_t vg_read_error(struct volume_group *vg_handle)
 {
 	if (!vg_handle)
 		return FAILED_ALLOCATION;
 
-	return vg_handle->read_status;
+	return SUCCESS;
 }
 
 /*
@@ -5668,3 +4602,631 @@ int lv_on_pmem(struct logical_volume *lv)
 	return 0;
 }
 
+/*
+ * Read the metadata of one VG from every mda that lvmcache lists for it and
+ * return the copy with the highest seqno (NULL if the VG is not found/read).
+ * With precommitted set, each mda is read with vg_read_precommit, else vg_read.
+ */
+static struct volume_group *_vg_read(struct cmd_context *cmd,
+				     const char *vgname,
+				     const char *vgid,
+				     unsigned precommitted)
+{
+	const struct format_type *fmt = cmd->fmt;
+	struct format_instance *fid = NULL;
+	struct format_instance_ctx fic;
+	struct volume_group *vg, *vg_ret = NULL;
+	struct metadata_area *mda, *mda2;
+	struct device *mda_dev, *dev_ret = NULL;	/* dev_ret: device that vg_ret was read from */
+	struct cached_vg_fmtdata *vg_fmtdata = NULL;	/* Additional format-specific data about the vg */
+	int found_old_metadata = 0;
+	unsigned use_previous_vg;
+
+	log_debug_metadata("Reading VG %s %s", vgname ?: "<no name>", vgid ?: "<no vgid>");
+
+	/*
+	 * Rescan the devices that are associated with this vg in lvmcache.
+	 * This repeats what was done by the command's initial label scan,
+	 * but only the devices associated with this VG.
+	 *
+	 * The lvmcache info about these devs is from the initial label scan
+	 * performed by the command before the vg lock was held.  Now the VG
+	 * lock is held, so we rescan all the info from the devs in case
+	 * something changed between the initial scan and now that the lock
+	 * is held.
+	 *
+	 * Some commands (e.g. reporting) are fine reporting data read by
+	 * the label scan.  It doesn't matter if the devs changed between
+	 * the label scan and here, we can report what was seen in the
+	 * scan, even though it is the old state, since we will not be
+	 * making any modifications.  If the VG was being modified during
+	 * the scan, and caused us to see inconsistent metadata on the
+	 * different PVs in the VG, then we do want to rescan the devs
+	 * here to get a consistent view of the VG.  Note that we don't
+	 * know if the scan found all the PVs in the VG at this point.
+	 * We don't know that until vg_read looks at the list of PVs in
+	 * the metadata and compares that to the devices found by the scan.
+	 *
+	 * It's possible that a change made to the VG during scan was
+	 * adding or removing a PV from the VG.  In this case, the list
+	 * of devices associated with the VG in lvmcache would change
+	 * due to the rescan.
+	 *
+	 * The devs in the VG may be persistently inconsistent due to some
+	 * previous problem.  In this case, rescanning the labels here will
+	 * find the same inconsistency.  The VG repair (mistakenly done by
+	 * vg_read below) is supposed to fix that.
+	 *
+	 * FIXME: sort out the usage of the global lock (which is mixed up
+	 * with the orphan lock), and when we can tell that the global
+	 * lock is taken prior to the label scan, and still held here,
+	 * we can also skip the rescan in that case.
+	 */
+	if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) {
+		log_debug_metadata("Rescanning devices for %s", vgname ?: "<no name>");
+		lvmcache_label_rescan_vg(cmd, vgname, vgid);
+	} else {
+		log_debug_metadata("Skipped rescanning devices for %s", vgname ?: "<no name>");
+	}
+
+	/* Now determine the correct vgname if none was supplied */
+	if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) {
+		log_debug_metadata("Cache did not find VG name from vgid %s", vgid ?: "<no vgid>");
+		return NULL;
+	}
+
+	/* Determine the correct vgid if none was supplied */
+	if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) {
+		log_debug_metadata("Cache did not find VG vgid from name %s", vgname);
+		return NULL;
+	}
+
+	/*
+	 * A "format instance" is an abstraction for a VG location,
+	 * i.e. where a VG's metadata exists on disk.
+	 *
+	 * An fic (format_instance_ctx) is a temporary struct used
+	 * to create an fid (format_instance).  The fid hangs around
+	 * and is used to create a 'vg' to which it is connected (vg->fid).
+	 *
+	 * The 'fic' describes a VG in terms of fmt/name/id.
+	 *
+	 * The 'fid' describes a VG in more detail than the fic,
+	 * holding information about where to find the VG metadata.
+	 *
+	 * The 'vg' describes the VG in the most detail representing
+	 * all the VG metadata.
+	 *
+	 * The fic and fid are set up by create_instance() to describe
+	 * the VG location.  This happens before the VG metadata is
+	 * assembled into the more familiar struct volume_group "vg".
+	 *
+	 * The fid has one main purpose: to keep track of the metadata
+	 * locations for a given VG.  It does this by putting 'mda'
+	 * structs on fid->metadata_areas_in_use, which specify where
+	 * metadata is located on disk.  It gets this information
+	 * (metadata locations for a specific VG) from the command's
+	 * initial label scan.  The info is passed indirectly via
+	 * lvmcache info/vginfo structs, which are created by the
+	 * label scan and then copied into fid by create_instance().
+	 *
+	 * FIXME: just use the vginfo/info->mdas lists directly instead
+	 * of copying them into the fid list.
+	 */
+
+	fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS;
+	fic.context.vg_ref.vg_name = vgname;
+	fic.context.vg_ref.vg_id = vgid;
+
+	/*
+	 * Sets up the metadata areas that we need to read below.
+	 * For each info in vginfo->infos, for each mda in info->mdas,
+	 * (found during label_scan), copy the mda to fid->metadata_areas_in_use
+	 */
+	if (!(fid = fmt->ops->create_instance(fmt, &fic))) {
+		log_error("Failed to create format instance");
+		return NULL;
+	}
+
+	/*
+	 * We use the fid globally here so prevent the release_vg
+	 * call to destroy the fid - we may want to reuse it!
+	 */
+	fid->ref_count++;
+
+	/*
+	 * label_scan found PVs for this VG and set up lvmcache to describe the
+	 * VG/PVs that we use here to read the VG.  It created 'vginfo' for the
+	 * VG, and created an 'info' attached to vginfo for each PV.  It also
+	 * added a metadata_area struct to info->mdas for each metadata area it
+	 * found on the PV.  The info->mdas structs are copied to
+	 * fid->metadata_areas_in_use by create_instance above, and here we
+	 * read VG metadata from each of those mdas.
+	 */
+	dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
+		mda_dev = mda_get_device(mda);
+
+		/* I don't think this can happen */
+		if (!mda_dev) {
+			log_warn("Ignoring metadata for VG %s from missing dev.", vgname);
+			continue;
+		}
+
+		use_previous_vg = 0;
+
+		if (precommitted) {
+			log_debug_metadata("Reading VG %s precommit metadata from %s %llu",
+				 vgname, dev_name(mda_dev), (unsigned long long)mda->header_start);
+
+			vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg);
+
+			if (!vg && !use_previous_vg) {
+				log_warn("WARNING: Reading VG %s precommit on %s failed.", vgname, dev_name(mda_dev));
+				vg_fmtdata = NULL;
+				continue;
+			}
+		} else {
+			log_debug_metadata("Reading VG %s metadata from %s %llu",
+				 vgname, dev_name(mda_dev), (unsigned long long)mda->header_start);
+
+			vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg);
+
+			if (!vg && !use_previous_vg) {
+				log_warn("WARNING: Reading VG %s on %s failed.", vgname, dev_name(mda_dev));
+				vg_fmtdata = NULL;
+				continue;
+			}
+		}
+
+		if (!vg)
+			continue;
+
+		if (!vg_ret) {
+			vg_ret = vg;
+			dev_ret = mda_dev;
+			continue;
+		}
+
+		/*
+		 * Use the newest copy of the metadata found on any mdas.
+		 * Above, we could check if the scan found an old metadata
+		 * seqno in this mda and just skip reading it again; then these
+		 * seqno checks would just be sanity checks.
+		 */
+		if (vg->seqno == vg_ret->seqno) {
+			release_vg(vg);
+			continue;
+		}
+
+		if (vg->seqno > vg_ret->seqno) {
+			log_warn("WARNING: ignoring old metadata seqno %u on %s vs new metadata seqno %u on %s for VG %s.",
+				 vg_ret->seqno, dev_name(dev_ret),
+				 vg->seqno, dev_name(mda_dev), vg->name);
+			found_old_metadata = 1;
+			release_vg(vg_ret);
+			vg_ret = vg;
+			dev_ret = mda_dev;
+			vg_fmtdata = NULL;
+			continue;
+		}
+
+		if (vg_ret->seqno > vg->seqno) {
+			log_warn("WARNING: ignoring old metadata seqno %u on %s vs new metadata seqno %u on %s for VG %s.",
+				 vg->seqno, dev_name(mda_dev),
+				 vg_ret->seqno, dev_name(dev_ret), vg->name);
+			found_old_metadata = 1;
+			release_vg(vg);
+			vg_fmtdata = NULL;
+			continue;
+		}
+	}
+
+	if (found_old_metadata)
+		log_warn("WARNING: Inconsistent metadata found for VG %s", vgname);
+
+	if (vg_ret)
+		set_pv_devices(fid, vg_ret);
+
+	fid->ref_count--;
+
+	if (!vg_ret) {
+		_destroy_fid(&fid);
+		goto_out;
+	}
+
+	/*
+	 * Correct the lvmcache representation of the VG using the metadata
+	 * that we have chosen above (vg_ret).
+	 *
+	 * The vginfo/info representation created by label_scan was not
+	 * entirely correct since it did not use the full or final metadata.
+	 *
+	 * In lvmcache, PVs with no mdas were not attached to the vginfo during
+	 * label_scan because label_scan didn't know where they should go.  Now
+	 * that we have the VG metadata we can tell, so use that to attach those
+	 * info's to the vginfo.
+	 *
+	 * Also, outdated PVs that have been removed from the VG were incorrectly
+	 * attached to the vginfo during label_scan, and now need to be detached.
+	 */
+	lvmcache_update_vg_from_read(vg_ret, vg_ret->status & PRECOMMITTED);
+
+	/*
+	 * lvmcache_update_vg_from_read identified outdated mdas that we read
+	 * above that are not actually part of the VG.  Remove those outdated
+	 * mdas from the fid's list of mdas.
+	 */
+	dm_list_iterate_items_safe(mda, mda2, &fid->metadata_areas_in_use) {
+		mda_dev = mda_get_device(mda);
+		if (lvmcache_is_outdated_dev(cmd, vg_ret->name, (const char *)&vg_ret->id, mda_dev)) {
+			log_debug_metadata("vg_read %s ignore mda for outdated dev %s",
+					   vg_ret->name, dev_name(mda_dev));
+			/* FIXME: use _del_mda */
+			dm_list_del(&mda->list);
+		}
+	}
+
+out:
+	return vg_ret;
+}
+
+/*
+ * Lock the VG named vg_name, read its metadata and verify that the command
+ * is allowed to use it.
+ *
+ * read_flags:
+ *   READ_FOR_UPDATE          - the caller intends to write the VG: the lock is
+ *                              taken as LCK_VG_WRITE, the write-only checks
+ *                              below are applied, and a copy of the VG is
+ *                              saved in vg->vg_committed.
+ *   READ_OK_NOTFOUND         - do not log an error if the VG is not found.
+ *   READ_ALLOW_EXPORTED      - permit writing to an exported VG.
+ *   PROCESS_SKIP_ORPHAN_LOCK - do not take the lock when reading an orphan VG.
+ *
+ * lockd_state is passed on to _access_vg_lock_type().
+ *
+ * error_flags (required) is set to SUCCESS, or to the FAILED_* bits that
+ * describe why the VG could not be returned.
+ *
+ * error_vg (optional, may be NULL) receives the VG when it was read but then
+ * failed one of the checks.  In that case the lock is still held and the
+ * caller must unlock_vg() and release_vg() it.  In every other case where
+ * error_vg is written it is set to NULL.
+ *
+ * Returns the VG with its lock held on success, or NULL on failure.  When
+ * NULL is returned and *error_vg is not set, the lock is not held.
+ * Orphan VGs are a special case: the lock is dropped before returning.
+ */
+struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid,
+			     uint32_t read_flags, uint32_t lockd_state,
+			     uint32_t *error_flags, struct volume_group **error_vg)
+{
+	char uuidstr[64] __attribute__((aligned(8)));
+	struct volume_group *vg = NULL;
+	struct lv_list *lvl;
+	struct pv_list *pvl;
+	int missing_pv_dev = 0;
+	int missing_pv_flag = 0;
+	uint32_t failure = 0;
+	int writing = (read_flags & READ_FOR_UPDATE);
+
+	/*
+	 * FIXME: is this function still used to read orphans?
+	 * If so, replace any callers with vg_read_orphans.
+	 */
+	if (is_orphan_vg(vg_name)) {
+		int skip_lock = read_flags & PROCESS_SKIP_ORPHAN_LOCK;
+		log_very_verbose("Reading orphan VG %s", vg_name);
+
+		if (!skip_lock && !lock_vol(cmd, vg_name, LCK_VG_READ, NULL))
+			return_NULL;
+
+		vg = vg_read_orphans(cmd, vg_name);
+
+		if (!skip_lock)
+			unlock_vg(cmd, vg, vg_name);
+
+		*error_flags = 0;
+		/* error_vg is optional (vg_read_for_update passes NULL). */
+		if (error_vg)
+			*error_vg = NULL;
+		return vg;
+	}
+
+	if (!validate_name(vg_name)) {
+		log_error("Volume group name \"%s\" has invalid characters.", vg_name);
+		return NULL;
+	}
+
+	if (!lock_vol(cmd, vg_name, writing ? LCK_VG_WRITE : LCK_VG_READ, NULL)) {
+		log_error("Can't get lock for %s", vg_name);
+		failure |= FAILED_LOCKING;
+		goto_bad;
+	}
+
+	if (!(vg = _vg_read(cmd, vg_name, vgid, 0))) {
+		/*
+		 * The bad path only unlocks when there is a vg, so the lock
+		 * taken above has to be dropped here or it would be leaked.
+		 */
+		unlock_vg(cmd, NULL, vg_name);
+
+		/* Some callers don't care if the VG doesn't exist and don't want an error message. */
+		if (!(read_flags & READ_OK_NOTFOUND))
+			log_error("Volume group \"%s\" not found", vg_name);
+		failure |= FAILED_NOTFOUND;
+		goto_bad;
+	}
+
+	/*
+	 * Check and warn if PV ext info is not in sync with VG metadata
+	 * (vg_write fixes.)
+	 */
+	_check_pv_ext(cmd, vg);
+
+	if (!vg_strip_outdated_historical_lvs(vg))
+		log_warn("WARNING: failed to strip outdated historical lvs.");
+
+	/*
+	 * Check for missing devices in the VG.  In most cases a VG cannot be
+	 * changed while it's missing devices.  This restriction is implemented
+	 * here in vg_read.  Below we return an error from vg_read if the
+	 * vg_read flag indicates that the command is going to modify the VG.
+	 * (We should probably implement this restriction elsewhere instead of
+	 * returning an error from vg_read.)
+	 *
+	 * The PV's device may be present while the PV for the device has the
+	 * MISSING_PV flag set in the metadata.  This happened because the VG
+	 * was written while this dev was missing, so the MISSING flag was
+	 * written in the metadata for PV.  Now the device has reappeared.
+	 * However, the VG has changed since the device was last present, and
+	 * if the device has outdated data it may not be safe to just start
+	 * using it again.
+	 *
+	 * If there were no PE's used on the PV, we can just clear the MISSING
+	 * flag, but if there were PE's used we need to continue to treat the
+	 * PV as if the device is missing, limiting operations like the VG has
+	 * a missing device, and requiring the user to remove the reappeared
+	 * device from the VG, like a missing device, with vgreduce
+	 * --removemissing.
+	 */
+	dm_list_iterate_items(pvl, &vg->pvs) {
+		if (!pvl->pv->dev) {
+			/* The obvious and common case of a missing device. */
+
+			/*
+			 * Format the id into a string buffer before printing it
+			 * rather than passing the struct id itself to %s.
+			 */
+			if (!id_write_format(&pvl->pv->id, uuidstr, sizeof(uuidstr)))
+				uuidstr[0] = '\0';
+
+			log_warn("WARNING: VG %s is missing PVID %s.", vg_name, uuidstr);
+			missing_pv_dev++;
+
+		} else if (pvl->pv->status & MISSING_PV) {
+			/* A device that was missing but has reappeared. */
+
+			if (pvl->pv->pe_alloc_count == 0) {
+				log_warn("WARNING: VG %s has unused reappeared PV %s.", vg_name, dev_name(pvl->pv->dev));
+				pvl->pv->status &= ~MISSING_PV;
+				/* tell vgextend restoremissing that MISSING flag was cleared here */
+				pvl->pv->unused_missing_cleared = 1;
+			} else {
+				log_warn("WARNING: VG %s was missing PV %s.", vg_name, dev_name(pvl->pv->dev));
+				missing_pv_flag++;
+			}
+		}
+	}
+
+	if (missing_pv_dev || missing_pv_flag)
+		vg_mark_partial_lvs(vg, 1);
+
+	if (!check_pv_segments(vg)) {
+		log_error(INTERNAL_ERROR "PV segments corrupted in %s.", vg->name);
+		failure |= FAILED_INTERNAL_ERROR;
+		goto_bad;
+	}
+
+	dm_list_iterate_items(lvl, &vg->lvs) {
+		if (!check_lv_segments(lvl->lv, 0)) {
+			log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name);
+			failure |= FAILED_INTERNAL_ERROR;
+			goto_bad;
+		}
+	}
+
+	dm_list_iterate_items(lvl, &vg->lvs) {
+		/* Checks that cross-reference other LVs. */
+		if (!check_lv_segments(lvl->lv, 1)) {
+			log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name);
+			failure |= FAILED_INTERNAL_ERROR;
+			goto_bad;
+		}
+	}
+
+	if (!check_pv_dev_sizes(vg))
+		log_warn("WARNING: One or more devices used as PVs in VG %s have changed sizes.", vg->name);
+
+	_check_devs_used_correspond_with_vg(vg);
+
+	if (!_access_vg_lock_type(cmd, vg, lockd_state, &failure)) {
+		/* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */
+		goto_bad;
+	}
+
+	if (!_access_vg_systemid(cmd, vg)) {
+		failure |= FAILED_SYSTEMID;
+		goto_bad;
+	}
+
+	if (!_access_vg_clustered(cmd, vg)) {
+		failure |= FAILED_CLUSTERED;
+		goto_bad;
+	}
+
+	if (writing && !(read_flags & READ_ALLOW_EXPORTED) && vg_is_exported(vg)) {
+		log_error("Volume group %s is exported", vg->name);
+		failure |= FAILED_EXPORTED;
+		goto_bad;
+	}
+
+	if (writing && !(vg->status & LVM_WRITE)) {
+		log_error("Volume group %s is read-only", vg->name);
+		failure |= FAILED_READ_ONLY;
+		goto_bad;
+	}
+
+	if (!cmd->handles_missing_pvs && (missing_pv_dev || missing_pv_flag) && writing) {
+		log_error("Cannot change VG %s while PVs are missing.", vg->name);
+		log_error("See vgreduce --removemissing and vgextend --restoremissing.");
+		failure |= FAILED_NOT_ENABLED;
+		goto_bad;
+	}
+
+	if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && writing) {
+		log_error("Cannot change VG %s with unknown segments in it!", vg->name);
+		failure |= FAILED_NOT_ENABLED; /* FIXME new failure code here? */
+		goto_bad;
+	}
+
+	/*
+	 * When we are reading the VG with the intention of writing it,
+	 * we save a second copy of the VG in vg->vg_committed.  This
+	 * copy remains unmodified by the command operation, and is used
+	 * later if there is an error and we want to reactivate LVs.
+	 * FIXME: be specific about exactly when this works correctly.
+	 */
+	if (writing) {
+		struct dm_config_tree *cft;
+
+		if (dm_pool_locked(vg->vgmem)) {
+			/* FIXME: can this happen? */
+			log_warn("WARNING: vg_read no vg copy: pool locked");
+			goto out;
+		}
+
+		if (vg->vg_committed) {
+			/* FIXME: can this happen? */
+			log_warn("WARNING: vg_read no vg copy: copy exists");
+			release_vg(vg->vg_committed);
+			vg->vg_committed = NULL;
+		}
+
+		if (vg->vg_precommitted) {
+			/* FIXME: can this happen? */
+			log_warn("WARNING: vg_read no vg copy: pre copy exists");
+			release_vg(vg->vg_precommitted);
+			vg->vg_precommitted = NULL;
+		}
+
+		/* Failing to make the copy is only warned about; the read still succeeds. */
+		if (!(cft = export_vg_to_config_tree(vg))) {
+			log_warn("WARNING: vg_read no vg copy: copy export failed");
+			goto out;
+		}
+
+		if (!(vg->vg_committed = import_vg_from_config_tree(cft, vg->fid)))
+			log_warn("WARNING: vg_read no vg copy: copy import failed");
+
+		dm_config_destroy(cft);
+	} else {
+		if (vg->vg_precommitted)
+			log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted);
+		if (vg->vg_committed)
+			log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed);
+	}
+out:
+	/* We return with the VG lock held when read is successful. */
+	*error_flags = SUCCESS;
+	if (error_vg)
+		*error_vg = NULL;
+	return vg;
+
+bad:
+	*error_flags = failure;
+
+	/*
+	 * FIXME: get rid of this case so we don't have to return the vg when
+	 * there's an error.  It is here for process_each_pv() which wants to
+	 * eliminate the VG's devs from the list of devs it is processing, even
+	 * when it can't access the VG because of wrong system id or similar.
+	 * This could be done by looking at lvmcache info structs instead of 'vg'.
+	 * It's also used by process_each_vg/process_each_lv which want to
+	 * include error_vg values (like system_id) in error messages.
+	 * These values could also be found from lvmcache vginfo.
+	 */
+	if (error_vg && vg) {
+		if (vg->vg_precommitted)
+			log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted);
+		if (vg->vg_committed)
+			log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed);
+
+		/* caller must unlock_vg and release_vg */
+		*error_vg = vg;
+		return_NULL;
+	}
+
+	if (vg) {
+		unlock_vg(cmd, vg, vg_name);
+		release_vg(vg);
+	}
+	if (error_vg)
+		*error_vg = NULL;
+	return_NULL;
+}
+
+/*
+ * Convenience wrapper around vg_read() for callers that intend to write the
+ * VG after reading it.  READ_FOR_UPDATE is OR'ed into read_flags, which makes
+ * vg_read() take the VG lock in write mode rather than read mode.
+ *
+ * The error flags reported by vg_read() are discarded and no error_vg is
+ * requested, so the only result the caller sees is the returned vg.
+ */
+struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
+			 const char *vgid, uint32_t read_flags, uint32_t lockd_state)
+{
+	uint32_t ignored_flags = 0;
+
+	return vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state,
+		       &ignored_flags, NULL);
+}
+
+/*
+ * Attempt to repair the bad metadata areas that lvmcache reports for this VG
+ * by writing, precommitting and committing the current VG metadata to each
+ * of them in turn.
+ *
+ * This is best effort: an mda that is judged unsafe to rewrite, or for which
+ * any of the three steps fails, is reported with a warning and skipped.
+ */
+void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg)
+{
+	struct dm_list repair_list;
+	struct metadata_area *bad;
+	struct device *bad_dev;
+
+	dm_list_init(&repair_list);
+
+	lvmcache_get_bad_mdas(cmd, vg->name, (const char *)&vg->id, &repair_list);
+
+	dm_list_iterate_items(bad, &repair_list) {
+		bad_dev = mda_get_device(bad);
+
+		/*
+		 * Meaning of the bad_fields bits:
+		 *
+		 * none set: shouldn't happen.
+		 *
+		 * READ, INTERNAL: there's probably nothing wrong on disk.
+		 *
+		 * MAGIC, START: there's a good chance that the mda_header
+		 * was read from the wrong location; maybe the pv_header
+		 * location was wrong.  New metadata must not be written
+		 * to the wrong location, so repairing these would first
+		 * need further verification that the mda location is
+		 * correct.
+		 *
+		 * VERSION, CHECKSUM: when the others are correct these
+		 * look safe to repair.
+		 *
+		 * HEADER: general header error, covered by the fields
+		 * above.
+		 *
+		 * TEXT: general error in the text metadata, repairable.
+		 */
+		if (!bad->bad_fields ||
+		    (bad->bad_fields & (BAD_MDA_READ | BAD_MDA_INTERNAL |
+					BAD_MDA_MAGIC | BAD_MDA_START))) {
+			log_warn("WARNING: not repairing bad metadata (0x%x) for mda%d on %s",
+				 bad->bad_fields, bad->mda_num, dev_name(bad_dev));
+			continue;
+		}
+
+		/*
+		 * vg_write/vg_commit reread the mda_header, check its fields
+		 * and fail if any are bad, which would stop the repair.  For
+		 * the fields we already know are bad and are about to
+		 * replace, ask for those checks to be suppressed.
+		 * FIXME: do vg_write/vg_commit really need to reread and
+		 * recheck the mda_header again (probably not)?
+		 */
+		if (bad->bad_fields & BAD_MDA_VERSION)
+			bad->ignore_bad_fields |= BAD_MDA_VERSION;
+		if (bad->bad_fields & BAD_MDA_CHECKSUM)
+			bad->ignore_bad_fields |= BAD_MDA_CHECKSUM;
+
+		log_warn("WARNING: repairing bad metadata (0x%x) in mda%d at %llu on %s.",
+			 bad->bad_fields, bad->mda_num, (unsigned long long)bad->header_start, dev_name(bad_dev));
+
+		/* Stop at the first step that fails and move on to the next mda. */
+		if (!bad->ops->vg_write(vg->fid, vg, bad))
+			log_warn("WARNING: failed to write VG %s metadata to bad mda%d at %llu on %s.",
+				 vg->name, bad->mda_num, (unsigned long long)bad->header_start, dev_name(bad_dev));
+		else if (!bad->ops->vg_precommit(vg->fid, vg, bad))
+			log_warn("WARNING: failed to precommit VG %s metadata to bad mda%d at %llu on %s.",
+				 vg->name, bad->mda_num, (unsigned long long)bad->header_start, dev_name(bad_dev));
+		else if (!bad->ops->vg_commit(vg->fid, vg, bad))
+			log_warn("WARNING: failed to commit VG %s metadata to bad mda%d at %llu on %s.",
+				 vg->name, bad->mda_num, (unsigned long long)bad->header_start, dev_name(bad_dev));
+	}
+}
+