diff options
author | David Teigland <teigland@redhat.com> | 2017-10-11 10:39:22 -0500 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2017-10-18 16:23:22 -0500 |
commit | b2ad2272b073aaf83ae877ccad0881dd2c914190 (patch) | |
tree | 8e3ffe385e3dbf8fbe1abdeea75671eefb9efe52 | |
parent | 7fe6cb3dab5bf7b6f8d59337752ae2299c55cc80 (diff) | |
download | lvm2-dev-dct-vg-read-1.tar.gz |
new vg_read in progressdev-dct-vg-read-1
35 files changed, 1924 insertions, 1183 deletions
diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c index e872fbe49..7a5ae1ae4 100644 --- a/daemons/clvmd/lvm-functions.c +++ b/daemons/clvmd/lvm-functions.c @@ -838,20 +838,20 @@ static void check_config(void) void lvm_do_backup(const char *vgname) { struct volume_group * vg; - int consistent = 0; DEBUGLOG("Triggering backup of VG metadata for %s.\n", vgname); pthread_mutex_lock(&lvm_lock); - vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, WARN_PV_READ, &consistent); + vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, 0, NULL, NULL, NULL); - if (vg && consistent) + if (vg) check_current_backup(vg); else log_error("Error backing up metadata, can't find VG for group %s", vgname); - release_vg(vg); + if (vg) + release_vg(vg); dm_pool_empty(cmd->mem); pthread_mutex_unlock(&lvm_lock); diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 4f8706bf1..1c145e7ac 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -63,6 +63,8 @@ struct lvmcache_vginfo { char *lock_type; uint32_t mda_checksum; size_t mda_size; + int seqno; + int scan_mismatch; int independent_metadata_location; /* metadata read from independent areas */ /* * The following are not related to lvmcache or vginfo, @@ -589,6 +591,16 @@ struct lvmcache_vginfo *lvmcache_vginfo_from_vgname(const char *vgname, const ch return vginfo; } +const struct format_type *lvmcache_get_fmt(struct cmd_context *cmd, + const char *vgname, const char *vgid) +{ + struct lvmcache_vginfo *vginfo; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) + return_NULL; + return vginfo->fmt; +} + const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, const char *vgname, const char *vgid, unsigned revalidate_labels) @@ -1946,28 +1958,6 @@ out: return 1; } -static int _lvmcache_update_vg_mda_info(struct lvmcache_info *info, uint32_t mda_checksum, - size_t mda_size) -{ - if (!info || !info->vginfo || !mda_size) - return 1; - - if (info->vginfo->mda_checksum == mda_checksum || info->vginfo->mda_size == mda_size) - return 1; - - info->vginfo->mda_checksum = mda_checksum; - info->vginfo->mda_size = mda_size; - - /* FIXME Add checksum index */ - - log_debug_cache("lvmcache %s: VG %s: stored metadata checksum 0x%08" - PRIx32 " with size %" PRIsize_t ".", - dev_name(info->dev), info->vginfo->vgname, - mda_checksum, mda_size); - - return 1; -} - int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) { if (!_lock_hash && !lvmcache_init()) { @@ -1980,6 +1970,7 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { + struct lvmcache_vginfo *vginfo; const char *vgname = vgsummary->vgname; const char *vgid = (char *)&vgsummary->vgid; @@ -2000,16 +1991,65 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg if (!is_orphan_vg(vgname)) info->status &= ~CACHE_INVALID; - if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, - vgsummary->creation_host, info->fmt) || - !_lvmcache_update_vgid(info, info->vginfo, vgid) || - !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id) || - !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size)) - return_0; + + if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } + + if (!_lvmcache_update_vgid(info, info->vginfo, vgid)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } + + if (!_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } + + /* + * label scan sets seqno in summary, other callers do not. + * FIXME: use separate functions for updating from label + * scan summary and from real metadata. + * FIXME: clear scan_mismatch if a second scan is fine + */ + + if (!vgsummary->seqno && !vgsummary->mda_size && !vgsummary->mda_checksum) + return 1; + + if (!(vginfo = info->vginfo)) + return 1; + + if (!vginfo->seqno) { + vginfo->seqno = vgsummary->seqno; + + } else if (vgsummary->seqno != vginfo->seqno) { + log_warn("Scan of VG %s from %s found metadata seqno %d vs previous %d.", + vgname, dev_name(info->dev), vgsummary->seqno, vginfo->seqno); + vginfo->scan_mismatch = 1; + return 1; + } + + if (!vginfo->mda_size) { + vginfo->mda_checksum = vgsummary->mda_checksum; + vginfo->mda_size = vgsummary->mda_size; + + } else if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { + log_warn("Scan of VG %s from %s found mda_checksum %x mda_size %zu vs previous %x %zu", + vgname, dev_name(info->dev), vgsummary->mda_checksum, vgsummary->mda_size, + vginfo->mda_checksum, vginfo->mda_size); + vginfo->scan_mismatch = 1; + } + + log_debug_cache("lvmcache %s: VG %s: set seqno to %d mda_checksum to %x mda_size to %zu", + dev_name(info->dev), vginfo->vgname, vginfo->seqno, + vginfo->mda_checksum, vginfo->mda_size); return 1; } +/* FIXME: move all callers of this function to use update_vg_from_metadata */ + int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) { struct pv_list *pvl; @@ -2037,6 +2077,230 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) } /* + * vginfo fields are first set during label scan (while the + * VG is not locked). The metadata is later read with the + * VG lock. If vginfo fields chanced between those two, + * then update the vginfo fields from the metadata. + */ + +static int _update_vginfo_from_metadata(struct lvmcache_vginfo *vginfo, struct volume_group *vg, + uint32_t meta_checksum, size_t meta_size) +{ + int updated = 0; + + if (vginfo->status != vg->status) { + log_debug_cache("lvmcache updating %s status from 0x%llx to 0x%llx", + vg->name, (unsigned long long)vginfo->status, + (unsigned long long)vg->status); + vginfo->status = vg->status; + updated = 1; + } + + /* FIXME: one or the other NULL vs not NULL */ + + if (vginfo->system_id && vg->system_id && strcmp(vginfo->system_id, vg->system_id)) { + log_debug_cache("lvmcache updating %s system_id from %s to %s", + vg->name, vginfo->system_id, vg->system_id); + dm_free(vginfo->system_id); + if (!(vginfo->system_id = dm_strdup(vg->system_id))) + return 0; + updated = 1; + } + + if (vginfo->lock_type && vg->lock_type && strcmp(vginfo->lock_type, vg->lock_type)) { + log_debug_cache("lvmcache updating %s lock_type from %s to %s", + vg->name, vginfo->lock_type, vg->lock_type); + dm_free(vginfo->lock_type); + if (!(vginfo->lock_type = dm_strdup(vg->lock_type))) + return 0; + updated = 1; + } + + if ((vginfo->mda_checksum != meta_checksum) || (vginfo->mda_size != meta_size)) { + log_debug_cache("lvmcache updating %s mda_checksum %x mda_size %zu to %x %zu", + vg->name, vginfo->mda_checksum, vginfo->mda_size, + meta_checksum, meta_size); + vginfo->mda_checksum = meta_checksum; + vginfo->mda_size = meta_size; + updated = 1; + } + + if (vginfo->seqno != vg->seqno) { + log_debug_cache("lvmcache updating %s seqno from %u to %u", + vg->name, vginfo->seqno, vg->seqno); + vginfo->seqno = vg->seqno; + updated = 1; + } + + if (!updated) + log_debug_cache("lvmcache VG %s info unchanged since scan.", vg->name); + + return 1; +} + +/* + * Make sure that the lvmcache representation of the VG (in terms of + * vginfo/infos) matches the VG metadata. If there are any differences, make + * lvmcache vginfo/infos match the VG metadata. + * + * Only for real VGs, does not apply to orphan VG. + */ + +int lvmcache_update_vg_from_metadata(struct volume_group *vg, unsigned precommitted, + uint32_t meta_checksum, size_t meta_size) +{ + char vgid_s[ID_LEN + 1] __attribute__((aligned(8))); + char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); + struct lvmcache_vginfo *vginfo, *vginfo2; + struct lvmcache_info *info, *info2; + struct pv_list *pvl; + int found; + + /* get vginfo, check/update any fields from vg */ + /* look at all infos for vginfo and chec/update any fields from vg */ + /* look for info's not attached to vginfo and update/attach them to vginfo */ + + log_debug_cache("lvmcache updating %s with metadata.", vg->name); + + if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, NULL))) { + /* shouldn't happen */ + log_error(INTERNAL_ERROR "lvmcache_update_vg_from_metadata no vginfo for %s", vg->name); + return_0; + } + + memset(vgid_s, 0, sizeof(vgid_s)); + strncpy(vgid_s, (char *) &vg->id, sizeof(vgid_s) - 1); + + if (strcmp(vginfo->vgid, vgid_s)) { + log_error("lvmcache updating %s id %s with wrong vg name %s id %s.", + vginfo->vgname, vginfo->vgid, vg->name, vgid_s); + return 0; + } + + /* Verify the vgid to vginfo hash mapping is also correct. */ + + vginfo2 = lvmcache_vginfo_from_vgid(vginfo->vgid); + if (!vginfo2 || (vginfo2 != vginfo)) { + /* shouldn't happen */ + log_error(INTERNAL_ERROR "lvmcache_update_vg_from_metadata other vginfo for %s", vg->name); + return_0; + } + + /* + * Update vg fields that were saved in vginfo during label scan. + */ + _update_vginfo_from_metadata(vginfo, vg, meta_checksum, meta_size); + + /* + * Go through vginfo->infos, matching each to vg->pvs. + */ + dm_list_iterate_items_safe(info, info2, &vginfo->infos) { + found = 0; + + dm_list_iterate_items(pvl, &vg->pvs) { + if (info->dev != pvl->pv->dev) + continue; + found = 1; + break; + } + + if (found) + continue; + + /* + * label scan thought this dev/info belonged to this vg, + * but the vg metadata doesn't think so. We don't know + * where this dev/info belongs, so drop it into the + * defective list where it won't be used. + */ + + log_warn("lvmcache update %s removing %s from VG.", + vg->name, dev_name(info->dev)); + + lvmcache_add_defective_dev(info->dev); + lvmcache_del(info); + } + + /* + * Go through vg->pvs, matching each to vginfo->infos. + */ + dm_list_iterate_items(pvl, &vg->pvs) { + memset(pvid_s, 0, sizeof(pvid_s)); + strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s) - 1); + + found = 0; + + dm_list_iterate_items(info, &vginfo->infos) { + if (info->dev != pvl->pv->dev) + continue; + found = 1; + break; + } + + if (found) + continue; + + + if (!(info = lvmcache_info_from_pvid(pvid_s, pvl->pv->dev, 0))) { + /* FIXME: a common missing dev case? */ + log_warn("lvmcache_update_vg_from_metadata %s no info for %s", + vg->name, dev_name(pvl->pv->dev)); + continue; + } + + /* + * The dev was apparently connected to one VG during label + * scan, but now is connected to a different VG. Perhaps this + * can happen if the dev is removed from one VG and added to + * another, between the time of label scan and vg_read. + */ + if (info->vginfo && strcmp(info->vginfo->vgname, vg->name)) { + log_warn("lvmcache updating %s info %s was in vginfo %s", + vg->name, dev_name(info->dev), info->vginfo->vgname); + dm_list_del(&info->list); + info->vginfo = NULL; + _vginfo_attach_info(vginfo, info); + continue; + } + +#if 0 + /* + * This happens for devs without mdas where the VG isn't + * known during label scan, so we only know which vginfo + * they should be attached to once we have metadata. + */ + if (info->status & VG_PENDING) { + log_debug_cache("lvmcache updating %s with pending dev %s", + vg->name, dev_name(info->dev)); + info->status &= ~VG_PENDING; + _vginfo_attach_info(vginfo, info); + info->fmt = vginfo->fmt; + continue; + } +#endif + + /* + * A defective dev can't be used, and should be handled like + * a missing dev, so it shouldn't be connected to a vginfo. + * (Doesn't adding dev to defective list will remove it from + * lvmcache, so we will not hit this case?) + */ + if (lvmcache_dev_is_defective(info->dev)) { + if (info->vginfo) { + dm_list_del(&info->list); + info->vginfo = NULL; + } + continue; + } + + log_error("lvmcache updating %s found %s in unrecognized state.", + vg->name, dev_name(info->dev)); + } + + return 1; +} + +/* * We can see multiple different devices with the * same pvid, i.e. duplicates. * @@ -2223,6 +2487,9 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, } update_vginfo: + /* + * In label scan, the only vg passed to lvmcache_add is the orphan vg. + */ vgsummary.vgstatus = vgstatus; vgsummary.vgname = vgname; if (vgid) diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 4ded9aac3..133c92275 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -25,24 +25,6 @@ #define ORPHAN_PREFIX VG_ORPHANS #define ORPHAN_VG_NAME(fmt) ORPHAN_PREFIX "_" fmt -/* FIXME: use a different prefix because other flags already use FAILED_ */ -#define FAILED_INTERNAL 0x00000001 -#define FAILED_LABEL_CHECKSUM 0x00000002 -#define FAILED_LABEL_SECTOR_NUMBER 0x00000004 -#define FAILED_PV_HEADER 0x00000008 -#define FAILED_MDA_HEADER 0x00000010 -#define FAILED_MDA_HEADER_IO 0x00000020 -#define FAILED_MDA_HEADER_CHECKSUM 0x00000040 -#define FAILED_MDA_HEADER_FIELD 0x00000080 -#define FAILED_MDA_HEADER_RLOCN 0x00000100 -#define FAILED_VG_METADATA 0x00000200 -#define FAILED_VG_METADATA_IO 0x00000400 -#define FAILED_VG_METADATA_CHECKSUM 0x00000800 -#define FAILED_VG_METADATA_PARSE 0x00001000 -#define FAILED_VG_METADATA_FIELD 0x00002000 -#define FAILED_VG_METADATA_SIZE 0x00004000 - - /* LVM specific per-volume info */ /* Eventual replacement for struct physical_volume perhaps? */ @@ -77,6 +59,7 @@ struct lvmcache_vgsummary { const char *lock_type; uint32_t mda_checksum; size_t mda_size; + int seqno; }; int lvmcache_init(void); @@ -243,5 +226,9 @@ void lvmcache_remove_defective_dev(struct device *dev); int lvmcache_add_defective_dev(struct device *dev); int lvmcache_dev_is_defective(struct device *dev); int lvmcache_get_defective_devs(struct cmd_context *cmd, struct dm_list *head); +const struct format_type *lvmcache_get_fmt(struct cmd_context *cmd, const char *vgname, const char *vgid); +int lvmcache_update_vg_from_metadata(struct volume_group *vg, unsigned precommitted, + uint32_t meta_checksum, size_t meta_size); + #endif diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 5d9cda0ff..542b15972 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -1774,9 +1774,14 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) ld = get_label_read_data(b->cmd, mda_dev); if (mda_is_ignored(mda) || - !(vg = mda->ops->vg_read(b->fid, "", mda, ld, NULL, NULL))) + !(vg = mda->ops->vg_read(b->fid, "", mda, ld, 0, 0, NULL))) return 1; + if (mda->read_failed_flags) { + release_vg(vg); + return 1; + } + /* FIXME Also ensure contents match etc. */ if (!b->vg || vg->seqno > b->vg->seqno) b->vg = vg; @@ -1803,9 +1808,14 @@ static int _lvmetad_pvscan_vg_single(struct metadata_area *mda, void *baton) ld = get_label_read_data(b->cmd, mda_dev); - if (!(vg = mda->ops->vg_read(b->fid, "", mda, ld, NULL, NULL))) + if (!(vg = mda->ops->vg_read(b->fid, "", mda, ld, 0, 0, NULL))) return 1; + if (mda->read_failed_flags) { + release_vg(vg); + return 1; + } + if (!b->vg) b->vg = vg; else if (vg->seqno > b->vg->seqno) { diff --git a/lib/config/config.c b/lib/config/config.c index 79a0b8dc4..3b190e91e 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -499,7 +499,7 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *bu off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, int checksum_only, int no_dup_node_check, - uint32_t *failed_flags) + uint64_t *failed_flags) { char *fb, *fe; int r = 0; @@ -602,7 +602,7 @@ int config_file_read(struct dm_config_tree *cft) struct config_source *cs = dm_config_get_custom(cft); struct config_file *cf; struct stat info; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; int r; if (!config_file_check(cft, &filename, &info)) diff --git a/lib/config/config.h b/lib/config/config.h index 9cc30aadd..5fd25f680 100644 --- a/lib/config/config.h +++ b/lib/config/config.h @@ -242,7 +242,7 @@ struct dm_config_tree *config_open(config_source_t source, const char *filename, int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async, off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, - int skip_parse, int no_dup_node_check, uint32_t *failed_flags); + int skip_parse, int no_dup_node_check, uint64_t *failed_flags); int config_file_read(struct dm_config_tree *cft); struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source, struct cmd_context *cmd); diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c index 0f51a416b..6d76e2fd5 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -1562,3 +1562,61 @@ const char *dev_name(const struct device *dev) return (dev && dev->aliases.n) ? dm_list_item(dev->aliases.n, struct dm_str_list)->str : unknown_device_name(); } + +int device_list_remove(struct dm_list *devices, struct device *dev) +{ + struct device_id_list *dil; + + dm_list_iterate_items(dil, devices) { + if (dil->dev == dev) { + dm_list_del(&dil->list); + return 1; + } + } + + return 0; +} + +struct device_id_list *device_list_find_dev(struct dm_list *devices, struct device *dev) +{ + struct device_id_list *dil; + + dm_list_iterate_items(dil, devices) { + if (dil->dev == dev) + return dil; + } + + return NULL; +} + +struct device_id_list *device_list_find_pvid(struct dm_list *devices, const char *pvid) +{ + struct device_id_list *dil; + + dm_list_iterate_items(dil, devices) { + if (!strncmp(dil->pvid, pvid, ID_LEN)) + return dil; + } + + return NULL; +} + +int device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst) +{ + struct device_id_list *dil; + struct device_id_list *dil_new; + + dm_list_iterate_items(dil, src) { + if (!(dil_new = dm_pool_alloc(cmd->mem, sizeof(*dil_new)))) { + log_error("device_id_list alloc failed."); + return 0; + } + + dil_new->dev = dil->dev; + strncpy(dil_new->pvid, dil->pvid, ID_LEN); + dm_list_add(dst, &dil_new->list); + } + + return 1; +} + diff --git a/lib/device/dev-cache.h b/lib/device/dev-cache.h index 546b1fe2a..516add7ad 100644 --- a/lib/device/dev-cache.h +++ b/lib/device/dev-cache.h @@ -72,4 +72,10 @@ struct device *dev_iter_get(struct dev_iter *iter); void dev_reset_error_count(struct cmd_context *cmd); +/* device_id_list elements */ +int device_list_remove(struct dm_list *devices, struct device *dev); +struct device_id_list *device_list_find_dev(struct dm_list *devices, struct device *dev); +struct device_id_list *device_list_find_pvid(struct dm_list *devices, const char *pvid); +int device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst); + #endif diff --git a/lib/device/device.h b/lib/device/device.h index eb88fc83d..7be48e402 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -85,6 +85,12 @@ struct device_list { struct device *dev; }; +struct device_id_list { + struct dm_list list; + struct device *dev; + char pvid[ID_LEN + 1]; +}; + struct device_area { struct device *dev; uint64_t start; /* Bytes */ diff --git a/lib/format1/format1.c b/lib/format1/format1.c index 816e2580f..c4020c752 100644 --- a/lib/format1/format1.c +++ b/lib/format1/format1.c @@ -181,8 +181,9 @@ static struct volume_group *_format1_vg_read(struct format_instance *fid, const char *vg_name, struct metadata_area *mda __attribute__((unused)), struct label_read_data *ld __attribute__((unused)), - struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), - unsigned *use_previous_vg __attribute__((unused))) + uint32_t last_meta_checksum __attribute__((unused)), + size_t last_meta_size __attribute__((unused)), + unsigned *last_meta_matches __attribute__((unused))) { struct volume_group *vg; struct disk_list *dl; diff --git a/lib/format1/lvm1-label.c b/lib/format1/lvm1-label.c index 5303e740e..e491627e7 100644 --- a/lib/format1/lvm1-label.c +++ b/lib/format1/lvm1-label.c @@ -56,7 +56,7 @@ static int _lvm1_write(struct label *label __attribute__((unused)), void *buf __ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, struct label_read_data *ld, - struct label **label, uint32_t *failed_flags) + struct label **label, uint64_t *failed_flags) { struct pv_disk *pvd = (struct pv_disk *) buf; struct vg_disk vgd; diff --git a/lib/format_pool/format_pool.c b/lib/format_pool/format_pool.c index 93214a179..dc85e0e1c 100644 --- a/lib/format_pool/format_pool.c +++ b/lib/format_pool/format_pool.c @@ -102,8 +102,9 @@ static struct volume_group *_pool_vg_read(struct format_instance *fid, const char *vg_name, struct metadata_area *mda __attribute__((unused)), struct label_read_data *ld __attribute__((unused)), - struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), - unsigned *use_previous_vg __attribute__((unused))) + uint32_t last_meta_checksum __attribute__((unused)), + size_t last_meta_size __attribute__((unused)), + unsigned *last_meta_matches __attribute__((unused))) { struct volume_group *vg; struct user_subpool *usp; diff --git a/lib/format_pool/pool_label.c b/lib/format_pool/pool_label.c index 5c703a130..3e741c65d 100644 --- a/lib/format_pool/pool_label.c +++ b/lib/format_pool/pool_label.c @@ -57,7 +57,7 @@ static int _pool_write(struct label *label __attribute__((unused)), void *buf __ static int _pool_read(struct labeller *l, struct device *dev, void *buf, struct label_read_data *ld, - struct label **label, uint32_t *failed_flags) + struct label **label, uint64_t *failed_flags) { struct pool_list pl; diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index aa448735e..b5fb1500b 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd, } dm_list_iterate_items(mda, &tf->metadata_areas_in_use) { - if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, NULL))) + if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, 0, 0, NULL))) stack; break; } diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 2c7fb91a7..d7a769f2c 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -179,7 +179,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, char *buf=NULL; struct device_area *area; struct mda_context *mdac; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; int r=0; mdac = (struct mda_context *) mda->metadata_locn; @@ -318,7 +318,7 @@ static void _xlate_mdah(struct mda_header *mdah) } static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, - struct label_read_data *ld, uint32_t *failed_flags) + struct label_read_data *ld, uint64_t *failed_flags) { if (!dev_open_readonly(dev_area->dev)) { *failed_flags |= FAILED_INTERNAL; @@ -389,7 +389,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev struct mda_header *raw_read_mda_header(const struct format_type *fmt, struct device_area *dev_area, struct label_read_data *ld, - uint32_t *failed_flags) + uint64_t *failed_flags) { struct mda_header *mdah; @@ -436,7 +436,7 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, struct label_read_data *ld, const char *vgname, int *precommitted, - uint32_t *failed_flags) + uint64_t *failed_flags) { size_t len; char vgnamebuf[NAME_LEN + 2] __attribute__((aligned(8))); @@ -520,7 +520,7 @@ static int _raw_holds_vgname(struct format_instance *fid, int r = 0; int noprecommit = 0; struct mda_header *mdah; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; if (!dev_open_readonly(dev_area->dev)) return_0; @@ -547,8 +547,9 @@ static struct volume_group *_read_mda_header_and_metadata_vg(struct format_insta struct metadata_area *mda, struct device_area *area, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches, int precommitted) { struct volume_group *vg = NULL; @@ -557,7 +558,7 @@ static struct volume_group *_read_mda_header_and_metadata_vg(struct format_insta time_t when; char *desc; uint32_t wrap = 0; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; if (!(mdah = raw_read_mda_header(fid->fmt, area, ld, &failed_flags))) { log_debug_metadata("MDA header on %s at %"PRIu64" is not valid.", @@ -567,6 +568,10 @@ static struct volume_group *_read_mda_header_and_metadata_vg(struct format_insta goto_out; } + /* pass back to vg_read() */ + if (mda) + mda->header_start = mdah->start; + /* * N.B. in the label scan path: * read_mda_header_and_metadata_summary() calls only @@ -597,11 +602,18 @@ static struct volume_group *_read_mda_header_and_metadata_vg(struct format_insta goto out; } - vg = text_read_metadata_vg(fid, area->dev, NULL, ld, vg_fmtdata, use_previous_vg, + /* pass back to vg_read() */ + mda->vg_read_meta_checksum = rlocn->checksum; + mda->vg_read_meta_size = rlocn->size; + + vg = text_read_metadata_vg(fid, area->dev, NULL, ld, (off_t) (area->start + rlocn->offset), (uint32_t) (rlocn->size - wrap), (off_t) (area->start + MDA_HEADER_SIZE), wrap, + last_meta_checksum, + last_meta_size, + last_meta_matches, calc_crc, rlocn->checksum, &when, &desc, &failed_flags); @@ -702,8 +714,9 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg) + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -713,7 +726,8 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, return_NULL; } - vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld, vg_fmtdata, use_previous_vg, 0); + vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld, + last_meta_checksum, last_meta_size, last_meta_matches, 0); /* FIXME: move this into vg_read() */ if (vg) @@ -729,8 +743,9 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg) + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -740,7 +755,8 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, return_NULL; } - vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld, vg_fmtdata, use_previous_vg, 1); + vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld, + last_meta_checksum, last_meta_size, last_meta_matches, 1); /* FIXME: move this into vg_read() */ if (vg) @@ -765,7 +781,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, int found = 0; int noprecommit = 0; const char *old_vg_name = NULL; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; /* Ignore any mda on a PV outside the VG. vgsplit relies on this */ dm_list_iterate_items(pvl, &vg->pvs) { @@ -875,7 +891,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, int found = 0; int noprecommit = 0; const char *old_vg_name = NULL; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; /* Ignore any mda on a PV outside the VG. vgsplit relies on this */ dm_list_iterate_items(pvl, &vg->pvs) { @@ -996,7 +1012,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, struct raw_locn *rlocn; int r = 0; int noprecommit = 0; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; if (!dev_open(mdac->area.dev)) return_0; @@ -1065,8 +1081,9 @@ static struct volume_group *_vg_read_file(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg __attribute__((unused))) + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches) { struct text_context *tc = (struct text_context *) mda->metadata_locn; @@ -1077,8 +1094,9 @@ static struct volume_group *_vg_read_precommit_file(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg __attribute__((unused))) + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches) { struct text_context *tc = (struct text_context *) mda->metadata_locn; struct volume_group *vg; @@ -1321,20 +1339,13 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) return 1; } -/* - * FIXME: take a failed_flags arg from the caller and set a flag - * there according to the particular errors found here that cause - * us to return 0. Also pass a failed_flags down to - * text_read_metadata_summary() and _read_vgsummary() to get - * specific errors from the lowest levels and propagate those upward. - */ int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, struct label_read_data *ld, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors, - uint32_t *failed_flags) + uint64_t *failed_flags) { struct raw_locn *rlocn; uint32_t wrap = 0; @@ -1402,7 +1413,6 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } - /* Did we see this metadata before? */ vgsummary->mda_checksum = rlocn->checksum; vgsummary->mda_size = rlocn->size; @@ -1455,7 +1465,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu struct format_instance fid; struct lvmcache_vgsummary vgsummary = { 0 }; struct mda_header *mdah; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; raw_list = &((struct mda_lists *) fmt->private)->raws; @@ -1482,7 +1492,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu /* TODO: caching as in read_metadata_location_summary() (trigger this code?) */ if (read_metadata_location_summary(fmt, mdah, NULL, &rl->dev_area, &vgsummary, NULL, &failed_flags)) { - vg = _read_mda_header_and_metadata_vg(&fid, vgsummary.vgname, NULL, &rl->dev_area, NULL, NULL, NULL, 0); + vg = _read_mda_header_and_metadata_vg(&fid, vgsummary.vgname, NULL, &rl->dev_area, NULL, 0, 0, NULL, 0); if (vg) { lvmcache_update_vg(vg, 0); lvmcache_set_independent_location(vg->name); diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h index 7a9f088d2..f63da36e9 100644 --- a/lib/format_text/import-export.h +++ b/lib/format_text/import-export.h @@ -50,13 +50,13 @@ struct text_vg_version_ops { struct volume_group *(*read_vg) (struct format_instance * fid, const struct dm_config_tree *cf, unsigned allow_lvmetad_extensions, - uint32_t *failed_flags); + uint64_t *failed_flags); void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf, time_t *when, char **desc); int (*read_vgsummary) (const struct format_type *fmt, const struct dm_config_tree *cft, struct lvmcache_vgsummary *vgsummary, - uint32_t *failed_flags); + uint64_t *failed_flags); }; struct text_vg_version_ops *text_vg_vsn1_init(void); @@ -75,17 +75,18 @@ struct volume_group *text_read_metadata_file(struct format_instance *fid, /* Called in the vg_read path to return the full VG. */ struct volume_group *text_read_metadata_vg(struct format_instance *fid, - struct device *dev, - const char *file, - struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, - off_t offset, uint32_t size, - off_t offset2, uint32_t size2, - checksum_fn_t checksum_fn, - uint32_t checksum, - time_t *when, char **desc, - uint32_t *failed_flags); + struct device *dev, + const char *file, + struct label_read_data *ld, + off_t offset, uint32_t size, + off_t offset2, uint32_t size2, + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches, + checksum_fn_t checksum_fn, + uint32_t checksum, + time_t *when, char **desc, + uint64_t *failed_flags); /* Called in the label_scan path to return a partial VG summary. */ int text_read_metadata_summary(const struct format_type *fmt, @@ -96,7 +97,7 @@ int text_read_metadata_summary(const struct format_type *fmt, checksum_fn_t checksum_fn, int checksum_only, struct lvmcache_vgsummary *vgsummary, - uint32_t *failed_flags); + uint64_t *failed_flags); void set_pv_devices(struct format_instance *fid, struct volume_group *vg); diff --git a/lib/format_text/import.c b/lib/format_text/import.c index 2ff061f40..565604400 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -40,7 +40,7 @@ int text_read_metadata_summary(const struct format_type *fmt, checksum_fn_t checksum_fn, int checksum_only, struct lvmcache_vgsummary *vgsummary, - uint32_t *failed_flags) + uint64_t *failed_flags) { struct dm_config_tree *cft; struct text_vg_version_ops **vsn; @@ -121,57 +121,25 @@ int text_read_metadata_summary(const struct format_type *fmt, return r; } -struct cached_vg_fmtdata { - uint32_t cached_mda_checksum; - size_t cached_mda_size; -}; - -/* - * FIXME: this function's results / return values are very - * badly defined. It returns NULL both on an error, and - * when it skips parsing because of an optimization. The - * use_previous_vg might help except that the way it's used - * means it won't tell you the result of each call. So, - * failed_flags being returned as non-zero ends up being - * the only way to tell if a particular call to this - * function failed or not. Toss out the whole mess and - * rewrite it sanely. - */ - struct volume_group *text_read_metadata_vg(struct format_instance *fid, struct device *dev, const char *file, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, off_t offset, uint32_t size, off_t offset2, uint32_t size2, + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches, checksum_fn_t checksum_fn, uint32_t checksum, time_t *when, char **desc, - uint32_t *failed_flags) + uint64_t *failed_flags) { struct volume_group *vg = NULL; struct dm_config_tree *cft; struct text_vg_version_ops **vsn; char *buf = NULL; - int skip_parse; - - /* - * This struct holds the checksum and size of the VG metadata - * that was read from a previous device. When we read the VG - * metadata from this device, we can skip parsing it into a - * cft (saving time) if the checksum of the metadata buffer - * we read from this device matches the size/checksum saved in - * the mda_header/rlocn struct on this device, and matches the - * size/checksum from the previous device. - */ - if (vg_fmtdata && !*vg_fmtdata && - !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) { - log_error("Failed to allocate VG fmtdata for text format."); - *failed_flags |= FAILED_INTERNAL; - return NULL; - } + unsigned last_matches; _init_text_import(); @@ -183,10 +151,14 @@ struct volume_group *text_read_metadata_vg(struct format_instance *fid, return_NULL; } - /* Does the metadata match the already-cached VG? */ - skip_parse = vg_fmtdata && - ((*vg_fmtdata)->cached_mda_checksum == checksum) && - ((*vg_fmtdata)->cached_mda_size == (size + size2)); + if (last_meta_checksum && last_meta_size && + (checksum == last_meta_checksum) && ((size + size2) == last_meta_size)) + last_matches = 1; + else + last_matches = 0; + + if (last_meta_matches) + *last_meta_matches = last_matches; if (ld) { if (ld->buf_len >= (offset + size)) @@ -215,7 +187,7 @@ struct volume_group *text_read_metadata_vg(struct format_instance *fid, if (!config_file_read_fd(cft, dev, buf, offset, size, offset2, size2, checksum_fn, checksum, - skip_parse, 1, failed_flags)) { + last_matches, 1, failed_flags)) { log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); /* We have to be certain this has been set since it's the @@ -234,10 +206,9 @@ struct volume_group *text_read_metadata_vg(struct format_instance *fid, } } - if (skip_parse) { - if (use_previous_vg) - *use_previous_vg = 1; - log_debug_metadata("Skipped parsing metadata on %s", dev_name(dev)); + if (last_matches) { + log_debug_metadata("Skipped parsing metadata on %s with matching checksum 0x%x size %zu.", + dev_name(dev), last_meta_checksum, last_meta_size); goto out; } @@ -258,14 +229,6 @@ struct volume_group *text_read_metadata_vg(struct format_instance *fid, break; } - if (vg && vg_fmtdata && *vg_fmtdata) { - (*vg_fmtdata)->cached_mda_size = (size + size2); - (*vg_fmtdata)->cached_mda_checksum = checksum; - } - - if (use_previous_vg) - *use_previous_vg = 0; - out: config_destroy(cft); return vg; @@ -275,13 +238,12 @@ struct volume_group *text_read_metadata_file(struct format_instance *fid, const char *file, time_t *when, char **desc) { - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; - return text_read_metadata_vg(fid, NULL, file, NULL, NULL, NULL, - (off_t)0, 0, (off_t)0, 0, - NULL, - 0, - when, desc, &failed_flags); + return text_read_metadata_vg(fid, NULL, file, NULL, + (off_t)0, 0, (off_t)0, 0, + 0, 0, NULL, NULL, 0, + when, desc, &failed_flags); } static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft, @@ -290,7 +252,7 @@ static struct volume_group *_import_vg_from_config_tree(const struct dm_config_t { struct volume_group *vg = NULL; struct text_vg_version_ops **vsn; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; int vg_missing; _init_text_import(); diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index 354326d56..ad4ce7e65 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -976,7 +976,7 @@ static int _read_sections(struct format_instance *fid, static struct volume_group *_read_vg(struct format_instance *fid, const struct dm_config_tree *cft, unsigned for_lvmetad, - uint32_t *failed_flags) + uint64_t *failed_flags) { const struct dm_config_node *vgn; const struct dm_config_value *cv; @@ -1256,11 +1256,12 @@ static void _read_desc(struct dm_pool *mem, * FIXME: why are these separate? */ static int _read_vgsummary(const struct format_type *fmt, const struct dm_config_tree *cft, - struct lvmcache_vgsummary *vgsummary, uint32_t *failed_flags) + struct lvmcache_vgsummary *vgsummary, uint64_t *failed_flags) { const struct dm_config_node *vgn; struct dm_pool *mem = fmt->cmd->mem; const char *str; + uint64_t vgstatus; if (!dm_config_get_str(cft->root, "creation_host", &str)) str = ""; @@ -1288,13 +1289,19 @@ static int _read_vgsummary(const struct format_type *fmt, const struct dm_config return 0; } - if (!_read_flag_config(vgn, &vgsummary->vgstatus, VG_FLAGS)) { + if (!_read_flag_config(vgn, &vgstatus, VG_FLAGS)) { *failed_flags |= FAILED_VG_METADATA_FIELD; log_error("Couldn't find status flags for volume group %s.", vgsummary->vgname); return 0; } + if (vgstatus & LVM_WRITE_LOCKED) { + vgstatus |= LVM_WRITE; + vgstatus &= ~LVM_WRITE_LOCKED; + } + vgsummary->vgstatus = vgstatus; + if (dm_config_get_str(vgn, "system_id", &str) && (!(vgsummary->system_id = dm_pool_strdup(mem, str)))) goto fail_internal; @@ -1303,6 +1310,11 @@ static int _read_vgsummary(const struct format_type *fmt, const struct dm_config (!(vgsummary->lock_type = dm_pool_strdup(mem, str)))) goto fail_internal; + if (!_read_int32(vgn, "seqno", &vgsummary->seqno)) { + log_error("Couldn't read 'seqno' for volume group %s.", + vgsummary->vgname); + return 0; + } return 1; fail_internal: diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index ecb7b197b..b298ea503 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -83,7 +83,7 @@ struct mda_header { struct mda_header *raw_read_mda_header(const struct format_type *fmt, struct device_area *dev_area, struct label_read_data *ld, - uint32_t *failed_flags); + uint64_t *failed_flags); struct mda_lists { struct dm_list dirs; @@ -111,6 +111,6 @@ int read_metadata_location_summary(const struct format_type *fmt, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors, - uint32_t *failed_flags); + uint64_t *failed_flags); #endif diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 029c0d105..54e1fa451 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -334,7 +334,7 @@ static int _read_mda_header_and_metadata_summary(struct metadata_area *mda, void struct mda_header *mdah; struct raw_locn *rl; int rl_count = 0; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; struct lvmcache_vgsummary vgsummary = { 0 }; if (!dev_open_readonly(mdac->area.dev)) { @@ -445,7 +445,7 @@ out: static int _text_read(struct labeller *l, struct device *dev, void *label_buf, struct label_read_data *ld, struct label **label, - uint32_t *failed_flags) + uint64_t *failed_flags) { char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); char uuid[64] __attribute__((aligned(8))); diff --git a/lib/label/label.c b/lib/label/label.c index 18513fd32..cb7a72bef 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -153,7 +153,7 @@ static struct labeller *_find_label_header(struct device *dev, char *label_buf, uint64_t *label_sector, uint64_t scan_sector, - uint32_t *failed_flags) + uint64_t *failed_flags) { struct labeller_i *li; struct labeller *r = NULL; @@ -366,7 +366,7 @@ int label_read(struct device *dev, struct label **labelp, uint64_t scan_sector) struct label *label; struct labeller *l; uint64_t sector; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; struct lvmcache_info *info; int r = 0; @@ -450,7 +450,7 @@ static int _label_read_sync(struct cmd_context *cmd, struct device *dev) struct label *label = NULL; struct labeller *l; uint64_t sector; - uint32_t failed_flags; + uint64_t failed_flags; int r = 0; memset(scanbuf, 0, sizeof(scanbuf)); @@ -557,7 +557,7 @@ int label_verify(struct device *dev) char label_buf[LABEL_SIZE] __attribute__((aligned(8))); struct labeller *l; uint64_t sector; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; int r = 0; if (!dev_open_readonly(dev)) @@ -683,7 +683,7 @@ static int _label_read_data_process(struct cmd_context *cmd, struct label_read_d struct label *label = NULL; struct labeller *l; uint64_t sector; - uint32_t failed_flags = 0; + uint64_t failed_flags = 0; int r = 0; if ((ld->result < 0) || (ld->result != ld->buf_len)) { diff --git a/lib/label/label.h b/lib/label/label.h index d3e47b1c3..f85302aa0 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -77,7 +77,7 @@ struct label_ops { int (*read) (struct labeller * l, struct device * dev, void *label_buf, struct label_read_data *ld, struct label ** label, - uint32_t *failed_flags); + uint64_t *failed_flags); /* * Additional consistency checks for the paranoid. diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 81632e359..c1edf3da1 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -27,6 +27,48 @@ #include "lv.h" #include "lvm-percent.h" +#define FAILED_INTERNAL 0x0000000000000001 +#define FAILED_LABEL_CHECKSUM 0x0000000000000002 +#define FAILED_LABEL_SECTOR_NUMBER 0x0000000000000004 +#define FAILED_PV_HEADER 0x0000000000000008 +#define FAILED_MDA_HEADER 0x0000000000000010 +#define FAILED_MDA_HEADER_IO 0x0000000000000020 +#define FAILED_MDA_HEADER_CHECKSUM 0x0000000000000040 +#define FAILED_MDA_HEADER_FIELD 0x0000000000000080 +#define FAILED_MDA_HEADER_RLOCN 0x0000000000000100 +#define FAILED_VG_METADATA 0x0000000000000200 +#define FAILED_VG_METADATA_IO 0x0000000000000400 +#define FAILED_VG_METADATA_CHECKSUM 0x0000000000000800 +#define FAILED_VG_METADATA_PARSE 0x0000000000001000 +#define FAILED_VG_METADATA_FIELD 0x0000000000002000 +#define FAILED_VG_METADATA_SIZE 0x0000000000004000 +#define FAILED_NOT_FOUND 0x0000000000008000 +#define FAILED_BADNAME 0x0000000000010000 +#define FAILED_VG_LOCKING 0x0000000000020000 +#define FAILED_ACCESS 0x0000000000040000 +#define FAILED_MISSING_PVS 0x0000000000080000 +#define FAILED_MISSING_DEVS 0x0000000000100000 +#define FAILED_PV_DEV_SIZES 0x0000000000200000 +#define FAILED_BAD_PV_SEGS 0x0000000000400000 +#define FAILED_UNKNOWN_LV_SEGS 0x0000000000800000 +#define FAILED_BAD_LV_SEGS 0x0000000001000000 +#define FAILED_BAD_MDAS 0x0000000002000000 +#define FAILED_OLD_PVS 0x0000000004000000 +#define FAILED_REAPPEARED_PVS 0x0000000008000000 +#define FAILED_PARTIAL_LVS 0x0000000010000000 +#define FAILED_OUTDATED_HISTORICAL_LVS 0x0000000020000000 +#define FAILED_WRONG_PV_EXT 0x0000000040000000 +#define FAILED_CLUSTERED 0x0000000080000000 +#define FAILED_EXPORTED 0x0000000100000000 +#define FAILED_READ_ONLY 0x0000000200000000 +#define FAILED_RESIZEABLE 0x0000000400000000 +#define FAILED_LOCK_TYPE 0x0000000800000000 +#define FAILED_LOCK_MODE 0x0000001000000000 +#define FAILED_SYSTEMID 0x0000002000000000 +#define FAILED_REPAIR_UPDATE 0x0000004000000000 +#define FAILED_ERROR 0x8000000000000000 + + #define MAX_STRIPES 128U #define SECTOR_SHIFT 9L #define SECTOR_SIZE ( 1L << SECTOR_SHIFT ) @@ -175,27 +217,12 @@ #define MIRROR_SKIP_INIT_SYNC 0x00000010U /* skip initial sync */ /* vg_read and vg_read_for_update flags */ -#define READ_ALLOW_INCONSISTENT 0x00010000U +#define READ_NO_REPAIR 0x00010000U #define READ_ALLOW_EXPORTED 0x00020000U #define READ_OK_NOTFOUND 0x00040000U -#define READ_WARN_INCONSISTENT 0x00080000U -#define READ_FOR_UPDATE 0x00100000U /* A meta-flag, useful with toollib for_each_* functions. */ - -/* vg's "read_status" field */ -#define FAILED_INCONSISTENT 0x00000001U -#define FAILED_LOCKING 0x00000002U -#define FAILED_NOTFOUND 0x00000004U -#define FAILED_READ_ONLY 0x00000008U -#define FAILED_EXPORTED 0x00000010U -#define FAILED_RESIZEABLE 0x00000020U -#define FAILED_CLUSTERED 0x00000040U -#define FAILED_ALLOCATION 0x00000080U -#define FAILED_EXIST 0x00000100U -#define FAILED_RECOVERY 0x00000200U -#define FAILED_SYSTEMID 0x00000400U -#define FAILED_LOCK_TYPE 0x00000800U -#define FAILED_LOCK_MODE 0x00001000U -#define SUCCESS 0x00000000U +#define READ_FOR_UPDATE 0x00080000U /* A meta-flag, useful with toollib for_each_* functions. */ +#define READ_ALLOW_ERRORS 0x00100000U +#define READ_NO_LOCK 0x00200000U /* vg_read should not do any vg locking */ #define VGMETADATACOPIES_ALL UINT32_MAX #define VGMETADATACOPIES_UNMANAGED 0 @@ -628,8 +655,11 @@ void pvcreate_params_set_defaults(struct pvcreate_params *pp); int vg_write(struct volume_group *vg); int vg_commit(struct volume_group *vg); void vg_revert(struct volume_group *vg); -struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t warn_flags, int *consistent); +struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vg_name, const char *vgid, + int read_precommit_mdas, + uint64_t *failed_flags, + struct dm_list *bad_mdas, + struct dm_list *old_pvs); #define get_pvs( cmd ) get_pvs_internal((cmd), NULL, NULL) #define get_pvs_perserve_vg( cmd, pv_list, vg_list ) get_pvs_internal((cmd), (pv_list), (vg_list)) @@ -664,7 +694,7 @@ int vg_missing_pv_count(const struct volume_group *vg); int vgs_are_compatible(struct cmd_context *cmd, struct volume_group *vg_from, struct volume_group *vg_to); -uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname); +int vg_lock_newname(struct cmd_context *cmd, const char *vgname, int *lock_failed, int *name_exists); int lv_resize(struct logical_volume *lv, struct lvresize_params *lp, @@ -673,15 +703,8 @@ int lv_resize(struct logical_volume *lv, /* * Return a handle to VG metadata. */ -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state); -struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state); - -/* - * Test validity of a VG handle. - */ -uint32_t vg_read_error(struct volume_group *vg_handle); +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, uint64_t *failed_flags); /* pe_start and pe_end relate to any existing data so that new metadata * areas can avoid overlap */ @@ -709,7 +732,8 @@ uint32_t pv_list_extents_free(const struct dm_list *pvh); int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name); int vg_validate(struct volume_group *vg); struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name); -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name); +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, + int *lock_failed, int *name_exists); int vg_remove_mdas(struct volume_group *vg); int vg_remove_check(struct volume_group *vg); void vg_remove_pvs(struct volume_group *vg); @@ -1289,6 +1313,6 @@ int is_system_id_allowed(struct cmd_context *cmd, const char *system_id); int vg_strip_outdated_historical_lvs(struct volume_group *vg); -const char *failed_flags_str(uint32_t failed_flags); +const char *failed_flags_str(uint64_t failed_flags); #endif diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index f37008d2a..8380d4b78 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -314,7 +314,7 @@ struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_nam } if (pp->pva.idp) { - if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL, NULL)) && + if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL)) && (dev != dev_cache_get(pv_name, cmd->full_filter))) { if (!id_write_format((const struct id*)&pp->pva.idp->uuid, buffer, sizeof(buffer))) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 8db492885..ec7ee37cd 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -339,15 +339,20 @@ int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name, { struct volume_group *vg; struct pv_list *pvl; - uint32_t warn_flags = WARN_PV_READ | WARN_INCONSISTENT; - int r = 0, consistent = 0; + uint64_t failed_flags = 0; + int r = 0; - if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, warn_flags, &consistent))) { + if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, 0, &failed_flags, NULL, NULL))) { log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s", vg_name); return 0; } + if (failed_flags & FAILED_ERROR) { + release_vg(vg); + return 0; + } + dm_list_iterate_items(pvl, &vg->pvs) { if (id_equal(&pvl->pv->id, (const struct id *) pvid)) { if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) { @@ -530,7 +535,7 @@ int vg_remove_check(struct volume_group *vg) { unsigned lv_count; - if (vg_read_error(vg) || vg_missing_pv_count(vg)) { + if (vg_missing_pv_count(vg)) { log_error("Volume group \"%s\" not found, is inconsistent " "or has PVs missing.", vg ? vg->name : ""); log_error("Consider vgreduce --removemissing if metadata " @@ -947,32 +952,6 @@ static int _vg_update_vg_committed(struct volume_group *vg) return 1; } -/* - * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a - * possible failure code or zero for success. - */ -static struct volume_group *_vg_make_handle(struct cmd_context *cmd, - struct volume_group *vg, - uint32_t failure) -{ - - /* Never return a cached VG structure for a failure */ - if (vg && vg->vginfo && failure != SUCCESS) { - release_vg(vg); - vg = NULL; - } - - if (!vg && !(vg = alloc_vg("vg_make_handle", cmd, NULL))) - return_NULL; - - vg->read_status = failure; - - if (vg->fid && !_vg_update_vg_committed(vg)) - vg->read_status |= FAILED_ALLOCATION; - - return vg; -} - int lv_has_unknown_segments(const struct logical_volume *lv) { struct lv_segment *seg; @@ -994,36 +973,18 @@ int vg_has_unknown_segments(const struct volume_group *vg) return 0; } -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name) +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, + int *lock_failed, int *name_exists) { - uint32_t rc; - struct volume_group *vg; - - if (!validate_name(vg_name)) { - log_error("Invalid vg name %s", vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ - return NULL; - } - - rc = vg_lock_newname(cmd, vg_name); - if (rc != SUCCESS) - /* NOTE: let caller decide - this may be check for existence */ - return _vg_make_handle(cmd, NULL, rc); - - vg = vg_create(cmd, vg_name); - if (!vg || vg_read_error(vg)) - unlock_vg(cmd, NULL, vg_name); + if (!vg_lock_newname(cmd, vg_name, lock_failed, name_exists)) + return_NULL; - return vg; + /* vg_create calls unlock_vg on an error */ + return vg_create(cmd, vg_name); } /* * Create a VG with default parameters. - * Returns: - * - struct volume_group* with SUCCESS code: VG structure created - * - NULL or struct volume_group* with FAILED_* code: error creating VG structure - * Use vg_read_error() to determine success or failure. - * FIXME: cleanup usage of _vg_make_handle() */ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) { @@ -1066,11 +1027,10 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) vg_name); goto bad; } - return _vg_make_handle(cmd, vg, SUCCESS); + return vg; bad: unlock_and_release_vg(cmd, vg, vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ return NULL; } @@ -3135,6 +3095,7 @@ static int _vg_commit_mdas(struct volume_group *vg) /* Commit to each copy of the metadata area */ dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) { + /* MDA_FAILED is set if vg_write() fails on the mda */ if (mda->status & MDA_FAILED) continue; failed = 0; @@ -3427,10 +3388,7 @@ static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton) } /* Make orphan PVs look like a VG. */ -static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, - uint32_t warn_flags, - const char *orphan_vgname, - int *consistent) +static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname) { const struct format_type *fmt; struct lvmcache_vginfo *vginfo; @@ -3464,10 +3422,7 @@ static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, baton.cmd = cmd; baton.fmt = fmt; - baton.warn_flags = warn_flags; baton.vg = vg; - baton.consistent = 1; - baton.repair = *consistent; while ((pvl = (struct pv_list *) dm_list_first(&head.list))) { dm_list_del(&pvl->list); @@ -3479,7 +3434,6 @@ static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, if (!lvmcache_foreach_pv(vginfo, _vg_read_orphan_pv, &baton)) return_NULL; - *consistent = baton.consistent; return vg; } @@ -3763,87 +3717,68 @@ out: return r; } -/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct - * inconsistent metadata on disk (i.e. the VG write lock is held). - * This guarantees only consistent metadata is returned. - * If consistent is 0, caller must check whether consistent == 1 on return - * and take appropriate action if it isn't (e.g. abort; get write lock - * and call vg_read_internal again). - * - * If precommitted is set, use precommitted metadata if present. - * - * Either of vgname or vgid may be NULL. +struct bad_mda { + struct dm_list list; + struct device *dev; + uint64_t mda_header_start; + uint64_t failed_flags; +}; + +/* + * If a vg can be read at all, return it. + * If there are errors such that the vg should + * not be used by the caller, set FAILED_ERROR + * (along with more specific failed_flags for the case.) * - * Note: vginfo structs must not be held or used as parameters - * across the call to this function. + * If there are problems but the vg can still be used + * by the caller (or could be used depending on conditions + * in the caller), set the failed_flags and return the vg. */ -static struct volume_group *_vg_read(struct cmd_context *cmd, + +struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname, const char *vgid, - uint32_t warn_flags, - int *consistent, unsigned precommitted) + int read_precommit_mdas, + uint64_t *failed_flags, + struct dm_list *bad_mdas, + struct dm_list *old_pvs) { struct format_instance *fid = NULL; struct format_instance_ctx fic; const struct format_type *fmt; - struct volume_group *vg, *correct_vg = NULL; + struct volume_group *vg; struct metadata_area *mda; - struct lvmcache_info *info; - int inconsistent = 0; - int inconsistent_vgid = 0; - int inconsistent_pvs = 0; - int inconsistent_mdas = 0; - int inconsistent_mda_count = 0; - int strip_historical_lvs = *consistent; - int update_old_pv_ext = *consistent; - unsigned use_precommitted = precommitted; - struct dm_list *pvids; + struct volume_group *correct_vg; + struct metadata_area *correct_mda; + struct device *correct_dev; + uint32_t correct_meta_checksum; + size_t correct_meta_size; struct pv_list *pvl; - struct dm_list all_pvs; - int reappeared = 0; - struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ - unsigned use_previous_vg; + struct device_id_list *dil, *dil2; + struct bad_mda *bad; + struct dm_list all_pvs_in_metadata; /* device_id_list */ + int inconsistent_mda_pv = 0; + int retried = 0; + int found; + + dm_list_init(&all_pvs_in_metadata); log_very_verbose("Reading VG %s %.32s", vgname ?: "<no name>", vgid ?: "<no vgid>"); if (is_orphan_vg(vgname)) { - if (use_precommitted) { - log_error(INTERNAL_ERROR "vg_read_internal requires vgname " - "with pre-commit."); - return NULL; - } - return _vg_read_orphans(cmd, warn_flags, vgname, consistent); + log_error(INTERNAL_ERROR "Wrong function to read orphan vg."); + return NULL; } - if (lvmetad_used() && !use_precommitted) { + if (lvmetad_used()) { if ((correct_vg = lvmetad_vg_lookup(cmd, vgname, vgid))) { - dm_list_iterate_items(pvl, &correct_vg->pvs) - reappeared += _check_reappeared_pv(correct_vg, pvl->pv, *consistent); - if (reappeared && *consistent) - *consistent = _repair_inconsistent_vg(correct_vg); - else - *consistent = !reappeared; - if (_wipe_outdated_pvs(cmd, correct_vg, &correct_vg->pvs_outdated)) { - /* clear the list */ - dm_list_init(&correct_vg->pvs_outdated); - lvmetad_vg_clear_outdated_pvs(correct_vg); - } - } - - - if (correct_vg) { - if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } - - if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } + log_debug_metadata("Reading VG %s found metadata in lvmetad.", vgname); + return correct_vg; } - - return correct_vg; + log_debug_metadata("Reading VG %s not found in lvmetad.", vgname); + *failed_flags |= FAILED_NOT_FOUND; + *failed_flags |= FAILED_ERROR; + return NULL; } /* @@ -3856,6 +3791,12 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, * lock is held, so we rescan all the info from the devs in case * something changed between the initial scan and now that the lock * is held. + * + * FIXME: if we are reading the VG to report it, not to modify it, + * and if the label scan had no problems and saw the same metadata + * in all mdas (vginfo->scan_mismatch is 0), then we could skip + * this label rescan. Then in the common case, reporting a VG + * would only require a single read of each device. */ log_debug_metadata("Reading VG rereading labels for %s", vgname); @@ -3865,447 +3806,577 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, lvmcache_label_scan(cmd); } - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { - log_debug_metadata("Cache did not find fmt for vgname %s", vgname); - return_NULL; - } - /* Now determine the correct vgname if none was supplied */ if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { - log_debug_metadata("Cache did not find VG name from vgid %.32s", vgid); - return_NULL; + log_error("Reading VG unknown name from vgid %.32s", vgid); + *failed_flags |= FAILED_NOT_FOUND; + *failed_flags |= FAILED_ERROR; + return NULL; } /* Determine the correct vgid if none was supplied */ if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) { - log_debug_metadata("Cache did not find VG vgid from name %s", vgname); - return_NULL; + log_error("Reading VG %s cannot find vgid from name.", vgname); + *failed_flags |= FAILED_NOT_FOUND; + *failed_flags |= FAILED_ERROR; + return NULL; } - if (use_precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; + /* + * The fmt-specific label scan sets the fmt in the info structs it + * creates in lvmcache. The vginfo struct gets fmt copied from its + * info structs. So, there should never be a case where vginfo doesn't + * have fmt. (This should just be 'fmt = vginfo->fmt'; fix this by + * exposing the lvmcache structs.) + */ + + if (!(fmt = lvmcache_get_fmt(cmd, vgname, vgid))) { + log_error(INTERNAL_ERROR "Reading VG %s cannot find format type.", vgname); + *failed_flags |= FAILED_INTERNAL; + *failed_flags |= FAILED_ERROR; + return NULL; + } /* - * A "format instance" is an abstraction for a VG location, - * i.e. where a VG's metadata exists on disk. + * Reading a VG means reading the VG metadata from the mdas + * on all the PVs in the VG, comparing the metadata from each + * to ensure the copies are consistent with each other, and + * returning a single copy of the metadata. + * + * label_scan has recorded, using vginfo/info structs in + * lvmcache, which devices should be read to get metadata + * for this VG. + * + * We know what VG name we want to read, but not where to read it from. + * create_instance() copies the location of mdas from lvmcache into + * fid->metadata_areas_in_use. We will read the VG metadata from each + * of these mdas. label_scan saved the mda locations in the info + * structs attached to the vginfo for this VG. + * + * Note that the lvmcache vginfo/info mapping of vgname to devs with + * metadata is initially created without the vg lock held. This can + * lead to errors or inconsistencies if the VG was being modified + * during the label_scan. We check for these problems with various + * comparisions below. + * + * A "format instance" is an abstraction for a VG location, i.e. where + * a VG's metadata exists on disk. * - * An fic/fid pair (format_instance_ctx/format_instance) exists - * for each VG. The fic/fid is set up by create_instance() to - * describe the VG location. This happens before the VG metadata - * is assembled into the more familiar struct volume_group "vg". + * An fic/fid pair (format_instance_ctx/format_instance) exists for + * each VG. The fic/fid is set up by create_instance() to describe the + * VG location. This happens before the VG metadata is assembled into + * the more familiar struct volume_group "vg". * * The fic/fid has one main purpose: to keep track of the metadata - * locations for a given VG. It does this by putting 'mda' - * structs on fid->metadata_areas_in_use, which specify where - * metadata is located on disk. It gets this information - * (metadata locations for a specific VG) from the command's - * initial label scan. The info is passed indirectly via - * lvmcache info/vginfo structs, which are created by the - * label scan and then copied into fic/fid by create_instance(). + * locations for a given VG. It does this by putting 'mda' structs on + * fid->metadata_areas_in_use, which specify where metadata is located + * on disk. */ - - /* create format instance with appropriate metadata area */ fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; fic.context.vg_ref.vg_name = vgname; fic.context.vg_ref.vg_id = vgid; + if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); + /* FIXME: are there only internal reasons for failures here? */ + log_error("Reading VG %s failed to create format instance.", vgname); + *failed_flags |= FAILED_INTERNAL; + *failed_flags |= FAILED_ERROR; return NULL; } - /* Store pvids for later so we can check if any are missing */ - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - _destroy_fid(&fid); - return_NULL; - } + log_debug_metadata("Reading VG %s from %d mdas.", vgname, dm_list_size(&fid->metadata_areas_in_use)); /* * We use the fid globally here so prevent the release_vg * call to destroy the fid - we may want to reuse it! */ fid->ref_count++; - /* Ensure contents of all metadata areas match - else do recovery */ - inconsistent_mda_count=0; + + /* + * Read the VG metadata from each mda, checking that all copies match. + * mdas on this list does not include those with the IGNORED flag. + * (those are on the fid->metadata_areas_ignored) + * + * The mda locations on this list will not include missing devices since + * it is based on devices that label_scan read from, and it won't include + * devices that label_scan determined were defective and placed on the + * defective list. + * + * For each mda, set one of: + * mda->vg_read_skipped + * mda->vg_read_failed (mda->read_failed_flags also usually set) + * mda->vg_read_success (mda->vg_read_seqno also set) + * + * The lower level code also sets: + * mda->vg_read_meta_checksum + * mda->vg_read_meta_size + */ + correct_vg = NULL; + correct_mda = NULL; + correct_dev = NULL; + correct_meta_checksum = 0; + correct_meta_size = 0; + dm_list_init(&all_pvs_in_metadata); + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { struct device *mda_dev = mda_get_device(mda); struct label_read_data *ld; + unsigned meta_matches; + + /* _vg_read_raw sets these */ + mda->header_start = 0; + mda->read_failed_flags = 0; + mda->vg_read_failed = 0; + mda->vg_read_skipped = 0; + mda->vg_read_success = 0; + mda->vg_read_seqno = 0; + mda->vg_read_meta_checksum = 0; + mda->vg_read_meta_size = 0; - use_previous_vg = 0; - - log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev)); - - ld = get_label_read_data(cmd, mda_dev); + /* + * Will be set to 1 by _vg_read_raw if the metadata from this mda + * has checksum and size matching the values we pass in. + */ + meta_matches = 0; - if ((use_precommitted && !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, ld, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && !(vg = mda->ops->vg_read(fid, vgname, mda, ld, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; + if (!mda_dev) { + log_error(INTERNAL_ERROR "Reading VG %s from mda with no dev.", vgname); + mda->vg_read_skipped = 1; continue; } - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; + if (lvmcache_dev_is_defective(mda_dev)) { + log_error(INTERNAL_ERROR "Reading VG %s from mda on defective dev %s.", vgname, dev_name(mda_dev)); + mda->vg_read_skipped = 1; continue; } - if (!correct_vg) { - correct_vg = vg; - continue; - } - - /* FIXME Also ensure contents same - checksum compare? */ - if (correct_vg->seqno != vg->seqno) { - if (cmd->metadata_read_only) - log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) " - "as global/metadata_read_only is set.", - vgname, vg->seqno, correct_vg->seqno); - else - inconsistent = 1; - - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } - - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } - } - fid->ref_count--; - - /* Ensure every PV in the VG was in the cache */ - if (correct_vg) { /* - * Update the seqno from the cache, for the benefit of - * retro-style metadata formats like LVM1. + * When possible use the data that label scan read from the device. */ - // correct_vg->seqno = seqno > correct_vg->seqno ? seqno : correct_vg->seqno; + ld = get_label_read_data(cmd, mda_dev); /* - * If the VG has PVs without mdas, or ignored mdas, they may - * still be orphans in the cache: update the cache state here, - * and update the metadata lists in the vg. + * lv_from_lvid() used in activation is the only place reading + * a precommitted vg. */ - if (!inconsistent && - dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) { - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (!pvl->pv->dev) { - inconsistent_pvs = 1; - break; - } - - if (str_list_match_item(pvids, pvl->pv->dev->pvid)) - continue; + if (read_precommit_mdas && !(fmt->features & FMT_PRECOMMIT)) + read_precommit_mdas = 0; - /* - * PV not marked as belonging to this VG in cache. - * Check it's an orphan without metadata area - * not ignored. - */ - if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 1)) || - !lvmcache_is_orphan(info)) { - inconsistent_pvs = 1; - break; - } + if (read_precommit_mdas) { + log_debug_metadata("Reading VG %s from precommit mda on %s.", vgname, dev_name(mda_dev)); - if (lvmcache_mda_count(info)) { - if (!lvmcache_fid_add_mdas_pv(info, fid)) { - release_vg(correct_vg); - return_NULL; - } + /* _vg_read_precommit_raw() */ + vg = mda->ops->vg_read_precommit(fid, vgname, mda, ld, correct_meta_checksum, correct_meta_size, &meta_matches); - log_debug_metadata("Empty mda found for VG %s.", vgname); + if (!vg && !meta_matches && correct_meta_checksum) { + log_warn("WARNING: Reading VG %s from precommit mda on %s at %llu failed from %s.", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start, + failed_flags_str(mda->read_failed_flags)); + mda->vg_read_failed = 1; + continue; + } + } else { + log_debug_metadata("Reading VG %s from mda on %s.", vgname, dev_name(mda_dev)); - if (inconsistent_mdas) - continue; + /* _vg_read_raw() */ + vg = mda->ops->vg_read(fid, vgname, mda, ld, correct_meta_checksum, correct_meta_size, &meta_matches); - /* - * If any newly-added mdas are in-use then their - * metadata needs updating. - */ - lvmcache_foreach_mda(info, _check_mda_in_use, - &inconsistent_mdas); - } + if (!vg && !meta_matches && correct_meta_checksum) { + log_warn("WARNING: Reading VG %s from mda on %s at %llu failed from %s.", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start, + failed_flags_str(mda->read_failed_flags)); + mda->vg_read_failed = 1; + continue; } + } - /* If the check passed, let's update VG and recalculate pvids */ - if (!inconsistent_pvs) { - log_debug_metadata("Updating cache for PVs without mdas " - "in VG %s.", vgname); - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED); + /* + * No vg struct is returned when meta_matches because the + * lower level code detected that the metadata checksum + * and size matched, so it skipped parsing the metadata + * into a vg struct and returned NULL. + * + * Cases where a VG is returned and meta_matches=0 are + * checked below. + * + * FIXME: add a config setting to disable this optimization + * because the text metadata could be corrupted while the + * checksum/size from the header structs remains fine, in + * which case the corrupted metadata wouldn't be caught. + */ + if (meta_matches) { + mda->vg_read_success = 1; + mda->vg_read_seqno = correct_vg->seqno; + continue; + } - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - release_vg(correct_vg); - return_NULL; - } - } + if (!id_equal(&vg->id, (const struct id *)vgid)) { + /* put this dev on the defective list and ignore it */ + log_warn("WARNING: Reading VG %s from mda on %s at %llu found wrong VG ID.", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + lvmcache_add_defective_dev(mda_dev); + mda->vg_read_failed = 1; + release_vg(vg); + continue; } - fid->ref_count++; - if (dm_list_size(&correct_vg->pvs) != - dm_list_size(pvids) + vg_missing_pv_count(correct_vg)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - - if (critical_section()) - inconsistent = 1; - else { - release_vg(correct_vg); - correct_vg = NULL; - } - } else dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (is_missing_pv(pvl->pv)) + /* + * We may be able to use the vg with some less serious read_failed_flags, + * and either repair or accept the conditions. + */ + if (mda->read_failed_flags) { + log_warn("WARNING: Reading VG %s from mda on %s at %llu found errors from %s.", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start, + failed_flags_str(mda->read_failed_flags)); + mda->vg_read_failed = 1; + release_vg(vg); + continue; + } + + /* + * Keep a list of any PV listed in any copy of the metadata. + */ + dm_list_iterate_items(pvl, &vg->pvs) { + if (device_list_find_pvid(&all_pvs_in_metadata, (const char *)&pvl->pv->id)) + continue; + if (!(dil = dm_pool_alloc(cmd->mem, sizeof(*dil)))) { + stack; continue; - if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - release_vg(correct_vg); - correct_vg = NULL; - break; } + dil->dev = pvl->pv->dev; + strncpy(dil->pvid, (const char *)&pvl->pv->id, ID_LEN); + dm_list_add(&all_pvs_in_metadata, &dil->list); } - if (correct_vg && inconsistent_mdas) { - release_vg(correct_vg); - correct_vg = NULL; + mda->vg_read_success = 1; + mda->vg_read_seqno = vg->seqno; + + /* + * Keep one copy of the VG metadata, release the others. + * Keep the first copy with the largest seqno. + * + * ->vg_read() sets vg_read_meta_checksum,size. + */ + if (!correct_vg) { + correct_vg = vg; + correct_mda = mda; + correct_dev = mda_dev; + correct_meta_checksum = mda->vg_read_meta_checksum; + correct_meta_size = mda->vg_read_meta_size; + continue; } - fid->ref_count--; - } - dm_list_init(&all_pvs); + /* + * Different copies of the metadata can have unmatching seqno's + * if a command fails after updating only some of the mdas. + */ + if (vg->seqno > correct_vg->seqno) { + release_vg(correct_vg); + correct_vg = vg; + correct_mda = mda; + correct_dev = mda_dev; + correct_meta_checksum = mda->vg_read_meta_checksum; + correct_meta_size = mda->vg_read_meta_size; + continue; + } - /* Failed to find VG where we expected it - full scan and retry */ - if (!correct_vg) { /* - * Free outstanding format instance that remained unassigned - * from previous step where we tried to get the "correct_vg", - * but we failed to do so (so there's a dangling fid now). + * This copy of the vg matches the "correct" copy so + * free/release this copy. */ + if (vg != correct_vg) + release_vg(vg); + } + fid->ref_count--; + + if (!correct_vg) { + /* FIXME: is there any chance this can happen? */ + /* FIXME: is there any reason to redo a label scan and try again? */ + log_error("Reading VG %s could not find metadata in mdas.", vgname); + *failed_flags |= FAILED_NOT_FOUND; + *failed_flags |= FAILED_ERROR; _destroy_fid(&fid); - vg_fmtdata = NULL; + return NULL; + } - inconsistent = 0; + log_debug_metadata("Reading VG %s with metadata seqno %d from mda at %s %llu", + vgname, correct_vg->seqno, + dev_name(correct_dev), + (unsigned long long)correct_mda->header_start); - /* Independent MDAs aren't supported under low memory */ - if (!cmd->independent_metadata_areas && critical_section()) - return_NULL; - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) - return_NULL; + /* + * Check correctness of each mda. + * mda's with problems are added to a list of mdas to be fixed. + */ + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + struct device *mda_dev = mda_get_device(mda); + int is_bad; - if (precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; + if (mda == correct_mda) + continue; - /* create format instance with appropriate metadata area */ - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); - return NULL; - } + is_bad = 0; - /* - * We use the fid globally here so prevent the release_vg - * call to destroy the fid - we may want to reuse it! - */ - fid->ref_count++; - /* Ensure contents of all metadata areas match - else recover */ - inconsistent_mda_count=0; - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - use_previous_vg = 0; - - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, NULL, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, NULL, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; - continue; + if (mda->vg_read_success) { + if (mda->vg_read_seqno != correct_mda->vg_read_seqno) { + log_warn("WARNING: Reading VG %s from %s %llu found seqno %d expected %d", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start, + mda->vg_read_seqno, correct_vg->seqno); + is_bad = 1; } - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; - continue; + if ((mda->vg_read_meta_checksum != correct_mda->vg_read_meta_checksum) || + (mda->vg_read_meta_size != correct_mda->vg_read_meta_size)) { + log_warn("WARNING: Reading VG %s from %s %llu found checksum %x size %zu expected %x %zu", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start, + mda->vg_read_meta_checksum, mda->vg_read_meta_size, + correct_mda->vg_read_meta_checksum, correct_mda->vg_read_meta_size); + is_bad = 1; } + } else if (mda->vg_read_failed) { + is_bad = 1; - if (!correct_vg) { - correct_vg = vg; - if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - return_NULL; - } + } else if (mda->vg_read_skipped) { + /* no device or defective device */ + + } else { + log_error(INTERNAL_ERROR "Reading VG %s no result from mda %s %llu.", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + } + + if (is_bad && bad_mdas) { + if (!(bad = dm_pool_alloc(cmd->mem, sizeof(struct bad_mda)))) { + stack; continue; } + bad->dev = mda_dev; + bad->mda_header_start = mda->header_start; + bad->failed_flags = mda->read_failed_flags; + dm_list_add(bad_mdas, &bad->list); + } + } - if (!id_equal(&vg->id, &correct_vg->id)) { - inconsistent = 1; - inconsistent_vgid = 1; - } + /* + * Check correctness of final VG metadata. + * + * Correlate three lists of PVs: + * 1. pvs listed in the chosen copy of the VG metadata (vg->pvs) + * 2. pvs from which mdas were read (fid->metadata_areas_in_use) + * 3. pvs listed in any copy of the VG metadata (all_pvs_in_metadta) + * + * . For each PV in the VG, check that we read an mda from that PV dev. + * (or there was a known reason for not, e.g. error, missing dev, no mdas) + * + * . For each mda we read, check the dev is a PV in the VG. + * (If we read an mda from a dev that's not a PV, retry without that dev?) + * + * . For each PV listed in some copy of the metadata, check it is listed in + * the chosen copy of the metadata. + */ - /* FIXME Also ensure contents same - checksums same? */ - if (correct_vg->seqno != vg->seqno) { - /* Ignore inconsistent seqno if told to skip repair logic */ - if (cmd->metadata_read_only) - log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) " - "as global/metadata_read_only is set.", - vgname, vg->seqno, correct_vg->seqno); - else - inconsistent = 1; + /* Check that we did not miss reading mdas from any PVs in this VG. */ + dm_list_iterate_items(pvl, &correct_vg->pvs) { + struct device *pv_dev = pvl->pv->dev; + int pv_no_dev = 0; + int pv_dev_missing = 0; + int pv_dev_defective = 0; + struct metadata_area *pv_mda_used = NULL; + struct metadata_area *pv_mda_ignored = NULL; + struct metadata_area *pv_mda_failed = NULL; - if (!_update_pv_list(cmd->mem, &all_pvs, vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - release_vg(correct_vg); - return_NULL; - } - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } + if (!pv_dev) { + log_debug_metadata("Checking VG %s PV %.8s has no device.", vgname, (char *)&pvl->pv->id); + pv_no_dev = 1; + } - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } + if (is_missing_pv(pvl->pv)) { + log_debug_metadata("Checking VG %s PV %s has missing flag.", + vgname, dev_name(pv_dev)); + pv_dev_missing = 1; } - fid->ref_count--; - /* Give up looking */ - if (!correct_vg) { - _free_pv_list(&all_pvs); - _destroy_fid(&fid); - return_NULL; + if (lvmcache_dev_is_defective(pv_dev)) { + log_debug_metadata("Checking VG %s PV %s is defective.", + vgname, dev_name(pv_dev)); + pv_dev_defective = 1; } - } - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg(correct_vg, (correct_vg->status & PRECOMMITTED)); + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + struct device *mda_dev = mda_get_device(mda); + if (mda_dev != pv_dev) + continue; + + log_debug_metadata("Checking VG %s PV %s has mda seqno %d failed_flags %llx.", + vgname, dev_name(pv_dev), mda->vg_read_seqno, + (unsigned long long)mda->read_failed_flags); - if (inconsistent) { - /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */ - if (use_precommitted) { - log_error("Inconsistent pre-commit metadata copies " - "for volume group %s", vgname); + if (mda->vg_read_failed) + pv_mda_failed = mda; + else + pv_mda_used = mda; + break; + } + + dm_list_iterate_items(mda, &fid->metadata_areas_ignored) { + struct device *mda_dev = mda_get_device(mda); + if (mda_dev != pv_dev) + continue; + log_debug_metadata("Checking VG %s PV %s has mda ignored.", + vgname, dev_name(pv_dev)); + pv_mda_ignored = mda; + break; + } + + /* + * Are there any other options to check for? + */ + if (!pv_no_dev && !pv_dev_missing && !pv_dev_defective && + !pv_mda_failed && !pv_mda_used && !pv_mda_ignored) { + log_warn("Checking VG %s PV %s has missed reading mda.", + vgname, dev_name(pv_dev)); /* - * Check whether all of the inconsistent MDAs were on - * MISSING PVs -- in that case, we should be safe. + * Inconsistency between the mda list used for reading the VG + * and the PVs listed in the VG metadata. It's possible that + * things changed since the label scan. */ - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - if (mda->status & MDA_INCONSISTENT) { - log_debug_metadata("Checking inconsistent MDA: %s", dev_name(mda_get_device(mda))); - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (mda_get_device(mda) == pvl->pv->dev && - (pvl->pv->status & MISSING_PV)) - --inconsistent_mda_count; - } - } - } + inconsistent_mda_pv = 1; + } + } - if (inconsistent_mda_count < 0) - log_error(INTERNAL_ERROR "Too many inconsistent MDAs."); + /* Check for metadata references to PVs that are not in the chosen metadata. */ + dm_list_iterate_items_safe(dil, dil2, &all_pvs_in_metadata) { + found = 0; - if (!inconsistent_mda_count) { - *consistent = 0; - _free_pv_list(&all_pvs); - return correct_vg; + dm_list_iterate_items(pvl, &correct_vg->pvs) { + if (id_equal(&pvl->pv->id, (const struct id *) &dil->pvid)) { + found = 1; + break; } - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; } - if (!*consistent) { - _free_pv_list(&all_pvs); - return correct_vg; + if (!found) { + log_debug_metadata("Checking VG %s found metadata reference to PV %s on %s which is not in VG.", + vgname, dil->pvid, dev_name(dil->dev)); + dm_list_del(&dil->list); + if (old_pvs) + dm_list_add(old_pvs, &dil->list); } + } - /* Don't touch if vgids didn't match */ - if (inconsistent_vgid) { - log_warn("WARNING: Inconsistent metadata UUIDs found for " - "volume group %s.", vgname); - *consistent = 0; - _free_pv_list(&all_pvs); - return correct_vg; - } + /* Check if mdas were read from devs that are not PVs in this VG. */ + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + struct device *mda_dev = mda_get_device(mda); + found = 0; - /* - * If PV is marked missing but we found it, - * update metadata and remove MISSING flag - */ - dm_list_iterate_items(pvl, &all_pvs) - _check_reappeared_pv(correct_vg, pvl->pv, 1); + dm_list_iterate_items(pvl, &correct_vg->pvs) { + if (mda_dev == pvl->pv->dev) { + found = 1; + break; + } + } - if (!_repair_inconsistent_vg(correct_vg)) { - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; + if (!found) { + /* how does this happen? */ + log_debug_metadata("Checking VG %s read metadata from %s which is not in PV list.", + vgname, dev_name(mda_dev)); + inconsistent_mda_pv = 1; } + } - if (!_wipe_outdated_pvs(cmd, correct_vg, &all_pvs)) { - _free_pv_list(&all_pvs); + /* + * If the set of mda devs we read does not match the PV devs in the VG, + * (accounting for the fact that some PVs do not have mdas or devices + * may be missing), then retry the label scan and mda reading since + * the set of mda devs found in the initial label scan may not have + * been correct. + * + * We could probably skip a full label scan and only rescan labels on the + * specific missed/wrong devs. + * + * There may be cases where an inconsistency is not a problem, and + * we could just ignore it. But, it's not clear if there are other + * cases where it's a more serious problem such that we shouldn't + * continue without some resolution. + */ + if (inconsistent_mda_pv && !retried) { + log_debug_metadata("FIXME: Reading VG %s rescanning to find correct mdas.", vgname); +#if 0 + if (correct_vg) release_vg(correct_vg); - return_NULL; - } + correct_vg = NULL; + _destroy_fid(&fid); + /* FIXME: we probably want to keep the defective devs */ + /* FIXME: do we need to clear any lvmcache info here or does label scan do that? */ + lvmcache_force_next_label_scan(); + lvmcache_label_scan(cmd); + log_debug_metadata("Reading VG %s from mdas again.", vgname); + retried = 1; + goto read_mdas; +#endif } - _free_pv_list(&all_pvs); + /* + * Possible problems: + * + * bad_mdas + * - mdas that we can't use and should be fixed (or disabled?) + * + * old_pvs + * - these PVs may have been removed from the VG while they were missing, + * and then reappeared. + * put the devs that should not be in the VG on the defective list + * and continue without them? or clear metadata on them automatically? + * + * inconsistent_mda_pv + * - the mdas that label scan told us to read for this VG are inconsistent + * with the PVs in the VG metadata. If the vg metadata otherwise + * seems fine, we could decide to trust the vg metadata, and make + * any other devs from metadata_areas_in_use defective. + * i.e. the two independent read paths (label scan, vg_read) don't converge + * on a single view of the vg, which sounds like as like to be an lvm + * bug or design issue as anything else. + * (perhaps split out any more serious inconsistency above into + * a separate case to handle differently?) + */ - if (vg_missing_pv_count(correct_vg)) { - log_verbose("There are %d physical volumes missing.", - vg_missing_pv_count(correct_vg)); - vg_mark_partial_lvs(correct_vg, 1); + /* FIXME: do something else here */ + if (inconsistent_mda_pv) { + log_error("Reading VG %s found mdas are inconsistent with PVs.", vgname); + *failed_flags |= FAILED_ERROR; + return correct_vg; } + /* + * Fix incorrect or incomplete info about this VG in lvmcache. + * The lvmcache info was assembled by label_scan without using + * vg locks or the full vg metadata which we now have. + * + * e.g. label_scan is not able to associate PVs without metadata + * with the VG they belong to. Now that we have VG metadata, + * we can use that to associate those PV info structs with the + * right vginfo struct in lvmcache. + */ + if (!lvmcache_update_vg_from_metadata(correct_vg, read_precommit_mdas, + correct_meta_checksum, correct_meta_size)) { + log_error("Reading VG %s failed to update metadata in cache.", vgname); + *failed_flags |= FAILED_INTERNAL; + *failed_flags |= FAILED_ERROR; + return correct_vg; + } + + /* + * FIXME: can we remove this? It doesn't appear that the + * PVMOVE flag is set in the vg status any longer, so this + * shouldn't be needed. + */ if ((correct_vg->status & PVMOVE) && !pvmove_mode()) { log_error("Interrupted pvmove detected in volume group %s.", correct_vg->name); log_print("Please restore the metadata by running vgcfgrestore."); - release_vg(correct_vg); - return NULL; - } - - /* We have the VG now finally, check if PV ext info is in sync with VG metadata. */ - if (!_check_or_repair_pv_ext(cmd, correct_vg, *consistent, &inconsistent_pvs)) { - release_vg(correct_vg); - return_NULL; - } - - *consistent = !inconsistent_pvs; - - if (correct_vg && *consistent) { - if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } - - if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } + *failed_flags |= FAILED_ERROR; + return correct_vg; } return correct_vg; @@ -4449,62 +4520,6 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) return 1; } -struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname, - const char *vgid, uint32_t warn_flags, int *consistent) -{ - struct volume_group *vg; - struct lv_list *lvl; - - if (!(vg = _vg_read(cmd, vgname, vgid, warn_flags, consistent, 0))) - goto_out; - - if (!check_pv_dev_sizes(vg)) - log_warn("One or more devices used as PVs in VG %s " - "have changed sizes.", vg->name); - - if (!check_pv_segments(vg)) { - log_error(INTERNAL_ERROR "PV segments corrupted in %s.", - vg->name); - release_vg(vg); - vg = NULL; - goto out; - } - - dm_list_iterate_items(lvl, &vg->lvs) { - if (!check_lv_segments(lvl->lv, 0)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - dm_list_iterate_items(lvl, &vg->lvs) { - /* - * Checks that cross-reference other LVs. - */ - if (!check_lv_segments(lvl->lv, 1)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - (void) _check_devs_used_correspond_with_vg(vg); -out: - if (!*consistent && (warn_flags & WARN_INCONSISTENT)) { - if (is_orphan_vg(vgname)) - log_warn("WARNING: Found inconsistent standalone Physical Volumes."); - else - log_warn("WARNING: Volume Group %s is not consistent.", vgname); - } - - return vg; -} - void free_pv_fid(struct physical_volume *pv) { if (!pv) @@ -4523,8 +4538,7 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, { const char *vgname; struct volume_group *vg; - uint32_t warn_flags = WARN_PV_READ | WARN_INCONSISTENT; - int consistent = 0; + uint64_t failed_flags = 0; /* * When using lvmlockd we should never reach this point. @@ -4552,11 +4566,13 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, return NULL; } - consistent = 0; + if (!(vg = vg_read_internal(cmd, vgname, vgid, precommitted, &failed_flags, NULL, NULL))) + return_NULL; - if ((vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) { - /* Does it matter if consistent is 0 or 1? */ - return vg; + if (failed_flags & FAILED_ERROR) { + if (vg) + release_vg(vg); + return_NULL; } log_debug_metadata("Reading VG by vgid %.8s not found.", vgid); @@ -4791,10 +4807,10 @@ static int _get_pvs(struct cmd_context *cmd, uint32_t warn_flags, struct pv_list *pvl, *pvl_copy; struct dm_list *vgids; struct volume_group *vg; - int consistent = 0; int old_pvmove; struct vg_list *vgl_item = NULL; int have_pv = 0; + uint64_t failed_flags = 0; lvmcache_label_scan(cmd); @@ -4812,7 +4828,6 @@ static int _get_pvs(struct cmd_context *cmd, uint32_t warn_flags, vgid = strl->str; if (!vgid) continue; /* FIXME Unnecessary? */ - consistent = 0; if (!(vgname = lvmcache_vgname_from_vgid(NULL, vgid))) { stack; continue; @@ -4827,13 +4842,17 @@ static int _get_pvs(struct cmd_context *cmd, uint32_t warn_flags, * FIXME Remove this hack. */ - warn_flags |= WARN_INCONSISTENT; - - if (!(vg = vg_read_internal(cmd, vgname, (!vgslist) ? vgid : NULL, warn_flags, &consistent))) { + if (!(vg = vg_read_internal(cmd, vgname, (!vgslist) ? vgid : NULL, 0, &failed_flags, NULL, NULL))) { stack; continue; } + if (failed_flags & FAILED_ERROR) { + if (vg) + release_vg(vg); + return 0; + } + /* Move PVs onto results list */ if (pvslist) dm_list_iterate_items(pvl, &vg->pvs) { @@ -5112,7 +5131,7 @@ static int _access_vg_clustered(struct cmd_context *cmd, const struct volume_gro */ uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status) { - uint32_t failure = 0; + uint64_t failure = 0; if ((status & CLUSTERED) && !_access_vg_clustered(vg->cmd, vg)) /* Return because other flags are considered undefined. */ @@ -5149,36 +5168,6 @@ int vg_check_status(const struct volume_group *vg, uint64_t status) return !vg_bad_status_bits(vg, status); } -/* - * VG is left unlocked on failure - */ -static struct volume_group *_recover_vg(struct cmd_context *cmd, - const char *vg_name, const char *vgid) -{ - int consistent = 1; - struct volume_group *vg; - - unlock_vg(cmd, NULL, vg_name); - - dev_close_all(); - - if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL)) - return_NULL; - - if (!(vg = vg_read_internal(cmd, vg_name, vgid, WARN_PV_READ, &consistent))) { - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - if (!consistent) { - release_vg(vg); - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - return (struct volume_group *)vg; -} - static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id) { const struct dm_config_node *cn; @@ -5206,7 +5195,7 @@ static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id } static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg, - uint32_t lockd_state, uint32_t *failure) + uint32_t lockd_state, uint64_t *failed_flags) { if (!is_real_vg(vg->name)) return 1; @@ -5250,7 +5239,7 @@ static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg vg->name, vg->lock_type); } - *failure |= FAILED_LOCK_TYPE; + *failed_flags |= FAILED_LOCK_TYPE; return 0; } @@ -5264,7 +5253,7 @@ static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg if (lockd_state & LDST_FAIL) { if ((lockd_state & LDST_EX) || cmd->lockd_vg_enforce_sh) { log_error("Cannot access VG %s due to failed lock.", vg->name); - *failure |= FAILED_LOCK_MODE; + *failed_flags |= FAILED_LOCK_MODE; return 0; } @@ -5369,29 +5358,29 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg) * FIXME: move vg_bad_status_bits() checks in here. */ static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg, - uint32_t lockd_state, uint32_t *failure) + uint32_t lockd_state, uint64_t *failed_flags) { if (!is_real_vg(vg->name)) { /* Disallow use of LVM1 orphans when a host system ID is set. */ if (cmd->system_id && *cmd->system_id && systemid_on_pvs(vg)) { - *failure |= FAILED_SYSTEMID; + *failed_flags |= FAILED_SYSTEMID; return_0; } return 1; } if (!_access_vg_clustered(cmd, vg)) { - *failure |= FAILED_CLUSTERED; + *failed_flags |= FAILED_CLUSTERED; return 0; } - if (!_access_vg_lock_type(cmd, vg, lockd_state, failure)) { + if (!_access_vg_lock_type(cmd, vg, lockd_state, failed_flags)) { /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */ return 0; } if (!_access_vg_systemid(cmd, vg)) { - *failure |= FAILED_SYSTEMID; + *failed_flags |= FAILED_SYSTEMID; return 0; } @@ -5399,195 +5388,460 @@ static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg } /* - * Consolidated locking, reading, and status flag checking. + * On failure, the FAILED_ERROR flag is set. + * FAILED_ERROR is usually accompanied by other FAILED + * flags describing a more specific reason for the failure. + * A vg may still be returned on failure. * - * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in - * read_flags will return it with FAILED_INCONSISTENT set instead of - * giving you nothing. + * On success, a vg is returned and FAILED_ERROR is not set, + * but other FAILED flags may be set indicating issues/problems + * with the vg. * - * Use vg_read_error(vg) to determine the result. Nonzero means there were - * problems reading the volume group. - * Zero value means that the VG is open and appropriate locks are held. + * Returning FAILED_ERROR and a vg allows the caller to: + * - report better errors + * - ignore errors in specific cases + * - look at vg->pvs to eliminate them from being processed + * + * When FAILED_ERROR is set, the other failed_flags do + * not necessarily represent all the problems with the VG, + * because the function may return after the first error if it + * doesn't make sense to go any further. */ -static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, - uint32_t lock_flags, - uint64_t status_flags, - uint32_t read_flags, - uint32_t lockd_state) + + +/* + * Access exceptions used by vg_read callers. + * These apply to cases where there is no problem with the VG itself, + * but normal access controls would cause vg_read to return an error + * unless overriden by one of these. Things that control access to a + * VG are: exported flag, system ID, lock type. + * + * READ_ALLOW_EXPORTED: allow access to exported vg + * include_foreign_vgs: allow access to foreign vg + * include_active_foreign_vgs: allow access to foreign vg if lvs are active + * include_shared_vgs: allow access to shared vg + * lockd_vg_disable: allow access without lvmlockd lock + * lockd_vg_enforce_sh: disallow access without shared lock + * + * Error exceptions used by vg_read callers. + * These apply to cases where there is a problem with the VG which + * would normally cause vg_read to return an error unless overriden + * by one of these. + * + * READ_ALLOW_ERRORS + * handles_missing_pvs: allow write access while PVs are missing + * handles_unknown_segments: allow write access while LVs have unknown segments + * + * Other read flags: + * + * READ_NO_LOCK + * READ_FOR_UPDATE + * READ_NO_REPAIR + */ + +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, uint64_t *failed_flags_out) { struct volume_group *vg = NULL; - int consistent = 1; - int consistent_in; - uint32_t failure = 0; - uint32_t warn_flags = 0; - int already_locked; - - if ((read_flags & READ_ALLOW_INCONSISTENT) || (lock_flags != LCK_VG_WRITE)) - consistent = 0; + struct pv_list *pvl; + struct lv_list *lvl; + struct dm_list bad_mdas; + struct dm_list old_pvs; + uint64_t status_flags = 0; + uint64_t failed_flags = 0; + uint64_t failed_flags_access = 0; + uint64_t failed_flags_repair = 0; + unsigned saved_handles_missing_pvs; + uint32_t lock_flags; + int errors_ok = (read_flags & READ_ALLOW_ERRORS); + int repair_ok = !(read_flags & READ_NO_REPAIR); + int do_lock = !(read_flags & READ_NO_LOCK); + int missing_pvs = 0; + int missing_devs = 0; + int unknown_lv_segs = 0; + int bad_lv_segs = 0; + + dm_list_init(&bad_mdas); + dm_list_init(&old_pvs); + + *failed_flags_out = 0; if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) { - log_error("Volume group name \"%s\" has invalid characters.", - vg_name); - return NULL; + failed_flags |= FAILED_BADNAME; + goto_bad; } - already_locked = lvmcache_vgname_is_locked(vg_name); + lock_flags = (read_flags & READ_FOR_UPDATE) ? LCK_VG_WRITE : LCK_VG_READ; + + if (do_lock && !lock_vol(cmd, vg_name, lock_flags, NULL)) { + failed_flags |= FAILED_VG_LOCKING; + goto_bad; + } else + log_debug_metadata("Locking %s already done", vg_name); + + if (is_orphan_vg(vg_name)) + return _vg_read_orphans(cmd, vg_name); + + if (!(vg = vg_read_internal(cmd, vg_name, vgid, 0, &failed_flags, &bad_mdas, &old_pvs))) + goto_bad; - if (!already_locked && - !lock_vol(cmd, vg_name, lock_flags, NULL)) { - log_error("Can't get lock for %s", vg_name); - return _vg_make_handle(cmd, vg, FAILED_LOCKING); + if (failed_flags & FAILED_ERROR) { + if (!errors_ok) + goto_bad; } - if (already_locked) - log_very_verbose("Locking %s already done", vg_name); + /* + * Access checks: is the command allowed to use the VG. + * Access to a VG may be disallowed based on the host, + * the command being run, the caller (flags caller set). + * There are a number of settings checked here that + * override normal access restrictions and allow the + * vg_read to continue where it would otherwise return + * an access error. + * + * FIXME: unify these two functions: + * _vg_access_permitted() and vg_bad_status_bits(), + * they both check the state of the VG, combined with + * the state of the local host or flags passed from the + * caller, to determine if this VG can be processed. + * + * If we wanted to return the VG in cases where access is + * not allowed, we would probably want to do some of the + * other correctness checking below first. + */ + + if (!_vg_access_permitted(cmd, vg, lockd_state, &failed_flags_access)) { + failed_flags |= failed_flags_access; + goto_bad; + } if (is_orphan_vg(vg_name)) status_flags &= ~LVM_WRITE; - consistent_in = consistent; + if (read_flags & READ_ALLOW_EXPORTED) + status_flags &= ~EXPORTED_VG; - warn_flags = WARN_PV_READ; - if (consistent || (read_flags & READ_WARN_INCONSISTENT)) - warn_flags |= WARN_INCONSISTENT; + if (read_flags & READ_FOR_UPDATE) + status_flags |= EXPORTED_VG | LVM_WRITE; - /* If consistent == 1, we get NULL here if correction fails. */ - if (!(vg = vg_read_internal(cmd, vg_name, vgid, warn_flags, &consistent))) { - if (consistent_in && !consistent) { - failure |= FAILED_INCONSISTENT; - goto bad; - } - if (!(read_flags & READ_OK_NOTFOUND)) - log_error("Volume group \"%s\" not found", vg_name); - failure |= FAILED_NOTFOUND; - goto bad; + failed_flags_access |= vg_bad_status_bits(vg, status_flags); + if (failed_flags_access) { + failed_flags |= failed_flags_access; + goto_bad; } - if (!_vg_access_permitted(cmd, vg, lockd_state, &failure)) - goto bad; + /* + * We have a legitmate copy of the vg from an mda. + * We are allowed to access the vg. + * There may already be some problems identified + * by vg_read_internal (bad_mdas, old_pvs). + * Check for other problems with the vg and other + * ancillary problems that could be repaired. + */ - /* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */ - if (!consistent && !failure) { - release_vg(vg); - if (!(vg = _recover_vg(cmd, vg_name, vgid))) { - if (is_orphan_vg(vg_name)) - log_error("Recovery of standalone physical volumes failed."); - else - log_error("Recovery of volume group \"%s\" failed.", - vg_name); - failure |= FAILED_RECOVERY; - goto bad_no_unlock; - } + dm_list_iterate_items(pvl, &vg->pvs) { + if (is_missing_pv(pvl->pv)) + missing_pvs++; + if (!pvl->pv->dev) + missing_devs++; } /* - * Check that the tool can handle tricky cases -- missing PVs and - * unknown segment types. + * Only commands with "handles_missing_pvs" can modify a + * VG while it's missing devices. (The WRITE lock flag + * is how we tell it wants to write the VG.) */ + if (missing_pvs || missing_devs) { + if (missing_pvs) + failed_flags |= FAILED_MISSING_PVS; + if (missing_devs) + failed_flags |= FAILED_MISSING_DEVS; - if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s while PVs are missing.", vg->name); - log_error("Consider vgreduce --removemissing."); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */ - goto bad; + log_debug_metadata("Checking VG %s found %d missing PVs and %d PVs with no device.", + vgname, missing_pvs, missing_devs); + + if (!cmd->handles_missing_pvs && (lock_flags == LCK_VG_WRITE)) + goto_bad; } - if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s with unknown segments in it!", - vg->name); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */ - goto bad; + log_debug_metadata("Checking VG %s device sizes.", vgname); + if (!check_pv_dev_sizes(vg)) + failed_flags |= FAILED_PV_DEV_SIZES; + + log_debug_metadata("Checking VG %s PV segments.", vgname); + if (!check_pv_segments(vg)) { + failed_flags |= FAILED_BAD_PV_SEGS; + if (!errors_ok) + goto_bad; } - failure |= vg_bad_status_bits(vg, status_flags); - if (failure) - goto_bad; + log_debug_metadata("Checking VG %s LV segments.", vgname); - if (!(vg = _vg_make_handle(cmd, vg, failure)) || vg_read_error(vg)) - if (!already_locked) - unlock_vg(cmd, vg, vg_name); + /* + * FIXME: doesn't the lower level metadata parsing check this already? + * If so, it should set this failed flag and we just check that here. + */ + dm_list_iterate_items(lvl, &vg->lvs) { + if (lv_has_unknown_segments(lvl->lv)) + unknown_lv_segs++; + } - return vg; + if (unknown_lv_segs) { + failed_flags |= FAILED_UNKNOWN_LV_SEGS; -bad: - if (!already_locked) - unlock_vg(cmd, vg, vg_name); + if (!cmd->handles_unknown_segments && (lock_flags == LCK_VG_WRITE)) + goto_bad; + } -bad_no_unlock: - return _vg_make_handle(cmd, vg, failure); -} + dm_list_iterate_items(lvl, &vg->lvs) { + if (!check_lv_segments(lvl->lv, 0)) + bad_lv_segs++; + } -/* - * vg_read: High-level volume group metadata read function. - * - * vg_read_error() must be used on any handle returned to check for errors. - * - * - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT - * - VG is read-only: FAILED_READ_ONLY - * - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED - * - VG is not RESIZEABLE: FAILED_RESIZEABLE - * - locking failed: FAILED_LOCKING - * - * On failures, all locks are released, unless one of the following applies: - * - vgname_is_locked(lock_name) is true - * FIXME: remove the above 2 conditions if possible and make an error always - * release the lock. - * - * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE. - * - * Checking for VG existence: - * - * FIXME: We want vg_read to attempt automatic recovery after acquiring a - * temporary write lock: if that fails, we bail out as usual, with failed & - * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in - * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with - * *consistent = 1. - */ -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - uint64_t status_flags = UINT64_C(0); - uint32_t lock_flags = LCK_VG_READ; + dm_list_iterate_items(lvl, &vg->lvs) { + /* Checks that cross-reference other LVs. */ + if (!check_lv_segments(lvl->lv, 1)) + bad_lv_segs++; + } - if (read_flags & READ_FOR_UPDATE) { - status_flags |= EXPORTED_VG | LVM_WRITE; - lock_flags = LCK_VG_WRITE; + if (bad_lv_segs) { + failed_flags |= FAILED_BAD_LV_SEGS; + if (!errors_ok) + goto_bad; } - if (read_flags & READ_ALLOW_EXPORTED) - status_flags &= ~EXPORTED_VG; + /* + * This does not repair anything, it just prints warnings if + * active LVs in this VG are using other devs. + * FIXME: Should we disallow changing the VG in this condition? + */ + log_debug_metadata("Checking VG %s devices being used by LVs.", vgname); + _check_devs_used_correspond_with_vg(vg); - return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status_flags, read_flags, lockd_state); -} + /* + * Repair/maintenance. + * Most are ancillary conditions, not related to the + * correctness or usability of the VG. If we cannot + * repair them, the VG can still be used. Any of + * these issues that are not repaired have a failed_flag + * set to notify the caller of the issue. + */ -/* - * A high-level volume group metadata reading function. Open a volume group for - * later update (this means the user code can change the metadata and later - * request the new metadata to be written and committed). - */ -struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - struct volume_group *vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state); + /* + * vg_read_internal discovers these problems + * bad_mdas: vg metadata could not be found/used from these mdas + * old_pvs: these devices have metadata for this vg, but are not + * in the vg. + */ + if (!dm_list_empty(&bad_mdas)) + failed_flags_repair |= FAILED_BAD_MDAS; - if (!vg || vg_read_error(vg)) - stack; + if (!dm_list_empty(&old_pvs)) + failed_flags_repair |= FAILED_OLD_PVS; + +#if 0 + /* + * Reappeared PVs: if a PV in the metadata has the MISSING flag, + * but the device is now seen, then we can clear the MISSING flag + * and update the metadata on the device. + * + * (was check_reappeared_pv) + * (action: clear MISSING_PV on vg->pvs entries) + */ + if (_find_reappeared_pvs(vg)) + failed_flags_repair |= FAILED_REAPPEARED_PVS; + + /* + * Set lv->status PARTIAL_LV for LVs using PVs that are missing. + * + * Is this a metadata flag? + * + * (was vg_mark_partial_lvs) + * (action: set PARTIAL_LV on vg->lvs entries) + */ + if (_find_partial_lvs(vg)) + failed_flags_repair |= FAILED_PARTIAL_LVS; + + /* + * (was vg_strip_outdated_historical_lvs) + * (action: updates vg->...lvs) + */ + if (_find_outdated_historical_lvs(vg)) + failed_flags_repair |= FAILED_OUTDATED_HISTORICAL_LVS; + + /* + * (was check_or_repair_pv_ext, _vg_update_old_pv_ext_if_needed) + * (action: pv_write(), vg_write()/vg_commit(), + * adds pvl to vg->pv_write_list, vg_write()/vg_commit()) + */ + if (_find_wrong_pv_ext(vg)) + failed_flags_repair |= FAILED_WRONG_PV_EXT; +#endif + + /* + * Nothing needs repair. + */ + if (!failed_flags_repair) + goto out; + + /* + * Check conditions in which we cannot repair. + * If we don't repair for some reason, all the repairs we would + * have done are returned in failed_flags. + */ + + if (!repair_ok) { + /* The caller does not want to repair. */ + failed_flags |= failed_flags_repair; + goto out; + } + + if (cmd->metadata_read_only) { + log_warn("WARNING: Not updating VG %s because global/metadata_read_only is set.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } + + if (vg_is_exported(vg)) { + log_warn("WARNING: Not updating VG %s because VG is exported.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } + + if (_is_foreign_vg(vg)) { + log_warn("WARNING: Not updating VG %s because VG is foreign.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } + if (lvmcache_found_duplicate_pvs()) { + log_warn("WARNING: Not updating VG %s because of duplicate PVs.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } + + if (lock_flags != LCK_VG_WRITE) { + /* + * Try to upgrade to a write lock to do the repairs. + */ + if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL)) { + log_warn("WARNING: Not udpating VG %s because write lock is not held.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } else + log_warn("VG %s upgraded to exclusive file lock for repair.", vg_name); + } + + if (is_lockd_type(vg->lock_type) && !(lockd_state & LDST_EX)) { + uint32_t lockd_state_convert; + + /* + * Try to upgrade to ex lock to do the repairs. + */ + if (!lockd_vg(cmd, vg_name, "ex", 0, &lockd_state_convert)) { + log_warn("WARNING: Not updating VG %s because exclusive lock is not held in lvmlockd.", vg_name); + failed_flags |= failed_flags_repair; + goto out; + } else + log_warn("VG %s upgraded to exclusive lock in lvmlockd for repair.", vg_name); + } + + /* + * Do repairs. Any repairs that failed are indicated with failed_flags. + * + * FIXME: record in log file the repairs being made. + */ + +#if 0 + /* is this fixed just as a result of vg_write() updating all mdas? defective devs? */ + if (failed_flags_repair & FAILED_BAD_MDAS) + if (_fix_bad_mdas(vg, &bad_mdas)) + failed_flags_repair &= ~FAILED_BAD_MDAS; + + if (failed_flags_repair & FAILED_OLD_PVS) + if (_fix_old_pvs(vg, &old_pvs)) + failed_flags_repair &= ~FAILED_OLD_PVS; + + if (failed_flags_repair & FAILED_REAPPEARED_PVS) + if (_fix_reappeared_pvs(vg)) + failed_flags_repair &= ~FAILED_REAPPEARED_PVS; + + if (failed_flags_repair & FAILED_PARITIAL_LVS) + if (_fix_partial_lvs(vg)) + failed_flags_repair &= ~FAILED_PARITIAL_LVS; + + if (failed_flags_repair & FAILED_OUTDATED_HISTORICAL_LVS) + if (_fix_outdated_historical_lvs(vg)) + failed_flags_repair &= ~FAILED_OUTDATED_HISTORICAL_LVS; + + if (failed_flags_repair & FAILED_WRONG_PV_EXT) + if (_fix_wrong_pv_ext(vg)) + failed_flags_repair &= ~FAILED_WRONG_PV_EXT; + + if (failed_flags_repair) + failed_flags |= failed_flags_repair; +#endif + + /* + * Write the repaired VG. + */ + + log_warn("Updating VG %s using metadata version %u.", vg_name, vg->seqno); + + saved_handles_missing_pvs = cmd->handles_missing_pvs; + cmd->handles_missing_pvs = 1; + if (!vg_write(vg)) { + log_error("Updating VG %s failed to write VG.", vg_name); + cmd->handles_missing_pvs = saved_handles_missing_pvs; + failed_flags |= FAILED_REPAIR_UPDATE; + goto out; + } + + cmd->handles_missing_pvs = saved_handles_missing_pvs; + + if (!vg_commit(vg)) { + log_error("Repairing VG %s failed to commit VG.", vg_name); + failed_flags |= FAILED_REPAIR_UPDATE; + goto out; + } + + log_warn("Reading VG %s updated metadata with version %u.", vg_name, vg->seqno); + + out: + /* When we return a vg, we also return with the vg lock held. */ + + if (failed_flags_out) + *failed_flags_out = failed_flags; + + log_debug_metadata("Reading VG %s done with failed_flags %llx", vg_name, (unsigned long long)failed_flags); return vg; -} -/* - * Test the validity of a VG handle returned by vg_read() or vg_read_for_update(). - */ -uint32_t vg_read_error(struct volume_group *vg_handle) -{ - if (!vg_handle) - return FAILED_ALLOCATION; + bad: + failed_flags |= FAILED_ERROR; + + /* + * If caller doesn't want failed_flags returned, it means they + * don't get the vg returned on an error. If they want to get + * the vg returned on an error, they need to check failed_flags. + */ + if (!failed_flags_out) { + release_vg(vg); + vg = NULL; + } + + /* + * When we return a vg, we also return with the vg lock held. + * When we don't return a vg, we also don't return with the vg + * lock held (unless we're not involved in locking.) + */ + if (!vg && do_lock) + unlock_vg(cmd, vg, vg_name); + + if (failed_flags_out) + *failed_flags_out = failed_flags; - return vg_handle->read_status; + log_debug_metadata("Reading VG %s error with failed_flags %llx", vg_name, (unsigned long long)failed_flags); + return vg; } /* @@ -5597,16 +5851,13 @@ uint32_t vg_read_error(struct volume_group *vg_handle) * NOTE: If you find the return codes confusing, you might think of this * function as similar to an open() call with O_CREAT and O_EXCL flags * (open returns fail with -EEXIST if file already exists). - * - * Returns: - * FAILED_LOCKING - Cannot lock name - * FAILED_EXIST - VG name already exists - cannot reserve - * SUCCESS - VG name does not exist in system and WRITE lock held */ -uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname) +int vg_lock_newname(struct cmd_context *cmd, const char *vgname, int *lock_failed, int *name_exists) { - if (!lock_vol(cmd, vgname, LCK_VG_WRITE, NULL)) - return FAILED_LOCKING; + if (!lock_vol(cmd, vgname, LCK_VG_WRITE, NULL)) { + *lock_failed = 1; + return 0; + } /* Find the vgname in the cache */ /* If it's not there we must do full scan to be completely sure */ @@ -5620,20 +5871,22 @@ uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname) * critical_section() is true. */ unlock_vg(cmd, NULL, vgname); - return FAILED_LOCKING; + *lock_failed = 1; + return 0; } lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); if (!lvmcache_fmt_from_vgname(cmd, vgname, NULL, 0)) { /* vgname not found after scanning */ - return SUCCESS; + return 1; } } } /* Found vgname so cannot reserve. */ unlock_vg(cmd, NULL, vgname); - return FAILED_EXIST; + *name_exists = 1; + return 0; } struct format_instance *alloc_fid(const struct format_type *fmt, @@ -6056,7 +6309,7 @@ int vg_strip_outdated_historical_lvs(struct volume_group *vg) { return 1; } -const char *failed_flags_str(uint32_t failed_flags) +const char *failed_flags_str(uint64_t failed_flags) { if (failed_flags & FAILED_LABEL_CHECKSUM) return "LVM label header checksum"; diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index c1329e570..527a4d927 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -82,14 +82,17 @@ struct metadata_area_ops { const char *vg_name, struct metadata_area * mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg); + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches); + struct volume_group *(*vg_read_precommit) (struct format_instance * fi, const char *vg_name, struct metadata_area * mda, struct label_read_data *ld, - struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg); + uint32_t last_meta_checksum, + size_t last_meta_size, + unsigned *last_meta_matches); /* * Write out complete VG metadata. You must ensure internal * consistency before calling. eg. PEs can't refer to PVs not @@ -169,8 +172,15 @@ struct metadata_area { struct dm_list list; struct metadata_area_ops *ops; void *metadata_locn; + uint64_t header_start; /* mda_header.start */ + uint64_t read_failed_flags; uint32_t status; - uint32_t read_failed_flags; + uint32_t vg_read_seqno; + uint32_t vg_read_meta_checksum; + size_t vg_read_meta_size; + unsigned vg_read_skipped:1; + unsigned vg_read_success:1; + unsigned vg_read_failed:1; }; struct metadata_area *mda_copy(struct dm_pool *mem, struct metadata_area *mda); diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c index 35cdfea7e..88323d56f 100644 --- a/lib/metadata/vg.c +++ b/lib/metadata/vg.c @@ -779,10 +779,9 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg, vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv); vg->extent_count -= pv_pe_count(pv); - orphan_vg = vg_read_for_update(cmd, vg->fid->fmt->orphan_vg_name, - NULL, 0, 0); - - if (vg_read_error(orphan_vg)) + orphan_vg = vg_read(cmd, vg->fid->fmt->orphan_vg_name, NULL, + READ_NO_LOCK | READ_FOR_UPDATE, 0, NULL); + if (!orphan_vg) goto bad; if (!vg_split_mdas(cmd, vg, orphan_vg) || !vg->pv_count) { diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h index bc2c2ddba..bd828fcd6 100644 --- a/lib/metadata/vg.h +++ b/lib/metadata/vg.h @@ -152,11 +152,6 @@ struct volume_group { struct dm_list removed_pvs; uint32_t open_mode; /* FIXME: read or write - check lock type? */ - /* - * Store result of the last vg_read(). - * 0 for success else appropriate FAILURE_* bits set. - */ - uint32_t read_status; uint32_t mda_copies; /* target number of mdas for this VG */ struct dm_hash_table *hostnames; /* map of creation hostnames */ diff --git a/tools/polldaemon.c b/tools/polldaemon.c index d69284d47..add65a333 100644 --- a/tools/polldaemon.c +++ b/tools/polldaemon.c @@ -157,12 +157,10 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, } /* Locks the (possibly renamed) VG again */ - vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state); - if (vg_read_error(vg)) { + vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state, NULL); + if (!vg) { /* What more could we do here? */ log_error("ABORTING: Can't reread VG for %s.", id->display_name); - release_vg(vg); - vg = NULL; ret = 0; goto out; } @@ -400,9 +398,8 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i * to the VG we're interested in is the change done locally. */ - vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state); - if (vg_read_error(vg)) { - release_vg(vg); + vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state, NULL); + if (!vg) { log_error("Can't reread VG for %s", id->display_name); ret = 0; goto out_ret; diff --git a/tools/toollib.c b/tools/toollib.c index cd72fb130..5ff1afe3d 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -26,12 +26,6 @@ ((ret_code) == ECMD_PROCESSED) ? REPORT_OBJECT_CMDLOG_SUCCESS \ : REPORT_OBJECT_CMDLOG_FAILURE, (ret_code)) -struct device_id_list { - struct dm_list list; - struct device *dev; - char pvid[ID_LEN + 1]; -}; - const char *command_name(struct cmd_context *cmd) { return cmd->command->name; @@ -176,42 +170,67 @@ const char *skip_dev_dir(struct cmd_context *cmd, const char *vg_name, } /* - * Three possible results: - * a) return 0, skip 0: take the VG, and cmd will end in success - * b) return 0, skip 1: skip the VG, and cmd will end in success - * c) return 1, skip *: skip the VG, and cmd will end in failure + * Checks the FAILED flags returned by vg_read() to decide + * if they should produce an error message and command exit error. + * + * (This does not have any role in deciding if the command + * can use the VG. Those override conditions allowing a command + * to use a VG in spite of failed flags are handled in vg_read(), + * and if those override conditions are met, vg_read() does not + * return FAILED_ERROR, and this function is not used.) * - * Case b is the special case, and includes the following: - * . The VG is inconsistent, and the command allows for inconsistent VGs. - * . The VG is clustered, the host cannot access clustered VG's, - * and the command option has been used to ignore clustered vgs. + * A number of FAILED flags have conditions in which they should + * not produce error messages. For each of those FAILED flags, + * check its associated condition, and clear the flag if the + * message can be skipped. At the end, if any failed flags remain + * set, then one or more failed flags cannot be suppressed, + * and we return 0 indicating that the error cannot be fully + * suppressed. The flags that cannot be suppressed are returned in + * failed_flags_result. * - * Case c covers the other errors returned when reading the VG. - * If *skip is 1, it's OK for the caller to read the list of PVs in the VG. + * If all the failed flags that are set have conditions that allow + * them to be suppressed, then return 1 indicating that the vg can + * be silently skipped. failed_flags_result will be 0. + * + * Many FAILED flags do not have conditions in which + * they can be suppressed. If any of these are set, + * then this function will always return 0. */ -static int _ignore_vg(struct volume_group *vg, const char *vg_name, - struct dm_list *arg_vgnames, uint32_t read_flags, - int *skip, int *notfound) -{ - uint32_t read_error = vg_read_error(vg); - *skip = 0; - *notfound = 0; +static int _suppress_failed_flags(struct volume_group *vg, const char *vg_name, + struct dm_list *arg_vgnames, uint32_t read_flags, + uint64_t failed_flags, uint64_t *failed_flags_result) +{ + uint64_t failed = failed_flags; - if ((read_error & FAILED_NOTFOUND) && (read_flags & READ_OK_NOTFOUND)) { - *notfound = 1; + /* + * This is an odd case that shouldn't generally happen + * if the failed flags are used as intended. + * Without another specific failed flag we don't really + * know what this case is, and it's probably not right + * to silently ignore it. + */ + failed &= ~FAILED_ERROR; + if (!failed) { + *failed_flags_result = FAILED_ERROR; return 0; } - if ((read_error & FAILED_INCONSISTENT) && (read_flags & READ_ALLOW_INCONSISTENT)) - read_error &= ~FAILED_INCONSISTENT; /* Check for other errors */ + /* + * NOT_FOUND will not generally have any other flags set, + * so check if we're done early. + */ + if ((!vg || (failed & FAILED_NOT_FOUND)) && (read_flags & READ_OK_NOTFOUND)) + failed &= ~FAILED_NOT_FOUND; - if ((read_error & FAILED_CLUSTERED) && vg->cmd->ignore_clustered_vgs) { - read_error &= ~FAILED_CLUSTERED; /* Check for other errors */ - log_verbose("Skipping volume group %s", vg_name); - *skip = 1; + if (!failed) { + *failed_flags_result = 0; + return 1; } + if ((failed & FAILED_CLUSTERED) && vg->cmd->ignore_clustered_vgs) + failed &= ~FAILED_CLUSTERED; + /* * Commands that operate on "all vgs" shouldn't be bothered by * skipping a foreign VG, and the command shouldn't fail when @@ -219,17 +238,9 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * operate on a foreign VG and it's skipped, then the command * would expect to fail. */ - if (read_error & FAILED_SYSTEMID) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { - log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.", - vg->name, vg->system_id, vg->cmd->system_id ? "" : "unknown ", - vg->cmd->system_id ? " " : "", vg->cmd->system_id ? vg->cmd->system_id : ""); - return 1; - } else { - read_error &= ~FAILED_SYSTEMID; /* Check for other errors */ - log_verbose("Skipping foreign volume group %s", vg_name); - *skip = 1; - } + if (failed & FAILED_SYSTEMID) { + if (!arg_vgnames || !str_list_match_item(arg_vgnames, vg->name)) + failed &= ~FAILED_SYSTEMID; } /* @@ -241,37 +252,151 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * VG lock_type requires lvmlockd), and FAILED_LOCK_MODE (the * command failed to acquire the necessary lock.) */ - if (read_error & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { - if (read_error & FAILED_LOCK_TYPE) - log_error("Cannot access VG %s with lock type %s that requires lvmlockd.", - vg->name, vg->lock_type); - /* For FAILED_LOCK_MODE, the error is printed in vg_read. */ - return 1; - } else { - read_error &= ~FAILED_LOCK_TYPE; /* Check for other errors */ - read_error &= ~FAILED_LOCK_MODE; - log_verbose("Skipping volume group %s", vg_name); - *skip = 1; + if (failed & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) { + if (!arg_vgnames || !str_list_match_item(arg_vgnames, vg->name)) { + failed &= ~FAILED_LOCK_TYPE; + failed &= ~FAILED_LOCK_MODE; } } - if (read_error == FAILED_CLUSTERED) { - *skip = 1; - stack; /* Error already logged */ - return 1; + /* + * If failed flags remain set, then all the failures cannot be + * suppressed, and we return 0. If all failed flags have been + * cleared by their associated conditions, then we can silently + * suppress the error. + */ + if (failed) { + *failed_flags_result = failed; + return 0; } - if (read_error != SUCCESS) { - *skip = 0; - if (is_orphan_vg(vg_name)) - log_error("Cannot process standalone physical volumes"); - else - log_error("Cannot process volume group %s", vg_name); - return 1; + *failed_flags_result = 0; + return 1; +} + +static void _print_failed_flags(struct cmd_context *cmd, struct volume_group *vg, + const char *vg_name, uint64_t failed_flags_print) +{ + uint64_t failed = failed_flags_print; + + /* + * This shouldn't happen if failed_flags and suppress_failed_flags() + * are used as intended. + */ + if (!failed) { + log_error(INTERNAL_ERROR "Cannot use VG %s (no failed flags).", vg_name); + return; } - return 0; + if (failed & FAILED_ERROR) { + failed &= ~FAILED_ERROR; + /* + * Usually this general flag is ignored and a more specific + * flag is set, but if this happens to be the only flag set + * for some reason then print a generic error. + */ + if (!failed) + log_error("Cannot read VG %s.", vg_name); + return; + } + + if (failed & FAILED_INTERNAL) { + failed &= ~FAILED_INTERNAL; + log_error("Cannot read VG %s (internal error).", vg_name); + } + + if (failed & FAILED_NOT_FOUND) { + failed &= ~FAILED_NOT_FOUND; + log_error("Volume group \"%s\" not found.", vg_name); + } + + if (failed & FAILED_BADNAME) { + failed &= ~FAILED_BADNAME; + log_error("Volume group name \"%s\" has invalid characters.", vg_name); + } + + if (failed & FAILED_VG_LOCKING) { + failed &= ~FAILED_VG_LOCKING; + log_error("Cannot lock VG %s.", vg_name); + } + + if (failed & FAILED_READ_ONLY) { + failed &= ~FAILED_READ_ONLY; + log_error("Cannot access read-only VG %s.", vg_name); + } + + if (failed & FAILED_EXPORTED) { + failed &= ~FAILED_EXPORTED; + log_error("Cannot access exported VG %s.", vg_name); + } + + if (failed & FAILED_RESIZEABLE) { + failed &= ~FAILED_RESIZEABLE; + log_error("Cannot access non-resizeable VG %s.", vg_name); + } + + if (failed & FAILED_CLUSTERED) { + failed &= ~FAILED_CLUSTERED; + log_error("Cannot access clustered VG %s that requires clvmd.", vg_name); + } + + if (failed & FAILED_SYSTEMID) { + failed &= ~FAILED_SYSTEMID; + log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.", + vg_name, vg->system_id, vg->cmd->system_id ? "" : "unknown ", + cmd->system_id ? " " : "", cmd->system_id ? cmd->system_id : ""); + } + + if (failed & FAILED_LOCK_TYPE) { + failed &= ~FAILED_LOCK_TYPE; + log_error("Cannot access VG %s with lock type %s that requires lvmlockd.", + vg_name, vg->lock_type); + } + + if (failed & FAILED_LOCK_MODE) { + failed &= ~FAILED_LOCK_MODE; + /* FIXME: remove same message in access_vg_lock_type() ? */ + log_error("Cannot access VG %s due to failed lock in lvmlockd.", vg_name); + } + + if (failed & FAILED_MISSING_PVS) { + failed &= ~FAILED_MISSING_PVS; + log_error("Cannot change VG %s while PVs are missing.", vg_name); + log_error("Consider vgreduce --removemissing."); + } + + if (failed & FAILED_MISSING_DEVS) { + failed &= ~FAILED_MISSING_DEVS; + log_error("Cannot change VG %s while PVs have no devices.", vg_name); + log_error("Consider vgreduce --removemissing."); + } + + if (failed & FAILED_BAD_PV_SEGS) { + failed &= ~FAILED_BAD_PV_SEGS; + log_error("Bad PV segments in VG %s.", vg_name); + } + + if (failed & FAILED_UNKNOWN_LV_SEGS) { + failed &= ~FAILED_UNKNOWN_LV_SEGS; + log_error("Unknown LV segments in VG %s.", vg_name); + } + + if (failed & FAILED_BAD_LV_SEGS) { + failed &= ~FAILED_BAD_LV_SEGS; + log_error("Bad LV segments in VG %s.", vg_name); + } + + /* + * TODO: add a check and message for each failed flag + */ + + /* + * This generic message should never be reached, but keep it + * here in case a new failed flag is added without adding a + * check above. + */ + if (failed) + log_error("Cannot use VG %s due to failed flags 0x%llx.", vg_name, (unsigned long long)failed); } /* @@ -1900,13 +2025,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint64_t failed_flags; + uint64_t failed_flags_print; int whole_selected = 0; int ret_max = ECMD_PROCESSED; int ret; - int skip; - int notfound; int process_all = 0; - int already_locked; int do_report_ret_code = 1; log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG); @@ -1923,8 +2047,6 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, dm_list_iterate_items(vgnl, vgnameids_to_process) { vg_name = vgnl->vg_name; vg_uuid = vgnl->vgid; - skip = 0; - notfound = 0; uuid[0] = '\0'; if (is_orphan_vg(vg_name)) { @@ -1949,17 +2071,31 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, continue; } - already_locked = lvmcache_vgname_is_locked(vg_name); + failed_flags = 0; + failed_flags_print = 0; - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags); + if (!vg || (failed_flags & FAILED_ERROR)) { + if (!_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print)) { + _print_failed_flags(cmd, vg, vg_name, failed_flags_print); + ret_max = ECMD_FAILED; + report_log_ret_code(ret_max); + } stack; - ret_max = ECMD_FAILED; - report_log_ret_code(ret_max); goto endvg; } - if (skip || notfound) - goto endvg; + + /* + * The VG can be used when failed_flags do not include ERROR. + * TODO: in what cases do we want to warn about failed_flags + * that are set? + */ + if (failed_flags) { + _suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print); + /* _print_warn_flags(cmd, vg, vg_name, failed_flags_print); */ + /* FAILED_PV_DEV_SIZES WARNING: One or more devices used as PVs in VG have changed sizes */ + log_warn("WARNING: Processing VG %s with failed flags 0x%llx.", vg_name, (unsigned long long)failed_flags_print); + } /* Process this VG? */ if ((process_all || @@ -1969,6 +2105,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, log_very_verbose("Process single VG %s", vg_name); + /* + * FIXME: pass failed_flags to single function so that + * each command can decide what to do about any non-fatal + * issues that still exist. + */ + ret = process_single_vg(cmd, vg_name, vg, handle); _update_selection_result(handle, &whole_selected); if (ret != ECMD_PROCESSED) @@ -1977,11 +2119,11 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, if (ret > ret_max) ret_max = ret; } - - if (!vg_read_error(vg) && !already_locked) - unlock_vg(cmd, vg, vg_name); endvg: - release_vg(vg); + if (vg) { + unlock_vg(cmd, vg, vg_name); + release_vg(vg); + } if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) stack; @@ -3567,15 +3709,14 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag struct dm_list *tags_arg; struct dm_list lvnames; uint32_t lockd_state = 0; + uint64_t failed_flags; + uint64_t failed_flags_print; const char *vg_name; const char *vg_uuid; const char *vgn; const char *lvn; int ret_max = ECMD_PROCESSED; int ret; - int skip; - int notfound; - int already_locked; int do_report_ret_code = 1; log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG); @@ -3583,8 +3724,6 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag dm_list_iterate_items(vgnl, vgnameids_to_process) { vg_name = vgnl->vg_name; vg_uuid = vgnl->vgid; - skip = 0; - notfound = 0; uuid[0] = '\0'; if (vg_uuid && !id_write_format((const struct id*)vg_uuid, uuid, sizeof(uuid))) @@ -3636,17 +3775,29 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag continue; } - already_locked = lvmcache_vgname_is_locked(vg_name); + failed_flags = 0; + failed_flags_print = 0; - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags); + if (!vg || (failed_flags & FAILED_ERROR)) { + if (!_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print)) { + _print_failed_flags(cmd, vg, vg_name, failed_flags_print); + ret_max = ECMD_FAILED; + report_log_ret_code(ret_max); + } stack; - ret_max = ECMD_FAILED; - report_log_ret_code(ret_max); goto endvg; } - if (skip || notfound) - goto endvg; + + /* + * The VG can be used when failed_flags do not include ERROR. + * TODO: in what cases do we want to warn about failed_flags + * that are set? + */ + if (failed_flags) { + _suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print); + log_warn("WARNING: Processing VG %s with failed flags %llx.", vg_name, (unsigned long long)failed_flags_print); + } ret = process_each_lv_in_vg(cmd, vg, &lvnames, tags_arg, 0, handle, check_single_lv, process_single_lv); @@ -3655,13 +3806,14 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag report_log_ret_code(ret); if (ret > ret_max) ret_max = ret; - - if (!already_locked) - unlock_vg(cmd, vg, vg_name); endvg: - release_vg(vg); + if (vg) { + unlock_vg(cmd, vg, vg_name); + release_vg(vg); + } if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) stack; + log_set_report_object_name_and_id(NULL, NULL); } do_report_ret_code = 0; @@ -3919,51 +4071,6 @@ out: return r; } -static int _device_list_remove(struct dm_list *devices, struct device *dev) -{ - struct device_id_list *dil; - - dm_list_iterate_items(dil, devices) { - if (dil->dev == dev) { - dm_list_del(&dil->list); - return 1; - } - } - - return 0; -} - -static struct device_id_list *_device_list_find_dev(struct dm_list *devices, struct device *dev) -{ - struct device_id_list *dil; - - dm_list_iterate_items(dil, devices) { - if (dil->dev == dev) - return dil; - } - - return NULL; -} - -static int _device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst) -{ - struct device_id_list *dil; - struct device_id_list *dil_new; - - dm_list_iterate_items(dil, src) { - if (!(dil_new = dm_pool_alloc(cmd->mem, sizeof(*dil_new)))) { - log_error("device_id_list alloc failed."); - return ECMD_FAILED; - } - - dil_new->dev = dil->dev; - strncpy(dil_new->pvid, dil->pvid, ID_LEN); - dm_list_add(dst, &dil_new->list); - } - - return ECMD_PROCESSED; -} - /* * For each device in arg_devices or all_devices that has a pvid, add a copy of * that device to arg_missed. All PVs (devices with a pvid) should have been @@ -4080,13 +4187,13 @@ static int _process_duplicate_pvs(struct cmd_context *cmd, dm_list_iterate_items(devl, &unused_duplicate_devs) { /* Duplicates are displayed if -a is used or the dev is named as an arg. */ - _device_list_remove(all_devices, devl->dev); + device_list_remove(all_devices, devl->dev); if (!process_all_devices && dm_list_empty(arg_devices)) continue; - if ((dil = _device_list_find_dev(arg_devices, devl->dev))) - _device_list_remove(arg_devices, devl->dev); + if ((dil = device_list_find_dev(arg_devices, devl->dev))) + device_list_remove(arg_devices, devl->dev); if (!process_all_devices && !dil) continue; @@ -4171,10 +4278,10 @@ static int _process_defective_pvs(struct cmd_context *cmd, dm_list_iterate_items(devl, &defective_devs) { - _device_list_remove(all_devices, devl->dev); + device_list_remove(all_devices, devl->dev); - if ((dil = _device_list_find_dev(arg_devices, devl->dev))) - _device_list_remove(arg_devices, devl->dev); + if ((dil = device_list_find_dev(arg_devices, devl->dev))) + device_list_remove(arg_devices, devl->dev); if (!(cmd->cname->flags & ENABLE_DEFECTIVE_DEVS)) continue; @@ -4263,9 +4370,9 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, /* Remove each arg_devices entry as it is processed. */ if (!process_pv && !dm_list_empty(arg_devices) && - (dil = _device_list_find_dev(arg_devices, pv->dev))) { + (dil = device_list_find_dev(arg_devices, pv->dev))) { process_pv = 1; - _device_list_remove(arg_devices, dil->dev); + device_list_remove(arg_devices, dil->dev); } if (!process_pv && !dm_list_empty(arg_tags) && @@ -4280,7 +4387,7 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, else log_very_verbose("Processing PV %s in VG %s.", pv_name, vg->name); - _device_list_remove(all_devices, pv->dev); + device_list_remove(all_devices, pv->dev); /* * pv->dev should be found in all_devices unless it's a @@ -4350,11 +4457,11 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint64_t failed_flags; + uint64_t failed_flags_print; int ret_max = ECMD_PROCESSED; int ret; int skip; - int notfound; - int already_locked; int do_report_ret_code = 1; log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG); @@ -4363,7 +4470,6 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, vg_name = vgnl->vg_name; vg_uuid = vgnl->vgid; skip = 0; - notfound = 0; uuid[0] = '\0'; if (is_orphan_vg(vg_name)) { @@ -4388,25 +4494,42 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, log_debug("Processing PVs in VG %s", vg_name); - already_locked = lvmcache_vgname_is_locked(vg_name); + failed_flags = 0; + failed_flags_print = 0; - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, NULL, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags); + if (!vg || (failed_flags & FAILED_ERROR)) { + if (!_suppress_failed_flags(vg, vg_name, NULL, read_flags, failed_flags, &failed_flags_print)) { + _print_failed_flags(cmd, vg, vg_name, failed_flags_print); + ret_max = ECMD_FAILED; + report_log_ret_code(ret_max); + } stack; - ret_max = ECMD_FAILED; - report_log_ret_code(ret_max); - if (!skip) + + /* + * FIXME: can we just do this instead of going + * through the processing with the skip flag? + * remove_pv_list_from_device_list(&vg->pvs, arg_devices); + * remove_pv_list_from_device_list(&vg->pvs, all_devices); + */ + if (vg) { + skip = 1; + goto process; + } else { goto endvg; - /* Drop through to eliminate a clustered VG's PVs from the devices list */ + } } - if (notfound) - goto endvg; - + /* - * Don't continue when skip is set, because we need to remove - * vg->pvs entries from devices list. + * The VG can be used when failed_flags do not include ERROR. + * TODO: in what cases do we want to warn about failed_flags + * that are set? */ - + if (failed_flags) { + _suppress_failed_flags(vg, vg_name, NULL, read_flags, failed_flags, &failed_flags_print); + log_warn("WARNING: Processing VG %s with failed flags 0x%llx.", vg_name, (unsigned long long)failed_flags_print); + } +process: ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags, process_all_pvs, process_all_devices, skip, handle, process_single_pv); @@ -4415,11 +4538,12 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, report_log_ret_code(ret); if (ret > ret_max) ret_max = ret; - - if (!skip && !already_locked) - unlock_vg(cmd, vg, vg->name); endvg: - release_vg(vg); + if (vg) { + unlock_vg(cmd, vg, vg->name); + release_vg(vg); + } + if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) stack; @@ -4470,9 +4594,7 @@ int process_each_pv(struct cmd_context *cmd, * if it was removed between creating the list of all VGs and then * processing each VG. */ - if (only_this_vgname) - read_flags |= READ_WARN_INCONSISTENT; - else + if (!only_this_vgname) read_flags |= READ_OK_NOTFOUND; /* Disable error in vg_read so we can print it from ignore_vg. */ @@ -4636,7 +4758,7 @@ int process_each_pv(struct cmd_context *cmd, struct dm_list arg_missed_orig; dm_list_init(&arg_missed_orig); - _device_list_copy(cmd, &arg_missed, &arg_missed_orig); + device_list_copy(cmd, &arg_missed, &arg_missed_orig); log_verbose("Some PVs were not found in first search, retrying."); @@ -4658,8 +4780,8 @@ int process_each_pv(struct cmd_context *cmd, /* Devices removed from arg_missed are removed from arg_devices. */ dm_list_iterate_items(dil, &arg_missed_orig) { - if (!_device_list_find_dev(&arg_missed, dil->dev)) - _device_list_remove(&arg_devices, dil->dev); + if (!device_list_find_dev(&arg_missed, dil->dev)) + device_list_remove(&arg_devices, dil->dev); } } @@ -5489,7 +5611,6 @@ int pvcreate_each_device(struct cmd_context *cmd, struct pv_list *pvl; struct pv_list *vgpvl; const char *pv_name; - int consistent = 0; int must_use_all = (cmd->cname->flags & MUST_USE_ALL_ARGS); int found; unsigned i; @@ -5781,9 +5902,11 @@ do_command: * and not recreate a new PV on top of an existing PV. */ if (pp->preserve_existing && pp->orphan_vg_name) { + uint64_t failed_flags = 0; + log_debug("Using existing orphan PVs in %s.", pp->orphan_vg_name); - if (!(orphan_vg = vg_read_internal(cmd, pp->orphan_vg_name, NULL, 0, &consistent))) { + if (!(orphan_vg = vg_read_internal(cmd, pp->orphan_vg_name, NULL, 0, &failed_flags, NULL, NULL))) { log_error("Cannot read orphans VG %s.", pp->orphan_vg_name); goto bad; } diff --git a/tools/vgcfgbackup.c b/tools/vgcfgbackup.c index 5d2113186..345ac7927 100644 --- a/tools/vgcfgbackup.c +++ b/tools/vgcfgbackup.c @@ -62,12 +62,6 @@ static int _vg_backup_single(struct cmd_context *cmd, const char *vg_name, if (!backup_to_file(filename, vg->cmd->cmd_line, vg)) return_ECMD_FAILED; } else { - if (vg_read_error(vg) == FAILED_INCONSISTENT) { - log_error("No backup taken: specify filename with -f " - "to backup an inconsistent VG"); - return ECMD_FAILED; - } - /* just use the normal backup code */ backup_enable(cmd, 1); /* force a backup */ if (!backup(vg)) @@ -94,7 +88,7 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv) init_pvmove(1); - ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_ALLOW_INCONSISTENT, 0, + ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_NO_REPAIR, 0, handle, &_vg_backup_single); dm_free(last_filename); diff --git a/tools/vgcreate.c b/tools/vgcreate.c index af0c36364..2c9c61587 100644 --- a/tools/vgcreate.c +++ b/tools/vgcreate.c @@ -26,7 +26,8 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv) const char *clustered_message = ""; char *vg_name; struct arg_value_group_list *current_group; - uint32_t rc; + int lock_failed = 0; + int name_exists = 0; if (!argc) { log_error("Please provide volume group name and " @@ -72,8 +73,8 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv) * Check if the VG name already exists. This should be done before * creating PVs on any of the devices. */ - if ((rc = vg_lock_newname(cmd, vp_new.vg_name)) != SUCCESS) { - if (rc == FAILED_EXIST) + if (!vg_lock_newname(cmd, vp_new.vg_name, &lock_failed, &name_exists)) { + if (name_exists) log_error("A volume group called %s already exists.", vp_new.vg_name); else log_error("Can't get lock for %s.", vp_new.vg_name); diff --git a/tools/vgmerge.c b/tools/vgmerge.c index 86c6a48a2..d29351ec9 100644 --- a/tools/vgmerge.c +++ b/tools/vgmerge.c @@ -19,12 +19,11 @@ static struct volume_group *_vgmerge_vg_read(struct cmd_context *cmd, const char *vg_name) { struct volume_group *vg; + log_verbose("Checking for volume group \"%s\"", vg_name); - vg = vg_read_for_update(cmd, vg_name, NULL, 0, 0); - if (vg_read_error(vg)) { - release_vg(vg); - return NULL; - } + + if (!(vg = vg_read(cmd, vg_name, NULL, READ_FOR_UPDATE, 0, NULL))) + return_NULL; if (is_lockd_type(vg->lock_type)) { log_error("vgmerge not allowed for lock_type %s", vg->lock_type); diff --git a/tools/vgremove.c b/tools/vgremove.c index 8bf384151..24a9b10dd 100644 --- a/tools/vgremove.c +++ b/tools/vgremove.c @@ -37,8 +37,7 @@ static int _vgremove_single(struct cmd_context *cmd, const char *vg_name, * Even multiple --yes are equivalent to single --force * When we require -ff it cannot be replaces with -f -y */ - force_t force = (force_t) arg_count(cmd, force_ARG) - ? : (arg_is_set(cmd, yes_ARG) ? DONT_PROMPT : PROMPT); + force_t force = (force_t) arg_count(cmd, force_ARG) ? : (arg_is_set(cmd, yes_ARG) ? DONT_PROMPT : PROMPT); unsigned lv_count, missing; int ret; @@ -71,8 +70,28 @@ static int _vgremove_single(struct cmd_context *cmd, const char *vg_name, if (!lockd_free_vg_before(cmd, vg, 0)) return_ECMD_FAILED; - if (!force && !vg_remove_check(vg)) - return_ECMD_FAILED; + if (!force) { + /* + * FIXME: are there other things we need to check which were + * allowed in vg_read because of handles_missing_pvs, but we + * don't want to handle without force? + */ + + if (vg_missing_pv_count(vg)) + return_ECMD_FAILED; + + if (!vg_check_status(vg, EXPORTED_VG)) + return_ECMD_FAILED; + + if ((lv_count = vg_visible_lvs(vg))) { + log_error("Volume group \"%s\" still contains %u logical volume(s)", + vg->name, lv_count); + return_ECMD_FAILED; + } + + if (!archive(vg)) + return_ECMD_FAILED; + } vg_remove_pvs(vg); diff --git a/tools/vgsplit.c b/tools/vgsplit.c index 46c891167..536474c5b 100644 --- a/tools/vgsplit.c +++ b/tools/vgsplit.c @@ -464,11 +464,14 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, int *existing_vg) { struct volume_group *vg_to = NULL; + int lock_failed = 0; + int name_exists = 0; log_verbose("Checking for new volume group \"%s\"", vg_name_to); + /* - * First try to create a new VG. If we cannot create it, - * and we get FAILED_EXIST (we will not be holding a lock), + * First try to create a new VG. If we cannot create it + * and it already exists (we will not be holding a lock), * a VG must already exist with this name. We then try to * read the existing VG - the vgsplit will be into an existing VG. * @@ -476,26 +479,28 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, * we obtained a WRITE lock and could not find the vgname in the * system. Thus, the split will be into a new VG. */ - vg_to = vg_lock_and_create(cmd, vg_name_to); - if (vg_read_error(vg_to) == FAILED_LOCKING) { + vg_to = vg_lock_and_create(cmd, vg_name_to, &lock_failed, &name_exists); + + if (vg_to) + return vg_to; + + if (lock_failed) { log_error("Can't get lock for %s", vg_name_to); - release_vg(vg_to); return NULL; } - if (vg_read_error(vg_to) == FAILED_EXIST) { + + if (name_exists) { *existing_vg = 1; - release_vg(vg_to); - vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0, 0); - if (vg_read_error(vg_to)) { - release_vg(vg_to); + if (!(vg_to = vg_read(cmd, vg_name_to, NULL, READ_NO_LOCK | READ_FOR_UPDATE, 0, NULL))) return_NULL; - } - } else if (vg_read_error(vg_to) == SUCCESS) { - *existing_vg = 0; + return vg_to; } - return vg_to; + + /* shouldn't happen */ + log_error("Failed to lock or create VG %s.", vg_name_to); + return_NULL; } /* @@ -511,11 +516,8 @@ static struct volume_group *_vgsplit_from(struct cmd_context *cmd, log_verbose("Checking for volume group \"%s\"", vg_name_from); - vg_from = vg_read_for_update(cmd, vg_name_from, NULL, 0, 0); - if (vg_read_error(vg_from)) { - release_vg(vg_from); - return NULL; - } + if (!(vg_from = vg_read(cmd, vg_name_from, NULL, READ_FOR_UPDATE, 0, NULL))) + return_NULL; if (is_lockd_type(vg_from->lock_type)) { log_error("vgsplit not allowed for lock_type %s", vg_from->lock_type); @@ -581,6 +583,11 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv) return ECMD_FAILED; } + if (!validate_name(vg_name_to)) { + log_error("Invalid vg name %s", vg_name_to); + return ECMD_FAILED; + } + if (strcmp(vg_name_to, vg_name_from) < 0) lock_vg_from_first = 0; @@ -749,9 +756,8 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv) */ if (!test_mode()) { release_vg(vg_to); - vg_to = vg_read_for_update(cmd, vg_name_to, NULL, - READ_ALLOW_EXPORTED, 0); - if (vg_read_error(vg_to)) { + if (!(vg_to = vg_read(cmd, vg_name_to, NULL, READ_FOR_UPDATE | READ_ALLOW_EXPORTED, 0, NULL))) { + /* FIXME: this inconsistent message is not necessarily true. */ log_error("Volume group \"%s\" became inconsistent: " "please fix manually", vg_name_to); goto bad; |