diff options
author | David Teigland <teigland@redhat.com> | 2017-10-25 15:46:42 -0500 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2017-11-10 10:53:57 -0600 |
commit | 0e15dfe2b6415c7956ab83cfbc36ef7a1f7b5097 (patch) | |
tree | dbe53089ca936441cfc93ae80461228bd9b191ab | |
parent | 83f25c98de5185ec1b70f9f9cc1f7e1121b7cafb (diff) | |
download | lvm2-0e15dfe2b6415c7956ab83cfbc36ef7a1f7b5097.tar.gz |
label_scan/vg_read: use label_read_data to avoid disk reads
The new label_scan() function reads a large buffer of data
from the start of the disk, and saves it so that multiple
structs can be read from it. Previously, only the label_header
was read from this buffer, and the code which needed data
structures that immediately followed the label_header would
read those from disk separately. This created a large
number of small, unnecessary disk reads.
In each place that the two read paths (label_scan and vg_read)
need to read data from disk, first check if that data is
already available from the label_read_data buffer, and if
so just copy it from the buffer instead of reading from disk.
Code changes
------------
- passing the label_read_data struct down through
both read paths to make it available.
- before every disk read, first check if the location
and size of the desired piece of data exists fully
in the label_read_data buffer, and if so copy it
from there. Otherwise, use the existing code to
read the data from disk.
- adding some log_error messages on existing error paths
that were already being updated for the reasons above.
- using similar naming for parallel functions on the two
parallel read paths that are being updated above.
label_scan path calls:
read_metadata_location_summary, text_read_metadata_summary
vg_read path calls:
read_metadata_location_vg, text_read_metadata
Previously, those functions were named:
label_scan path calls:
vgname_from_mda, text_vgsummary_import
vg_read path calls:
_find_vg_rlocn, text_vg_import_fd
I/O changes
-----------
In the label_scan path, the following data is either copied
from label_read_data or read from disk for each PV:
- label_header and pv_header
- mda_header (in _raw_read_mda_header)
- vg metadata name (in read_metadata_location_summary)
- vg metadata (in config_file_read_fd)
Total of 4 reads per PV in the label_scan path.
In the vg_read path, the following data is either copied from
label_read_data or read from disk for each PV:
- mda_header (in _raw_read_mda_header)
- vg metadata name (in read_metadata_location_vg)
- vg metadata (in config_file_read_fd)
Total of 3 reads per PV in the vg_read path.
For a common read/reporting command, each PV will be:
- read by the command's initial lvmcache_label_scan()
- read by lvmcache_label_rescan_vg() at the start of vg_read()
- read by vg_read()
Previously, this would cause 11 synchronous disk reads per PV:
4 from lvmcache_label_scan(), 4 from lvmcache_label_rescan_vg()
and 3 from vg_read().
With this commit's optimization, there are now 2 async disk reads
per PV: 1 from lvmcache_label_scan() and 1 from
lvmcache_label_rescan_vg().
When a second mda is used on a PV, it is located at the
end of the PV. This second mda and copy of metadata will
not be found in the label_read_data buffer, and will always
require separate disk reads.
-rw-r--r-- | lib/cache/lvmetad.c | 2 | ||||
-rw-r--r-- | lib/config/config.c | 18 | ||||
-rw-r--r-- | lib/config/config.h | 2 | ||||
-rw-r--r-- | lib/format1/format1.c | 4 | ||||
-rw-r--r-- | lib/format1/lvm1-label.c | 1 | ||||
-rw-r--r-- | lib/format_pool/format_pool.c | 4 | ||||
-rw-r--r-- | lib/format_pool/pool_label.c | 1 | ||||
-rw-r--r-- | lib/format_text/archive.c | 2 | ||||
-rw-r--r-- | lib/format_text/archiver.c | 2 | ||||
-rw-r--r-- | lib/format_text/format-text.c | 229 | ||||
-rw-r--r-- | lib/format_text/import-export.h | 12 | ||||
-rw-r--r-- | lib/format_text/import.c | 117 | ||||
-rw-r--r-- | lib/format_text/import_vsn1.c | 55 | ||||
-rw-r--r-- | lib/format_text/layout.h | 6 | ||||
-rw-r--r-- | lib/format_text/text_label.c | 39 | ||||
-rw-r--r-- | lib/label/label.c | 4 | ||||
-rw-r--r-- | lib/label/label.h | 3 | ||||
-rw-r--r-- | lib/metadata/metadata-exported.h | 14 | ||||
-rw-r--r-- | lib/metadata/metadata.c | 22 | ||||
-rw-r--r-- | lib/metadata/metadata.h | 12 |
20 files changed, 367 insertions, 182 deletions
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 1fd9091dd..86a2caced 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -1766,7 +1766,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) struct volume_group *vg; if (mda_is_ignored(mda) || - !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1))) + !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, NULL))) return 1; /* FIXME Also ensure contents match etc. */ diff --git a/lib/config/config.c b/lib/config/config.c index 1f1896922..9209771f7 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -494,7 +494,7 @@ int override_config_tree_from_profile(struct cmd_context *cmd, * and function avoids parsing of mda into config tree which * remains unmodified and should not be used. */ -int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, +int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async, off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, int checksum_only, int no_dup_node_check) @@ -517,7 +517,18 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, if (!(dev->flags & DEV_REGULAR) || size2) use_mmap = 0; - if (use_mmap) { + if (buf_async) { + if (!(buf = dm_malloc(size + size2))) { + log_error("Failed to allocate circular buffer."); + return 0; + } + + memcpy(buf, buf_async + offset, size); + if (size2) + memcpy(buf + size, buf_async + offset2, size2); + + fb = buf; + } else if (use_mmap) { mmap_offset = offset % lvm_getpagesize(); /* memory map the file */ fb = mmap((caddr_t) 0, size + mmap_offset, PROT_READ, @@ -532,6 +543,7 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, log_error("Failed to allocate circular buffer."); return 0; } + if (!dev_read_circular(dev, (uint64_t) offset, size, (uint64_t) offset2, size2, buf)) { goto out; @@ -601,7 +613,7 @@ int config_file_read(struct dm_config_tree *cft) } } - 
r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0, + r = config_file_read_fd(cft, cf->dev, NULL, 0, (size_t) info.st_size, 0, 0, (checksum_fn_t) NULL, 0, 0, 0); if (!cf->keep_open) { diff --git a/lib/config/config.h b/lib/config/config.h index 901994ae4..291eed71c 100644 --- a/lib/config/config.h +++ b/lib/config/config.h @@ -239,7 +239,7 @@ config_source_t config_get_source_type(struct dm_config_tree *cft); typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size); struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open); -int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, +int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async, off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, int skip_parse, int no_dup_node_check); diff --git a/lib/format1/format1.c b/lib/format1/format1.c index b3569e08e..816e2580f 100644 --- a/lib/format1/format1.c +++ b/lib/format1/format1.c @@ -180,9 +180,9 @@ out: static struct volume_group *_format1_vg_read(struct format_instance *fid, const char *vg_name, struct metadata_area *mda __attribute__((unused)), + struct label_read_data *ld __attribute__((unused)), struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), - unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused))) + unsigned *use_previous_vg __attribute__((unused))) { struct volume_group *vg; struct disk_list *dl; diff --git a/lib/format1/lvm1-label.c b/lib/format1/lvm1-label.c index f5ce1e892..a9ccaaf6b 100644 --- a/lib/format1/lvm1-label.c +++ b/lib/format1/lvm1-label.c @@ -55,6 +55,7 @@ static int _lvm1_write(struct label *label __attribute__((unused)), void *buf __ } static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, + struct label_read_data *ld, struct label **label) { struct pv_disk *pvd = (struct pv_disk *) buf; diff 
--git a/lib/format_pool/format_pool.c b/lib/format_pool/format_pool.c index f6e5e011b..93214a179 100644 --- a/lib/format_pool/format_pool.c +++ b/lib/format_pool/format_pool.c @@ -101,9 +101,9 @@ static int _check_usp(const char *vgname, struct user_subpool *usp, int sp_count static struct volume_group *_pool_vg_read(struct format_instance *fid, const char *vg_name, struct metadata_area *mda __attribute__((unused)), + struct label_read_data *ld __attribute__((unused)), struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), - unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused))) + unsigned *use_previous_vg __attribute__((unused))) { struct volume_group *vg; struct user_subpool *usp; diff --git a/lib/format_pool/pool_label.c b/lib/format_pool/pool_label.c index 0f1d1f791..fa34c2fe1 100644 --- a/lib/format_pool/pool_label.c +++ b/lib/format_pool/pool_label.c @@ -56,6 +56,7 @@ static int _pool_write(struct label *label __attribute__((unused)), void *buf __ } static int _pool_read(struct labeller *l, struct device *dev, void *buf, + struct label_read_data *ld, struct label **label) { struct pool_list pl; diff --git a/lib/format_text/archive.c b/lib/format_text/archive.c index 4d4e7ac27..75ad60ad2 100644 --- a/lib/format_text/archive.c +++ b/lib/format_text/archive.c @@ -321,7 +321,7 @@ static void _display_archive(struct cmd_context *cmd, struct archive_file *af) * retrieve the archive time and description. 
*/ /* FIXME Use variation on _vg_read */ - if (!(vg = text_vg_import_file(tf, af->path, &when, &desc))) { + if (!(vg = text_read_metadata_file(tf, af->path, &when, &desc))) { log_error("Unable to read archive file."); tf->fmt->ops->destroy_instance(tf); return; diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index d3811556d..aa448735e 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd, } dm_list_iterate_items(mda, &tf->metadata_areas_in_use) { - if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0))) + if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, NULL))) stack; break; } diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index f726621a1..ced056237 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -190,7 +190,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, if (!dev_open_readonly(area->dev)) return_0; - if (!(mdah = raw_read_mda_header(fmt, area))) + if (!(mdah = raw_read_mda_header(fmt, area, NULL))) goto_out; rlocn = mdah->raw_locns; @@ -316,15 +316,26 @@ static void _xlate_mdah(struct mda_header *mdah) } } -static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area) +static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, + struct label_read_data *ld) { if (!dev_open_readonly(dev_area->dev)) return_0; - if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) { - if (!dev_close(dev_area->dev)) - stack; - return_0; + if (!ld || (ld->buf_len < dev_area->start + MDA_HEADER_SIZE)) { + log_debug_metadata("Reading mda header sector from %s at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); + + if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) { + if (!dev_close(dev_area->dev)) + stack; + return_0; + } + } else { + 
log_debug_metadata("Copying mda header sector from %s buffer at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); + + memcpy(mdah, ld->buf + dev_area->start, MDA_HEADER_SIZE); } if (!dev_close(dev_area->dev)) @@ -366,7 +377,8 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev } struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area) + struct device_area *dev_area, + struct label_read_data *ld) { struct mda_header *mdah; @@ -375,7 +387,7 @@ struct mda_header *raw_read_mda_header(const struct format_type *fmt, return NULL; } - if (!_raw_read_mda_header(mdah, dev_area)) { + if (!_raw_read_mda_header(mdah, dev_area, ld)) { dm_pool_free(fmt->cmd->mem, mdah); return NULL; } @@ -402,8 +414,14 @@ static int _raw_write_mda_header(const struct format_type *fmt, return 1; } -static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, +/* + * FIXME: unify this with read_metadata_location() which is used + * in the label scanning path. + */ + +static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, struct mda_header *mdah, + struct label_read_data *ld, const char *vgname, int *precommitted) { @@ -438,11 +456,20 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, if (!*vgname) return rlocn; - /* FIXME Loop through rlocns two-at-a-time. List null-terminated. */ - /* FIXME Ignore if checksum incorrect!!! */ - if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, - sizeof(vgnamebuf), vgnamebuf)) - goto_bad; + /* + * Verify that the VG metadata pointed to by the rlocn + * begins with a valid vgname. + */ + if (!ld || (ld->buf_len < dev_area->start + rlocn->offset + NAME_LEN)) { + /* FIXME Loop through rlocns two-at-a-time. List null-terminated. */ + /* FIXME Ignore if checksum incorrect!!! 
*/ + if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, + sizeof(vgnamebuf), vgnamebuf)) + goto_bad; + } else { + memset(vgnamebuf, 0, sizeof(vgnamebuf)); + memcpy(vgnamebuf, ld->buf + dev_area->start + rlocn->offset, NAME_LEN); + } if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) @@ -488,10 +515,10 @@ static int _raw_holds_vgname(struct format_instance *fid, if (!dev_open_readonly(dev_area->dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, dev_area))) + if (!(mdah = raw_read_mda_header(fid->fmt, dev_area, NULL))) return_0; - if (_find_vg_rlocn(dev_area, mdah, vgname, &noprecommit)) + if (_read_metadata_location_vg(dev_area, mdah, NULL, vgname, &noprecommit)) r = 1; if (!dev_close(dev_area->dev)) @@ -503,10 +530,10 @@ static int _raw_holds_vgname(struct format_instance *fid, static struct volume_group *_vg_read_raw_area(struct format_instance *fid, const char *vgname, struct device_area *area, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int precommitted, - int single_device) + int precommitted) { struct volume_group *vg = NULL; struct raw_locn *rlocn; @@ -515,10 +542,10 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, char *desc; uint32_t wrap = 0; - if (!(mdah = raw_read_mda_header(fid->fmt, area))) + if (!(mdah = raw_read_mda_header(fid->fmt, area, ld))) goto_out; - if (!(rlocn = _find_vg_rlocn(area, mdah, vgname, &precommitted))) { + if (!(rlocn = _read_metadata_location_vg(area, mdah, ld, vgname, &precommitted))) { log_debug_metadata("VG %s not found on %s", vgname, dev_name(area->dev)); goto out; } @@ -532,25 +559,25 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, goto out; } - /* FIXME 64-bit */ - if (!(vg = text_vg_import_fd(fid, NULL, vg_fmtdata, use_previous_vg, single_device, area->dev, - (off_t) (area->start + rlocn->offset), - (uint32_t) (rlocn->size - wrap), - 
(off_t) (area->start + MDA_HEADER_SIZE), - wrap, calc_crc, rlocn->checksum, &when, - &desc)) && (!use_previous_vg || !*use_previous_vg)) - goto_out; + vg = text_read_metadata(fid, area->dev, NULL, ld, vg_fmtdata, use_previous_vg, + (off_t) (area->start + rlocn->offset), + (uint32_t) (rlocn->size - wrap), + (off_t) (area->start + MDA_HEADER_SIZE), + wrap, + calc_crc, + rlocn->checksum, + &when, &desc); - if (vg) - log_debug_metadata("Read %s %smetadata (%u) from %s at %" PRIu64 " size %" - PRIu64, vg->name, precommitted ? "pre-commit " : "", - vg->seqno, dev_name(area->dev), - area->start + rlocn->offset, rlocn->size); - else - log_debug_metadata("Skipped reading %smetadata from %s at %" PRIu64 " size %" - PRIu64 " with matching checksum.", precommitted ? "pre-commit " : "", - dev_name(area->dev), - area->start + rlocn->offset, rlocn->size); + if (!vg) { + /* FIXME: detect and handle errors, and distinguish from the optimization + that skips parsing the metadata which also returns NULL. */ + } + + log_debug_metadata("Found metadata on %s at %"PRIu64" size %"PRIu64" for VG %s", + dev_name(area->dev), + area->start + rlocn->offset, + rlocn->size, + vgname); if (vg && precommitted) vg->status |= PRECOMMITTED; @@ -562,9 +589,9 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, static struct volume_group *_vg_read_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, - int single_device) + unsigned *use_previous_vg) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -572,7 +599,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, single_device); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, ld, vg_fmtdata, 
use_previous_vg, 0); if (!dev_close(mdac->area.dev)) stack; @@ -583,6 +610,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg) { @@ -592,7 +620,7 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, 0); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, ld, vg_fmtdata, use_previous_vg, 1); if (!dev_close(mdac->area.dev)) stack; @@ -630,10 +658,10 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!dev_open(mdac->area.dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL))) goto_out; - rlocn = _find_vg_rlocn(&mdac->area, mdah, old_vg_name ? : vg->name, &noprecommit); + rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, old_vg_name ? : vg->name, &noprecommit); mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah); if (!fidtc->raw_metadata_buf && @@ -736,10 +764,10 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, if (!found) return 1; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL))) goto_out; - if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, old_vg_name ? : vg->name, &noprecommit))) { + if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, old_vg_name ? 
: vg->name, &noprecommit))) { mdah->raw_locns[0].offset = 0; mdah->raw_locns[0].size = 0; mdah->raw_locns[0].checksum = 0; @@ -846,10 +874,10 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, if (!dev_open(mdac->area.dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL))) goto_out; - if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, vg->name, &noprecommit))) { + if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, vg->name, &noprecommit))) { rlocn = &mdah->raw_locns[0]; mdah->raw_locns[1].offset = 0; } @@ -883,8 +911,10 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid, time_t when; char *desc; - if (!(vg = text_vg_import_file(fid, read_path, &when, &desc))) - return_NULL; + if (!(vg = text_read_metadata_file(fid, read_path, &when, &desc))) { + log_error("Failed to read VG %s from %s", vgname, read_path); + return NULL; + } /* * Currently you can only have a single volume group per @@ -907,9 +937,9 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid, static struct volume_group *_vg_read_file(struct format_instance *fid, const char *vgname, struct metadata_area *mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused))) + unsigned *use_previous_vg __attribute__((unused))) { struct text_context *tc = (struct text_context *) mda->metadata_locn; @@ -919,6 +949,7 @@ static struct volume_group *_vg_read_file(struct format_instance *fid, static struct volume_group *_vg_read_precommit_file(struct format_instance *fid, const char *vgname, struct metadata_area *mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg __attribute__((unused))) { @@ -1161,8 +1192,9 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) 
return 1; } -int vgname_from_mda(const struct format_type *fmt, - struct mda_header *mdah, struct device_area *dev_area, +int read_metadata_location_summary(const struct format_type *fmt, + struct mda_header *mdah, struct label_read_data *ld, + struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors) { struct raw_locn *rlocn; @@ -1170,13 +1202,12 @@ int vgname_from_mda(const struct format_type *fmt, unsigned int len = 0; char buf[NAME_LEN + 1] __attribute__((aligned(8))); uint64_t buffer_size, current_usage; - unsigned used_cached_metadata = 0; if (mda_free_sectors) *mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT; if (!mdah) { - log_error(INTERNAL_ERROR "vgname_from_mda called with NULL pointer for mda_header"); + log_error(INTERNAL_ERROR "read_metadata_location_summary called with NULL pointer for mda_header"); return 0; } @@ -1187,15 +1218,21 @@ int vgname_from_mda(const struct format_type *fmt, * If no valid offset, do not try to search for vgname */ if (!rlocn->offset) { - log_debug("%s: found metadata with offset 0.", - dev_name(dev_area->dev)); + log_debug_metadata("Metadata location on %s at %"PRIu64" has offset 0.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return 0; } - /* Do quick check for a vgname */ - if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, - NAME_LEN, buf)) - return_0; + /* + * Verify that the VG metadata pointed to by the rlocn + * begins with a valid vgname. 
+ */ + if (!ld || (ld->buf_len < dev_area->start + rlocn->offset + NAME_LEN)) { + if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, buf)) + return_0; + } else { + memcpy(buf, ld->buf + dev_area->start + rlocn->offset, NAME_LEN); + } while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && len < (NAME_LEN - 1)) @@ -1204,47 +1241,65 @@ int vgname_from_mda(const struct format_type *fmt, buf[len] = '\0'; /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(buf)) + if (!validate_name(buf)) { + log_error("Metadata location on %s at %"PRIu64" begins with invalid VG name.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return_0; + } /* We found a VG - now check the metadata */ if (rlocn->offset + rlocn->size > mdah->size) wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); if (wrap > rlocn->offset) { - log_error("%s: metadata too large for circular buffer", - dev_name(dev_area->dev)); + log_error("Metadata location on %s at %"PRIu64" is too large for circular buffer.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return 0; } - /* Did we see this metadata before? */ + /* + * Did we see this metadata before? + * Look in lvmcache to see if there is vg info matching + * the checksum/size that we see in the mda_header (rlocn) + * on this device. If so, then vgsummary->name is is set + * and controls if the "checksum_only" flag passed to + * text_read_metadata_summary() is 1 or 0. + * + * If checksum_only = 1, then text_read_metadata_summary() + * will read the metadata from this device, and run the + * checksum function on it. If the calculated checksum + * of the metadata matches the checksum in the mda_header, + * which also matches the checksum saved in vginfo from + * another device, then it skips parsing the metadata into + * a config tree, which saves considerable cpu time. 
+ */ vgsummary->mda_checksum = rlocn->checksum; vgsummary->mda_size = rlocn->size; + lvmcache_lookup_mda(vgsummary); - if (lvmcache_lookup_mda(vgsummary)) - used_cached_metadata = 1; - - /* FIXME 64-bit */ - if (!text_vgsummary_import(fmt, dev_area->dev, + if (!text_read_metadata_summary(fmt, dev_area->dev, ld, (off_t) (dev_area->start + rlocn->offset), (uint32_t) (rlocn->size - wrap), (off_t) (dev_area->start + MDA_HEADER_SIZE), wrap, calc_crc, vgsummary->vgname ? 1 : 0, - vgsummary)) + vgsummary)) { + log_error("Metadata location on %s at %"PRIu64" has invalid summary for VG.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return_0; + } /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(vgsummary->vgname)) + if (!validate_name(vgsummary->vgname)) { + log_error("Metadata location on %s at %"PRIu64" has invalid VG name.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return_0; + } - log_debug_metadata("%s: %s metadata at %" PRIu64 " size %" PRIu64 - " (in area at %" PRIu64 " size %" PRIu64 - ") for %s (" FMTVGID ")", + log_debug_metadata("Found metadata summary on %s at %"PRIu64" size %"PRIu64" for VG %s", dev_name(dev_area->dev), - used_cached_metadata ? "Using cached" : "Found", dev_area->start + rlocn->offset, - rlocn->size, dev_area->start, dev_area->size, vgsummary->vgname, - (char *)&vgsummary->vgid); + rlocn->size, + vgsummary->vgname); if (mda_free_sectors) { current_usage = (rlocn->size + SECTOR_SIZE - UINT64_C(1)) - @@ -1289,14 +1344,14 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu continue; } - if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area))) { + if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area, NULL))) { stack; goto close_dev; } - /* TODO: caching as in vgname_from_mda() (trigger this code?) 
*/ - if (vgname_from_mda(fmt, mdah, &rl->dev_area, &vgsummary, NULL)) { - vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0); + /* TODO: caching as in read_metadata_location() (trigger this code?) */ + if (read_metadata_location_summary(fmt, mdah, NULL, &rl->dev_area, &vgsummary, NULL)) { + vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, NULL, 0); if (vg) { lvmcache_update_vg(vg, 0); lvmcache_set_independent_location(vg->name); @@ -1777,7 +1832,13 @@ static int _mda_export_text_raw(struct metadata_area *mda, struct mda_context *mdc = (struct mda_context *) mda->metadata_locn; char mdah[MDA_HEADER_SIZE]; /* temporary */ - if (!mdc || !_raw_read_mda_header((struct mda_header *)mdah, &mdc->area)) + if (!mdc) { + log_error(INTERNAL_ERROR "mda_export_text_raw no mdc"); + return 1; /* pretend the MDA does not exist */ + } + + /* FIXME: why aren't ignore,start,size,free_sectors available? */ + if (!_raw_read_mda_header((struct mda_header *)mdah, &mdc->area, NULL)) return 1; /* pretend the MDA does not exist */ return config_make_nodes(cft, parent, NULL, diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h index 2f39e2a4e..2610a39b5 100644 --- a/lib/format_text/import-export.h +++ b/lib/format_text/import-export.h @@ -49,7 +49,6 @@ struct text_vg_version_ops { int (*check_version) (const struct dm_config_tree * cf); struct volume_group *(*read_vg) (struct format_instance * fid, const struct dm_config_tree *cf, - unsigned use_cached_pvs, unsigned allow_lvmetad_extensions); void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf, time_t *when, char **desc); @@ -68,23 +67,24 @@ int read_segtype_lvflags(uint64_t *status, char *segtype_str); int text_vg_export_file(struct volume_group *vg, const char *desc, FILE *fp); size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf); -struct volume_group *text_vg_import_file(struct format_instance *fid, +struct 
volume_group *text_read_metadata_file(struct format_instance *fid, const char *file, time_t *when, char **desc); -struct volume_group *text_vg_import_fd(struct format_instance *fid, +struct volume_group *text_read_metadata(struct format_instance *fid, + struct device *dev, const char *file, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, - struct device *dev, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, uint32_t checksum, time_t *when, char **desc); -int text_vgsummary_import(const struct format_type *fmt, +int text_read_metadata_summary(const struct format_type *fmt, struct device *dev, + struct label_read_data *ld, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, diff --git a/lib/format_text/import.c b/lib/format_text/import.c index 62dee8faf..87b33edf2 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -35,8 +35,9 @@ static void _init_text_import(void) /* * Find out vgname on a given device. */ -int text_vgsummary_import(const struct format_type *fmt, +int text_read_metadata_summary(const struct format_type *fmt, struct device *dev, + struct label_read_data *ld, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, @@ -45,24 +46,57 @@ int text_vgsummary_import(const struct format_type *fmt, { struct dm_config_tree *cft; struct text_vg_version_ops **vsn; + char *buf = NULL; int r = 0; + if (ld) { + if (ld->buf_len >= (offset + size)) + buf = ld->buf; + else { + /* + * Needs data beyond the end of the ld buffer. + * Will do a new synchronous read to get the data. + * (scan_size could also be made larger.) 
+ */ + log_debug_metadata("label scan buffer for %s len %u does not include metadata at %llu size %u", + dev_name(dev), ld->buf_len, (unsigned long long)offset, size); + buf = NULL; + } + } + _init_text_import(); if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0))) return_0; - if ((!dev && !config_file_read(cft)) || - (dev && !config_file_read_fd(cft, dev, offset, size, + if (dev) { + if (buf) + log_debug_metadata("Copying metadata summary for %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + else + log_debug_metadata("Reading metadata summary from %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + + if (!config_file_read_fd(cft, dev, buf, offset, size, offset2, size2, checksum_fn, vgsummary->mda_checksum, - checksum_only, 1))) { - log_error("Couldn't read volume group metadata."); - goto out; + checksum_only, 1)) { + /* FIXME: handle errors */ + log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + goto out; + } + } else { + if (!config_file_read(cft)) { + log_error("Couldn't read volume group metadata from file."); + goto out; + } } if (checksum_only) { /* Checksum matches already-cached content - no need to reparse. 
*/ + log_debug_metadata("Skipped parsing metadata on %s", dev_name(dev)); r = 1; goto out; } @@ -91,12 +125,12 @@ struct cached_vg_fmtdata { size_t cached_mda_size; }; -struct volume_group *text_vg_import_fd(struct format_instance *fid, +struct volume_group *text_read_metadata(struct format_instance *fid, + struct device *dev, const char *file, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, - struct device *dev, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, @@ -106,8 +140,18 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, struct volume_group *vg = NULL; struct dm_config_tree *cft; struct text_vg_version_ops **vsn; + char *buf = NULL; int skip_parse; + /* + * This struct holds the checksum and size of the VG metadata + * that was read from a previous device. When we read the VG + * metadata from this device, we can skip parsing it into a + * cft (saving time) if the checksum of the metadata buffer + * we read from this device matches the size/checksum saved in + * the mda_header/rlocn struct on this device, and matches the + * size/checksum from the previous device. + */ if (vg_fmtdata && !*vg_fmtdata && !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) { log_error("Failed to allocate VG fmtdata for text format."); @@ -127,15 +171,49 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, ((*vg_fmtdata)->cached_mda_checksum == checksum) && ((*vg_fmtdata)->cached_mda_size == (size + size2)); - if ((!dev && !config_file_read(cft)) || - (dev && !config_file_read_fd(cft, dev, offset, size, + if (ld) { + if (ld->buf_len >= (offset + size)) + buf = ld->buf; + else { + /* + * Needs data beyond the end of the ld buffer. + * Will do a new synchronous read to get the data. + * (scan_size could also be made larger.) 
+ */ + log_debug_metadata("label scan buffer for %s len %u does not include metadata at %llu size %u", + dev_name(dev), ld->buf_len, (unsigned long long)offset, size); + buf = NULL; + } + } + + if (dev) { + if (buf) + log_debug_metadata("Copying metadata for %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + else + log_debug_metadata("Reading metadata from %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + + if (!config_file_read_fd(cft, dev, buf, offset, size, offset2, size2, checksum_fn, checksum, - skip_parse, 1))) - goto_out; + skip_parse, 1)) { + /* FIXME: handle errors */ + log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + goto out; + } + } else { + if (!config_file_read(cft)) { + log_error("Couldn't read volume group metadata from file."); + goto out; + } + } if (skip_parse) { if (use_previous_vg) *use_previous_vg = 1; + log_debug_metadata("Skipped parsing metadata on %s", dev_name(dev)); goto out; } @@ -146,7 +224,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, if (!(*vsn)->check_version(cft)) continue; - if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0))) + if (!(vg = (*vsn)->read_vg(fid, cft, 0))) goto_out; (*vsn)->read_desc(vg->vgmem, cft, when, desc); @@ -166,17 +244,20 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, return vg; } -struct volume_group *text_vg_import_file(struct format_instance *fid, +struct volume_group *text_read_metadata_file(struct format_instance *fid, const char *file, time_t *when, char **desc) { - return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, (off_t)0, 0, (off_t)0, 0, NULL, 0, + return text_read_metadata(fid, NULL, file, NULL, NULL, NULL, + (off_t)0, 0, (off_t)0, 0, + NULL, + 0, when, desc); } static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft, struct format_instance *fid, - unsigned allow_lvmetad_extensions) + unsigned 
for_lvmetad) { struct volume_group *vg = NULL; struct text_vg_version_ops **vsn; @@ -191,7 +272,7 @@ static struct volume_group *_import_vg_from_config_tree(const struct dm_config_t * The only path to this point uses cached vgmetadata, * so it can use cached PV state too. */ - if (!(vg = (*vsn)->read_vg(fid, cft, 1, allow_lvmetad_extensions))) + if (!(vg = (*vsn)->read_vg(fid, cft, for_lvmetad))) stack; else if ((vg_missing = vg_missing_pv_count(vg))) { log_verbose("There are %d physical volumes missing.", diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index 4e7978183..e7fc8abcd 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -32,9 +32,7 @@ typedef int (*section_fn) (struct format_instance * fid, struct volume_group * vg, const struct dm_config_node * pvn, const struct dm_config_node * vgn, struct dm_hash_table * pv_hash, - struct dm_hash_table * lv_hash, - unsigned *scan_done_once, - unsigned report_missing_devices); + struct dm_hash_table * lv_hash); #define _read_int32(root, path, result) \ dm_config_get_uint32(root, path, (uint32_t *) (result)) @@ -180,9 +178,7 @@ static int _read_pv(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *pvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash, - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once, - unsigned report_missing_devices) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; struct physical_volume *pv; @@ -225,10 +221,7 @@ static int _read_pv(struct format_instance *fid, if (!id_write_format(&pv->id, buffer, sizeof(buffer))) buffer[0] = '\0'; - if (report_missing_devices) - log_error_once("Couldn't find device with uuid %s.", buffer); - else - log_very_verbose("Couldn't find device with uuid %s.", buffer); + log_error_once("Couldn't find device with uuid %s.", buffer); } if (!(pv->vg_name = dm_pool_strdup(mem, 
vg->name))) @@ -573,9 +566,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)), struct volume_group *vg, const struct dm_config_node *lvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash, - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash) { struct dm_pool *mem = vg->vgmem; struct logical_volume *lv; @@ -730,9 +721,7 @@ static int _read_historical_lvnames(struct format_instance *fid __attribute__((u struct volume_group *vg, const struct dm_config_node *hlvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; struct generic_logical_volume *glv; @@ -801,9 +790,7 @@ static int _read_historical_lvnames_interconnections(struct format_instance *fid struct volume_group *vg, const struct dm_config_node *hlvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; const char *historical_lv_name, *origin_name = NULL; @@ -913,9 +900,7 @@ static int _read_lvsegs(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *lvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash, - struct dm_hash_table *lv_hash, - unsigned *scan_done_once __attribute__((unused)), - unsigned 
report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash) { struct logical_volume *lv; @@ -976,12 +961,9 @@ static int _read_sections(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *vgn, struct dm_hash_table *pv_hash, struct dm_hash_table *lv_hash, - int optional, - unsigned *scan_done_once) + int optional) { const struct dm_config_node *n; - /* Only report missing devices when doing a scan */ - unsigned report_missing_devices = scan_done_once ? !*scan_done_once : 1; if (!dm_config_get_section(vgn, section, &n)) { if (!optional) { @@ -993,8 +975,7 @@ static int _read_sections(struct format_instance *fid, } for (n = n->child; n; n = n->sib) { - if (!fn(fid, vg, n, vgn, pv_hash, lv_hash, - scan_done_once, report_missing_devices)) + if (!fn(fid, vg, n, vgn, pv_hash, lv_hash)) return_0; } @@ -1003,15 +984,13 @@ static int _read_sections(struct format_instance *fid, static struct volume_group *_read_vg(struct format_instance *fid, const struct dm_config_tree *cft, - unsigned use_cached_pvs, - unsigned allow_lvmetad_extensions) + unsigned for_lvmetad) { const struct dm_config_node *vgn; const struct dm_config_value *cv; const char *str, *format_str, *system_id; struct volume_group *vg; struct dm_hash_table *pv_hash = NULL, *lv_hash = NULL; - unsigned scan_done_once = use_cached_pvs; uint64_t vgstatus; /* skip any top-level values */ @@ -1166,15 +1145,15 @@ static struct volume_group *_read_vg(struct format_instance *fid, } if (!_read_sections(fid, "physical_volumes", _read_pv, vg, - vgn, pv_hash, lv_hash, 0, &scan_done_once)) { + vgn, pv_hash, lv_hash, 0)) { log_error("Couldn't find all physical volumes for volume " "group %s.", vg->name); goto bad; } - if (allow_lvmetad_extensions) + if (for_lvmetad) _read_sections(fid, "outdated_pvs", _read_pv, vg, - vgn, pv_hash, lv_hash, 1, &scan_done_once); + vgn, pv_hash, lv_hash, 1); else if (dm_config_has_node(vgn, "outdated_pvs")) log_error(INTERNAL_ERROR "Unexpected 
outdated_pvs section in metadata of VG %s.", vg->name); @@ -1186,28 +1165,28 @@ static struct volume_group *_read_vg(struct format_instance *fid, } if (!_read_sections(fid, "logical_volumes", _read_lvnames, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all logical volume names for volume " "group %s.", vg->name); goto bad; } if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all historical logical volumes for volume " "group %s.", vg->name); goto bad; } if (!_read_sections(fid, "logical_volumes", _read_lvsegs, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all logical volumes for " "volume group %s.", vg->name); goto bad; } if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames_interconnections, - vg, vgn, pv_hash, lv_hash, 1, NULL)) { + vg, vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all removed logical volume interconnections " "for volume group %s.", vg->name); goto bad; diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index 75a935ba5..597114142 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -81,7 +81,8 @@ struct mda_header { } __attribute__ ((packed)); struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area); + struct device_area *dev_area, + struct label_read_data *ld); struct mda_lists { struct dm_list dirs; @@ -103,7 +104,8 @@ struct mda_context { #define LVM2_LABEL "LVM2 001" #define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize()) -int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah, +int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, + struct label_read_data *ld, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t 
*mda_free_sectors); diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index ad323b6d2..d3ff6826f 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -308,14 +308,22 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)), return 1; } -struct _update_mda_baton { +struct _mda_baton { struct lvmcache_info *info; struct label *label; + struct label_read_data *ld; }; -static int _update_mda(struct metadata_area *mda, void *baton) +/* + * FIXME: optimize reads when there is a second mda at the end + * of the PV. For the second mda we should also have a single + * large read covering mda_header and metadata, and we should + * be able to reuse it in vg_read. + */ + +static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) { - struct _update_mda_baton *p = baton; + struct _mda_baton *p = baton; const struct format_type *fmt = p->label->labeller->fmt; struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct mda_header *mdah; @@ -334,7 +342,7 @@ static int _update_mda(struct metadata_area *mda, void *baton) return 1; } - if (!(mdah = raw_read_mda_header(fmt, &mdac->area))) { + if (!(mdah = raw_read_mda_header(fmt, &mdac->area, p->ld))) { stack; goto close_dev; } @@ -350,7 +358,7 @@ static int _update_mda(struct metadata_area *mda, void *baton) return 1; } - if (vgname_from_mda(fmt, mdah, &mdac->area, &vgsummary, + if (read_metadata_location_summary(fmt, mdah, p->ld, &mdac->area, &vgsummary, &mdac->free_sectors) && !lvmcache_update_vgname_and_id(p->info, &vgsummary)) { if (!dev_close(mdac->area.dev)) @@ -365,22 +373,29 @@ close_dev: return 1; } -static int _text_read(struct labeller *l, struct device *dev, void *buf, - struct label **label) +/* + * When label_read_data *ld is set, it means that we have read the first + * ld->buf_len bytes of the device and already have that data, so we don't need + * to do any dev_read's (as long as the desired dev_read 
offset+size is less + * than ld->buf_len). + */ + static int _text_read(struct labeller *l, struct device *dev, void *label_buf, + struct label_read_data *ld, struct label **label) { - struct label_header *lh = (struct label_header *) buf; + struct label_header *lh = (struct label_header *) label_buf; struct pv_header *pvhdr; struct pv_header_extension *pvhdr_ext; struct lvmcache_info *info; struct disk_locn *dlocn_xl; uint64_t offset; uint32_t ext_version; - struct _update_mda_baton baton; + struct _mda_baton baton; /* * PV header base */ - pvhdr = (struct pv_header *) ((char *) buf + xlate32(lh->offset_xl)); + pvhdr = (struct pv_header *) ((char *) label_buf + xlate32(lh->offset_xl)); if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, FMT_TEXT_ORPHAN_VG_NAME, @@ -436,9 +451,9 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf, out: baton.info = info; baton.label = *label; + baton.ld = ld; - if (!lvmcache_foreach_mda(info, _update_mda, &baton)) - return_0; + lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton); lvmcache_make_valid(info); diff --git a/lib/label/label.c b/lib/label/label.c index 058ebb86a..dbe9253de 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -452,7 +452,7 @@ int label_read(struct device *dev, struct label **labelp, uint64_t scan_sector) * the pv_header, mda locations, mda contents. * It saves the info it finds into lvmcache info/vginfo structs. 
*/ - if ((r = (l->ops->read)(l, ld->dev, label_buf, &label)) && label) { + if ((r = (l->ops->read)(l, ld->dev, label_buf, ld, &label)) && label) { label->dev = ld->dev; label->sector = sector; } else { diff --git a/lib/label/label.h b/lib/label/label.h index 2b27b44ae..a91c8f7e5 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -80,7 +80,8 @@ struct label_ops { * Read a label from a volume. */ int (*read) (struct labeller * l, struct device * dev, - void *label_buf, struct label ** label); + void *label_buf, + struct label_read_data *ld, struct label ** label); /* * Additional consistency checks for the paranoid. diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 85e5838f1..058135656 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -366,6 +366,20 @@ struct pv_segment { */ #define FMT_INSTANCE_PRIVATE_MDAS 0x00000008U + +/* + * Each VG has its own fid struct. The fid for a VG describes where + * the metadata for that VG can be found. The lists hold mda locations. + * + * label scan finds the metadata locations (devs and offsets) for a VG, + * and saves this info in lvmcache vginfo/info lists. + * + * vg_read() then creates an fid for a given VG, and the mda locations + * from lvmcache are copied onto the fid lists. Those mda locations + * are read again by vg_read() to get VG metadata that is used to + * create the 'vg' struct. + */ + struct format_instance { unsigned ref_count; /* Refs to this fid from VG and PV structs */ struct dm_pool *mem; diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index d3a547650..ff22277fc 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -34,6 +34,7 @@ #include "lvmlockd.h" #include "time.h" #include "lvmnotify.h" +#include "label.h" #include <math.h> #include <sys/param.h> @@ -714,6 +715,10 @@ int check_pv_dev_sizes(struct volume_group *vg) * source file. All the following and more are only used by liblvm: * * . 
get_pvs() + * . get_vgids() + * . get_vgnames() + * . lvmcache_get_vgids() + * . lvmcache_get_vgnames() * . the vg->pvs_to_write list and pv_to_write struct */ @@ -3948,12 +3953,17 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, /* Ensure contents of all metadata areas match - else do recovery */ inconsistent_mda_count=0; dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + struct device *mda_dev = mda_get_device(mda); + struct label_read_data *ld; + use_previous_vg = 0; - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { + log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev)); + + ld = get_label_read_data(cmd, mda_dev); + + if ((use_precommitted && !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, ld, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || + (!use_precommitted && !(vg = mda->ops->vg_read(fid, vgname, mda, ld, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; @@ -4143,9 +4153,9 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, use_previous_vg = 0; if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || + !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, NULL, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { + !(vg = mda->ops->vg_read(fid, vgname, mda, NULL, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index 0de9ed858..145d0428a 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ 
-25,6 +25,8 @@ #include "dev-cache.h" #include "lvm-string.h" #include "metadata-exported.h" +#include "lvm-logging.h" +#include "label.h" //#define MAX_STRIPES 128U //#define SECTOR_SHIFT 9L @@ -79,12 +81,13 @@ struct metadata_area_ops { struct volume_group *(*vg_read) (struct format_instance * fi, const char *vg_name, struct metadata_area * mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, - int single_device); + unsigned *use_previous_vg); struct volume_group *(*vg_read_precommit) (struct format_instance * fi, const char *vg_name, struct metadata_area * mda, + struct label_read_data *ld, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg); /* @@ -176,6 +179,11 @@ void mda_set_ignored(struct metadata_area *mda, unsigned mda_ignored); unsigned mda_locns_match(struct metadata_area *mda1, struct metadata_area *mda2); struct device *mda_get_device(struct metadata_area *mda); +/* + * fic is used to create an fid. It's used to pass fmt/vgname/vgid args + * to create_instance() which creates an fid for the specified vg. + */ + struct format_instance_ctx { uint32_t type; union { |