summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2017-07-27 16:25:14 -0500
committerDavid Teigland <teigland@redhat.com>2017-10-16 11:17:12 -0500
commit6c7ee16a6c2f576ec41ae7c8d1afe11d87420cb5 (patch)
tree0c2c497a2155987b493c2e0a17916f9829184718
parentb3fd15930e1cb109fe5e615b11de7774e825345f (diff)
downloadlvm2-6c7ee16a6c2f576ec41ae7c8d1afe11d87420cb5.tar.gz
labels: avoid metadata area read using async read data
Copy the metadata out of the initial async read buffer instead of performing another two synchronous reads (first to check vgname, second to read all metadata.)
-rw-r--r--lib/config/config.c18
-rw-r--r--lib/config/config.h2
-rw-r--r--lib/format_text/format-text.c22
-rw-r--r--lib/format_text/import-export.h3
-rw-r--r--lib/format_text/import.c36
-rw-r--r--lib/format_text/layout.h3
-rw-r--r--lib/format_text/text_label.c19
-rw-r--r--lib/label/label.c51
-rw-r--r--lib/label/label.h8
9 files changed, 116 insertions, 46 deletions
diff --git a/lib/config/config.c b/lib/config/config.c
index 1f1896922..9209771f7 100644
--- a/lib/config/config.c
+++ b/lib/config/config.c
@@ -494,7 +494,7 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
* and function avoids parsing of mda into config tree which
* remains unmodified and should not be used.
*/
-int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
+int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int checksum_only, int no_dup_node_check)
@@ -517,7 +517,18 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
if (!(dev->flags & DEV_REGULAR) || size2)
use_mmap = 0;
- if (use_mmap) {
+ if (buf_async) {
+ if (!(buf = dm_malloc(size + size2))) {
+ log_error("Failed to allocate circular buffer.");
+ return 0;
+ }
+
+ memcpy(buf, buf_async + offset, size);
+ if (size2)
+ memcpy(buf + size, buf_async + offset2, size2);
+
+ fb = buf;
+ } else if (use_mmap) {
mmap_offset = offset % lvm_getpagesize();
/* memory map the file */
fb = mmap((caddr_t) 0, size + mmap_offset, PROT_READ,
@@ -532,6 +543,7 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
log_error("Failed to allocate circular buffer.");
return 0;
}
+
if (!dev_read_circular(dev, (uint64_t) offset, size,
(uint64_t) offset2, size2, buf)) {
goto out;
@@ -601,7 +613,7 @@ int config_file_read(struct dm_config_tree *cft)
}
}
- r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0,
+ r = config_file_read_fd(cft, cf->dev, NULL, 0, (size_t) info.st_size, 0, 0,
(checksum_fn_t) NULL, 0, 0, 0);
if (!cf->keep_open) {
diff --git a/lib/config/config.h b/lib/config/config.h
index 901994ae4..291eed71c 100644
--- a/lib/config/config.h
+++ b/lib/config/config.h
@@ -239,7 +239,7 @@ config_source_t config_get_source_type(struct dm_config_tree *cft);
typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size);
struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
-int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
+int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int skip_parse, int no_dup_node_check);
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
index 123489749..e35fa09b2 100644
--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -1166,8 +1166,9 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
return 1;
}
-int vgname_from_mda(const struct format_type *fmt,
- struct mda_header *mdah, struct device_area *dev_area,
+int read_metadata_location(const struct format_type *fmt,
+ struct mda_header *mdah, struct label_read_data *ld,
+ struct device_area *dev_area,
struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors)
{
struct raw_locn *rlocn;
@@ -1181,7 +1182,7 @@ int vgname_from_mda(const struct format_type *fmt,
*mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT;
if (!mdah) {
- log_error(INTERNAL_ERROR "vgname_from_mda called with NULL pointer for mda_header");
+ log_error(INTERNAL_ERROR "read_metadata_location called with NULL pointer for mda_header");
return 0;
}
@@ -1198,9 +1199,12 @@ int vgname_from_mda(const struct format_type *fmt,
}
/* Do quick check for a vgname */
- if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset,
- NAME_LEN, buf))
- return_0;
+ if (!ld || (ld->buf_len < dev_area->start + rlocn->offset + NAME_LEN)) {
+ if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, buf))
+ return_0;
+ } else {
+ memcpy(buf, ld->buf + dev_area->start + rlocn->offset, NAME_LEN);
+ }
while (buf[len] && !isspace(buf[len]) && buf[len] != '{' &&
len < (NAME_LEN - 1))
@@ -1230,7 +1234,7 @@ int vgname_from_mda(const struct format_type *fmt,
used_cached_metadata = 1;
/* FIXME 64-bit */
- if (!text_vgsummary_import(fmt, dev_area->dev,
+ if (!text_read_metadata_summary(fmt, dev_area->dev, ld,
(off_t) (dev_area->start + rlocn->offset),
(uint32_t) (rlocn->size - wrap),
(off_t) (dev_area->start + MDA_HEADER_SIZE),
@@ -1292,8 +1296,8 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu
goto close_dev;
}
- /* TODO: caching as in vgname_from_mda() (trigger this code?) */
- if (vgname_from_mda(fmt, mdah, &rl->dev_area, &vgsummary, NULL)) {
+ /* TODO: caching as in read_metadata_location() (trigger this code?) */
+ if (read_metadata_location(fmt, mdah, NULL, &rl->dev_area, &vgsummary, NULL)) {
vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0);
if (vg)
lvmcache_update_vg(vg, 0);
diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h
index 2f39e2a4e..de6bcf7a6 100644
--- a/lib/format_text/import-export.h
+++ b/lib/format_text/import-export.h
@@ -83,8 +83,9 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
uint32_t checksum,
time_t *when, char **desc);
-int text_vgsummary_import(const struct format_type *fmt,
+int text_read_metadata_summary(const struct format_type *fmt,
struct device *dev,
+ struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
diff --git a/lib/format_text/import.c b/lib/format_text/import.c
index 10d0b08a5..a977763a7 100644
--- a/lib/format_text/import.c
+++ b/lib/format_text/import.c
@@ -35,8 +35,9 @@ static void _init_text_import(void)
/*
* Find out vgname on a given device.
*/
-int text_vgsummary_import(const struct format_type *fmt,
+int text_read_metadata_summary(const struct format_type *fmt,
struct device *dev,
+ struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
@@ -45,19 +46,40 @@ int text_vgsummary_import(const struct format_type *fmt,
{
struct dm_config_tree *cft;
struct text_vg_version_ops **vsn;
+ char *buf_async = NULL;
int r = 0;
+ if (ld) {
+ if (ld->buf_len >= (offset + size))
+ buf_async = ld->buf;
+ else {
+ /*
+ * Needs data beyond the end of the async read buffer.
+ * Will do a new synchronous read to get the data.
+ * (ASYNC_SCAN_SIZE could also be made larger.)
+ */
+ log_debug_metadata("async read buffer for %s too small %u for metadata offset %llu size %u",
+ dev_name(dev), ld->buf_len, (unsigned long long)offset, size);
+ buf_async = NULL;
+ }
+ }
+
_init_text_import();
if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0)))
return_0;
if (dev) {
- log_debug_metadata("Reading metadata from %s at %llu size %d (+%d)",
- dev_name(dev), (unsigned long long)offset,
- size, size2);
-
- if (!config_file_read_fd(cft, dev, offset, size,
+ if (buf_async)
+ log_debug_metadata("Copying metadata for %s at %llu size %d (+%d)",
+ dev_name(dev), (unsigned long long)offset,
+ size, size2);
+ else
+ log_debug_metadata("Reading metadata from %s at %llu size %d (+%d)",
+ dev_name(dev), (unsigned long long)offset,
+ size, size2);
+
+ if (!config_file_read_fd(cft, dev, buf_async, offset, size,
offset2, size2, checksum_fn,
vgsummary->mda_checksum,
checksum_only, 1)) {
@@ -138,7 +160,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
((*vg_fmtdata)->cached_mda_size == (size + size2));
if ((!dev && !config_file_read(cft)) ||
- (dev && !config_file_read_fd(cft, dev, offset, size,
+ (dev && !config_file_read_fd(cft, dev, NULL, offset, size,
offset2, size2, checksum_fn, checksum,
skip_parse, 1)))
goto_out;
diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h
index a5ddecb46..744d170f2 100644
--- a/lib/format_text/layout.h
+++ b/lib/format_text/layout.h
@@ -104,7 +104,8 @@ struct mda_context {
#define LVM2_LABEL "LVM2 001"
#define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize())
-int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah,
+int read_metadata_location(const struct format_type *fmt, struct mda_header *mdah,
+ struct label_read_data *ld,
struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary,
uint64_t *mda_free_sectors);
diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c
index b175f1b61..0d03d7b2b 100644
--- a/lib/format_text/text_label.c
+++ b/lib/format_text/text_label.c
@@ -308,15 +308,15 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)),
return 1;
}
-struct _update_mda_baton {
+struct _mda_baton {
struct lvmcache_info *info;
struct label *label;
struct label_read_data *ld;
};
-static int _update_mda(struct metadata_area *mda, void *baton)
+static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton)
{
- struct _update_mda_baton *p = baton;
+ struct _mda_baton *p = baton;
const struct format_type *fmt = p->label->labeller->fmt;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct mda_header *mdah;
@@ -351,13 +351,7 @@ static int _update_mda(struct metadata_area *mda, void *baton)
return 1;
}
- /*
- * FIXME: vgname_from_mda reads metadata from mda location;
- * pass it ld so it can copy the metadata from ld->buf and
- * avoid reading the dev.
- */
-
- if (vgname_from_mda(fmt, mdah, &mdac->area, &vgsummary,
+ if (read_metadata_location(fmt, mdah, p->ld, &mdac->area, &vgsummary,
&mdac->free_sectors) &&
!lvmcache_update_vgname_and_id(p->info, &vgsummary)) {
if (!dev_close(mdac->area.dev))
@@ -390,7 +384,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf,
struct disk_locn *dlocn_xl;
uint64_t offset;
uint32_t ext_version;
- struct _update_mda_baton baton;
+ struct _mda_baton baton;
/*
* PV header base
@@ -453,8 +447,7 @@ out:
baton.label = *label;
baton.ld = ld;
- if (!lvmcache_foreach_mda(info, _update_mda, &baton))
- return_0;
+ lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton);
lvmcache_make_valid(info);
diff --git a/lib/label/label.c b/lib/label/label.c
index 110ddf2a7..f371c3588 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -463,13 +463,30 @@ static int _label_read_async_start(struct cmd_context *cmd, io_context_t aio_ctx
iocb->u.c.offset = 0;
ret = io_submit(aio_ctx, 1, &iocb);
- if (ret < 0)
+
+ /*
+ * This means that the number of devices exceeded the number of events
+ * set up in io_setup().
+ */
+ if (ret == -EAGAIN) {
+ log_debug_devs("Reading label no aio event for %s", dev_name(ld->dev));
return 0;
+ }
+
+ if (ret < 0) {
+ log_debug_devs("Reading label aio submit error %d for %s", ret, dev_name(ld->dev));
+ return 0;
+ }
return 1;
}
-#define MAX_GET_EVENTS 8
+/*
+ * We'll collect the results of this many async reads
+ * in one system call. It shouldn't matter much what
+ * number is used here.
+ */
+#define MAX_GET_EVENTS 16
/*
* Reap aio reads from devices.
@@ -556,6 +573,15 @@ static int _label_read_async_process(struct cmd_context *cmd, struct label_read_
}
/*
+ * The number of events to use in io_setup(),
+ * which is the limit on the number of concurrent
+ * async i/o's we can submit. After all these are
+ * used, io_submit() returns -EAGAIN, and we revert
+ * to doing synchronous io.
+ */
+#define MAX_ASYNC_EVENTS 1024
+
+/*
* label_scan iterates over all visible devices, looking
* for any that belong to lvm, and fills lvmcache with
* basic info about them. Its main job is to prepare
@@ -593,25 +619,30 @@ int label_scan_async(struct cmd_context *cmd)
*
* This data is meant to be large enough to cover all the
* headers and metadata that need to be read from the device
- * during the label scan:
+ * during the label scan for most common cases.
*
* 1. one of the first four sectors holds:
* label_header, pv_header, pv_header_extention
*
- * 2. the mda_header whose location is found from 1,
- * (is typically at 4096.)
+ * 2. the mda_header whose location is found from 1.
+ *
+ * 3. the metadata whose location is found from 2.
*
- * 3. the metadata whose location is from found 2,
- * (is typically at 16896.)
+ * If during processing, metadata needs to be read in a region
+ * beyond this buffer, then the code will revert to doing a
+ * synchronous read of the data it needs.
*/
buf_len = ASYNC_SCAN_SIZE;
+ memset(&aio_ctx, 0, sizeof(io_context_t));
+
/*
* if aio setup fails, caller will revert to sync scan
+ * The number of events set up here is the max number of
+ * concurrent async reads that can be submitted. After
+ * all of those are used, we revert to synchronous reads.
*/
- memset(&aio_ctx, 0, sizeof(io_context_t));
-
- error = io_setup(128, &aio_ctx);
+ error = io_setup(MAX_ASYNC_EVENTS, &aio_ctx);
if (error < 0) {
log_debug_devs("async io setup error %d, reverting to sync io.", error);
return_0;
diff --git a/lib/label/label.h b/lib/label/label.h
index 798d457ba..ebd0afb44 100644
--- a/lib/label/label.h
+++ b/lib/label/label.h
@@ -31,7 +31,13 @@ struct labeller;
void allow_reads_with_lvmetad(void);
-#define ASYNC_SCAN_SIZE (32 * 1024)
+/*
+ * This is the amount of data read from each device
+ * at the start of label scan. It's meant to be big
+ * enough to cover all the headers and metadata that
+ * need to be read during label scan for common cases.
+ */
+#define ASYNC_SCAN_SIZE (128 * 1024)
struct label_read_data {
char *buf; /* ASYNC_SCAN_SIZE aligned memory buffer */