From 055d3ecec1d9c449a8586860f570433a110f0125 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 6 Sep 2019 16:20:00 -0500 Subject: io-manager: use from lvm --- include/Makefile.in | 2 +- lib/cache/lvmcache.c | 3 +- lib/device/dev-io.c | 125 ++----- lib/device/dev-luks.c | 2 +- lib/device/dev-md.c | 2 +- lib/device/dev-swap.c | 2 +- lib/device/dev-type.c | 3 +- lib/device/device.h | 10 +- lib/device/io-manager.c | 27 +- lib/filters/filter-signature.c | 2 +- lib/format_text/format-text.c | 16 +- lib/label/hints.c | 1 - lib/label/label.c | 754 ++++++++++++++--------------------------- lib/label/label.h | 12 +- lib/metadata/mirror.c | 4 - tools/pvck.c | 6 +- 16 files changed, 329 insertions(+), 642 deletions(-) diff --git a/include/Makefile.in b/include/Makefile.in index d6c30d15b..b82dc1667 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -24,7 +24,7 @@ CLEAN_TARGETS += \ .symlinks_created \ activate.h \ archiver.h \ - bcache.h \ + io-manager.h \ btree.h \ clvm.h \ config-util.h \ diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 29d6446a6..e0a7b0764 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -913,7 +913,8 @@ int lvmcache_label_scan(struct cmd_context *cmd) * an info struct already exists in lvmcache for * the device. */ - label_scan(cmd); + if (!label_scan(cmd)) + return_0; /* * _choose_duplicates() returns: diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c index dd65c3d2e..6c6600672 100644 --- a/lib/device/dev-io.c +++ b/lib/device/dev-io.c @@ -58,13 +58,6 @@ static int _dev_get_size_file(struct device *dev, uint64_t *size) const char *name = dev_name(dev); struct stat info; - if (dev->size_seqno == _dev_size_seqno) { - log_very_verbose("%s: using cached size %" PRIu64 " sectors", - name, dev->size); - *size = dev->size; - return 1; - } - if (stat(name, &info)) { log_sys_error("stat", name); return 0; @@ -73,52 +66,12 @@ static int _dev_get_size_file(struct device *dev, uint64_t *size) *size = info.st_size; *size >>= SECTOR_SHIFT; /* Convert to sectors */ dev->size = *size; - dev->size_seqno = _dev_size_seqno; log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size); return 1; } -static int _dev_get_size_dev(struct device *dev, uint64_t *size) -{ - const char *name = dev_name(dev); - int fd = dev->bcache_fd; - int do_close = 0; - - if (dev->size_seqno == _dev_size_seqno) { - log_very_verbose("%s: using cached size %" PRIu64 " sectors", - name, dev->size); - *size = dev->size; - return 1; - } - - if (fd <= 0) { - if (!dev_open_readonly(dev)) - return_0; - fd = dev_fd(dev); - do_close = 1; - } - - if (ioctl(fd, BLKGETSIZE64, size) < 0) { - log_sys_error("ioctl BLKGETSIZE64", name); - if (do_close && !dev_close_immediate(dev)) - log_sys_error("close", name); - return 0; - } - - *size >>= BLKSIZE_SHIFT; /* Convert to sectors */ - dev->size = *size; - dev->size_seqno = _dev_size_seqno; - - log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size); - - if (do_close && !dev_close_immediate(dev)) - log_sys_error("close", name); - - return 1; -} - static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead) { long read_ahead_long; @@ -182,55 +135,24 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64 int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size, unsigned int *logical_block_size) { - int fd = dev->bcache_fd; - int do_close = 0; - unsigned int pbs = 0; - unsigned int lbs = 0; - - if (dev->physical_block_size || dev->logical_block_size) { - 
*physical_block_size = dev->physical_block_size; - *logical_block_size = dev->logical_block_size; - return 1; - } + int put = !dev->iodev; + int ret = 1; - if (fd <= 0) { - if (!dev_open_readonly(dev)) - return 0; - fd = dev_fd(dev); - do_close = 1; - } + if (!dev->iodev) + dev->iodev = io_get_dev(lvm_iom, dev_name(dev), EF_READ_ONLY); -#ifdef BLKPBSZGET /* not defined before kernel version 2.6.32 (e.g. rhel5) */ - /* - * BLKPBSZGET from kernel comment for blk_queue_physical_block_size: - * "the lowest possible sector size that the hardware can operate on - * without reverting to read-modify-write operations" - */ - if (ioctl(fd, BLKPBSZGET, &pbs)) { - stack; - pbs = 0; - } -#endif - - /* - * BLKSSZGET from kernel comment for blk_queue_logical_block_size: - * "the lowest possible block size that the storage device can address." - */ - if (ioctl(fd, BLKSSZGET, &lbs)) { - stack; - lbs = 0; - } - - dev->physical_block_size = pbs; - dev->logical_block_size = lbs; + if (!dev->iodev) + return 1; - *physical_block_size = pbs; - *logical_block_size = lbs; + if (!io_dev_block_sizes(dev->iodev, physical_block_size, logical_block_size)) + ret = 0; - if (do_close && !dev_close_immediate(dev)) - stack; + if (put) { + io_put_dev(dev->iodev); + dev->iodev = NULL; + } - return 1; + return ret; } /*----------------------------------------------------------------- @@ -243,13 +165,32 @@ void dev_size_seqno_inc(void) int dev_get_size(struct device *dev, uint64_t *size) { + int put = !dev->iodev; + int ret = 1; + if (!dev) return 0; if ((dev->flags & DEV_REGULAR)) return _dev_get_size_file(dev, size); - return _dev_get_size_dev(dev, size); + if (!dev->iodev) + dev->iodev = io_get_dev(lvm_iom, dev_name(dev), EF_READ_ONLY); + + if (!dev->iodev) + return 0; + + if (!io_dev_size(dev->iodev, size)) + ret = 0; + else + dev->size = *size; + + if (put) { + io_put_dev(dev->iodev); + dev->iodev = NULL; + } + + return ret; } int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead) diff --git a/lib/device/dev-luks.c b/lib/device/dev-luks.c index 344a55432..30b6745a2 100644 --- a/lib/device/dev-luks.c +++ b/lib/device/dev-luks.c @@ -23,7 +23,7 @@ int dev_is_luks(struct device *dev, uint64_t *offset_found, int full) char buf[LUKS_SIGNATURE_SIZE]; int ret = -1; - if (!scan_bcache) + if (!io_data_ready) return -EAGAIN; if (offset_found) diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c index 9d0a36363..9b47746a4 100644 --- a/lib/device/dev-md.c +++ b/lib/device/dev-md.c @@ -134,7 +134,7 @@ static int _native_dev_is_md_component(struct device *dev, uint64_t *offset_foun uint64_t size, sb_offset; int ret; - if (!scan_bcache) + if (!io_data_ready) return -EAGAIN; if (!dev_get_size(dev, &size)) { diff --git a/lib/device/dev-swap.c b/lib/device/dev-swap.c index 564bb58ec..c6a500f4a 100644 --- a/lib/device/dev-swap.c +++ b/lib/device/dev-swap.c @@ -42,7 +42,7 @@ int dev_is_swap(struct device *dev, uint64_t *offset_found, int full) unsigned page; int ret = 0; - if (!scan_bcache) + if (!io_data_ready) return -EAGAIN; if (!dev_get_size(dev, &size)) { diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index ebe0f99df..d352e3340 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -19,7 +19,6 @@ #include "lib/mm/xlate.h" #include "lib/config/config.h" #include "lib/metadata/metadata.h" -#include "lib/device/bcache.h" #include "lib/label/label.h" #ifdef BLKID_WIPING_SUPPORT @@ -512,7 +511,7 @@ static int _native_dev_is_partitioned(struct dev_types *dt, struct device *dev) { int r; - if (!scan_bcache) + 
if (!io_data_ready) return -EAGAIN; if (!_is_partitionable(dt, dev)) diff --git a/lib/device/device.h b/lib/device/device.h index bd3b35557..ab59a58e4 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -30,11 +30,8 @@ #define DEV_USED_FOR_LV 0x00000100 /* Is device used for an LV */ #define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */ #define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */ -#define DEV_IN_BCACHE 0x00000800 /* dev fd is open and used in bcache */ -#define DEV_BCACHE_EXCL 0x00001000 /* bcache_fd should be open EXCL */ #define DEV_FILTER_AFTER_SCAN 0x00002000 /* apply filter after bcache has data */ #define DEV_FILTER_OUT_SCAN 0x00004000 /* filtered out during label scan */ -#define DEV_BCACHE_WRITE 0x00008000 /* bcache_fd is open with RDWR */ #define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */ #define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */ #define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */ @@ -64,15 +61,14 @@ struct device { struct dm_list aliases; /* struct dm_str_list */ dev_t dev; + struct io_dev *iodev; /* from io_get_dev() */ + unsigned iom_flags; /* last used in io_get_dev() */ + /* private */ int fd; int open_count; - int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */ - int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */ int read_ahead; - int bcache_fd; uint32_t flags; - unsigned size_seqno; uint64_t size; uint64_t end; struct dev_ext ext; diff --git a/lib/device/io-manager.c b/lib/device/io-manager.c index e903b224d..1fd6372de 100644 --- a/lib/device/io-manager.c +++ b/lib/device/io-manager.c @@ -246,6 +246,18 @@ static bool _fallback_issue(struct async_engine *ae, enum dir d, int fd, return false; } +#if 1 + if (d == DIR_READ) { + log_debug("io R off %llu bytes %llu", + (unsigned long long)where, + (unsigned long long)len); + } else { + log_debug("io W off %llu bytes %llu", + (unsigned long long)where, + (unsigned long long)len); + } +#endif + while (len) { do { if (d == DIR_READ) @@ -256,7 +268,8 @@ static bool _fallback_issue(struct async_engine *ae, enum dir d, int fd, } while ((r < 0) && ((r == EINTR) || (r == EAGAIN))); if (r < 0) { - log_warn("io failed %d", r); + log_warn("%s failed fd %d %d errno %d", + (d == DIR_READ) ? "read" : "write", fd, r, errno); return false; } @@ -296,6 +309,18 @@ static bool _async_issue_(struct async_engine *e, enum dir d, int fd, cb->cb.u.c.nbytes = (se - sb) << SECTOR_SHIFT; cb->cb.aio_lio_opcode = (d == DIR_READ) ? 
IO_CMD_PREAD : IO_CMD_PWRITE; +#if 1 + if (d == DIR_READ) { + log_debug("io RA off %llu bytes %llu", + (unsigned long long)cb->cb.u.c.offset, + (unsigned long long)cb->cb.u.c.nbytes); + } else { + log_debug("io WA off %llu bytes %llu", + (unsigned long long)cb->cb.u.c.offset, + (unsigned long long)cb->cb.u.c.nbytes); + } +#endif + cb_array[0] = &cb->cb; do { r = io_submit(e->aio_context, 1, cb_array); diff --git a/lib/filters/filter-signature.c b/lib/filters/filter-signature.c index 6a81203a9..bc9c70eba 100644 --- a/lib/filters/filter-signature.c +++ b/lib/filters/filter-signature.c @@ -27,7 +27,7 @@ static int _ignore_signature(struct cmd_context *cmd, struct dev_filter *f __att char buf[BUFSIZE]; int ret = 0; - if (!scan_bcache) { + if (!io_data_ready) { /* let pass, call again after scan */ log_debug_devs("filter signature deferred %s", dev_name(dev)); dev->flags |= DEV_FILTER_AFTER_SCAN; diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 2a5c8ece1..9ace66a52 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -247,14 +247,10 @@ static int _raw_write_mda_header(const struct format_type *fmt, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl))); - dev_set_last_byte(dev, start_byte + MDA_HEADER_SIZE); - if (!dev_write_bytes(dev, start_byte, MDA_HEADER_SIZE, mdah)) { - dev_unset_last_byte(dev); - log_error("Failed to write mda header to %s fd %d", dev_name(dev), dev->bcache_fd); + log_error("Failed to write mda header to %s", dev_name(dev)); return 0; } - dev_unset_last_byte(dev); return 1; } @@ -950,8 +946,6 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, goto out; } - dev_set_last_byte(mdac->area.dev, mda_start + mdah->size); - log_debug_metadata("VG %s %u metadata write at %llu size %llu (wrap %llu)", vg->name, vg->seqno, (unsigned long long)write1_start, @@ -959,8 +953,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, (unsigned long long)write2_size); if (!dev_write_bytes(mdac->area.dev, write1_start, (size_t)write1_size, write_buf)) { - log_error("Failed to write metadata to %s fd %d", devname, mdac->area.dev->bcache_fd); - dev_unset_last_byte(mdac->area.dev); + log_error("Failed to write metadata to %s", devname); goto out; } @@ -972,14 +965,11 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!dev_write_bytes(mdac->area.dev, write2_start, write2_size, write_buf + new_size - new_wrap)) { - log_error("Failed to write metadata wrap to %s fd %d", devname, mdac->area.dev->bcache_fd); - dev_unset_last_byte(mdac->area.dev); + log_error("Failed to write metadata wrap to %s", devname); goto out; } } - dev_unset_last_byte(mdac->area.dev); - rlocn_new->checksum = calc_crc(INITIAL_CRC, (uint8_t *)write_buf, (uint32_t)(new_size - new_wrap)); diff --git a/lib/label/hints.c b/lib/label/hints.c index 6510fcf74..b87ab317c 100644 --- a/lib/label/hints.c +++ b/lib/label/hints.c @@ -141,7 +141,6 @@ #include "lib/misc/crc.h" #include "lib/mm/xlate.h" #include "lib/cache/lvmcache.h" -#include "lib/device/bcache.h" #include "lib/commands/toolcontext.h" #include "lib/activate/activate.h" #include "lib/label/hints.h" diff --git a/lib/label/label.c b/lib/label/label.c index fe59394b0..7673d0347 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -19,7 +19,7 @@ #include "lib/misc/crc.h" #include "lib/mm/xlate.h" #include "lib/cache/lvmcache.h" -#include "lib/device/bcache.h" +#include "lib/device/io-manager.h" #include "lib/commands/toolcontext.h" 
#include "lib/activate/activate.h" #include "lib/label/hints.h" @@ -31,9 +31,73 @@ #include #include -/* FIXME Allow for larger labels? Restricted to single sector currently */ +int io_data_ready; -static uint64_t _current_bcache_size_bytes; +static uint64_t _current_io_size_bytes; + +static int _get_dev(struct device *dev, unsigned flags) +{ + if (dev->iodev) { + if (flags == dev->iom_flags) + return 1; + + /* currently writable, want to read */ + if ((flags & EF_READ_ONLY) && !dev->iom_flags) + return 1; + + /* currently excl, want to read */ + if ((flags & EF_READ_ONLY) && (dev->iom_flags & EF_EXCL)) + return 1; + + /* currently excl, want to write */ + if (!flags && (dev->iom_flags & EF_EXCL)) + return 1; + + /* currently readonly, want to write */ + if (!flags && (dev->iom_flags & EF_READ_ONLY)) { + log_print("dev reopen for writing %s", dev_name(dev)); + io_put_dev(dev->iodev); + dev->iom_flags = 0; + goto get; + } + + /* currently non-excl, want excl */ + if ((flags & EF_EXCL) && !(dev->iom_flags & EF_EXCL)) { + log_print("dev reopen excl %s", dev_name(dev)); + io_put_dev(dev->iodev); + dev->iom_flags = 0; + goto get; + } + + /* Can this happen? */ + log_print("dev reopen flags %x iom_flags %x %s", flags, dev->iom_flags, dev_name(dev)); + io_put_dev(dev->iodev); + dev->iom_flags = 0; + } +get: + dev->iodev = io_get_dev(lvm_iom, dev_name(dev), flags); + + if (!dev->iodev) { + log_error("No io device available %s", dev_name(dev)); + return 0; + } + + dev->iom_flags = flags; + + return 1; +} + +static void _put_dev(struct device *dev) +{ + if (!dev->iodev) { + log_error("put_dev no iodev %s", dev_name(dev)); + return; + } + + io_put_dev(dev->iodev); + dev->iodev = NULL; + dev->iom_flags = 0; +} /* * Internal labeller struct. @@ -119,7 +183,7 @@ int label_remove(struct device *dev) log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev)); - if (!label_scan_open_excl(dev)) { + if (!_get_dev(dev, EF_EXCL)) { log_error("Failed to open device %s", dev_name(dev)); return 0; } @@ -169,6 +233,7 @@ int label_remove(struct device *dev) } } + _put_dev(dev); return r; } @@ -207,22 +272,19 @@ int label_write(struct device *dev, struct label *label) PRIu32 ".", dev_name(dev), label->sector, xlate32(lh->offset_xl)); - if (!label_scan_open(dev)) { + if (!_get_dev(dev, 0)) { log_error("Failed to open device %s", dev_name(dev)); return 0; } offset = label->sector << SECTOR_SHIFT; - dev_set_last_byte(dev, offset + LABEL_SIZE); - if (!dev_write_bytes(dev, offset, LABEL_SIZE, buf)) { log_debug_devs("Failed to write label to %s", dev_name(dev)); r = 0; } - dev_unset_last_byte(dev); - + _put_dev(dev); return r; } @@ -249,17 +311,10 @@ struct label *label_create(struct labeller *labeller) } -/* global variable for accessing the bcache populated by label scan */ -struct bcache *scan_bcache; +/* global variable for accessing the io-manager populated by label scan */ +struct io_manager *lvm_iom; -#define BCACHE_BLOCK_SIZE_IN_SECTORS 256 /* 256*512 = 128K */ - -static bool _in_bcache(struct device *dev) -{ - if (!dev) - return NULL; - return (dev->flags & DEV_IN_BCACHE) ? true : false; -} +#define IOM_BLOCK_SIZE_IN_SECTORS 64 /* 64*512 = 32K */ static struct labeller *_find_lvm_header(struct device *dev, char *scan_buf, @@ -284,8 +339,8 @@ static struct labeller *_find_lvm_header(struct device *dev, sector += LABEL_SIZE >> SECTOR_SHIFT) { /* - * The scan_buf passed in is a bcache block, which is - * BCACHE_BLOCK_SIZE_IN_SECTORS large. 
So if start_sector is + * The scan_buf passed in is a iom block, which is + * IOM_BLOCK_SIZE_IN_SECTORS large. So if start_sector is * one of the last couple sectors in that buffer, we need to * break early. */ @@ -367,8 +422,8 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, /* * The device may have signatures that exclude it from being processed. - * If filters were applied before bcache data was available, some - * filters may have deferred their check until the point where bcache + * If filters were applied before iom data was available, some + * filters may have deferred their check until the point where iom * data had been read (here). They set this flag to indicate that the * filters should be retested now that data from the device is ready. */ @@ -401,7 +456,7 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, * FIXME: we don't need to copy one sector from bb->data into label_buf, * we can just point label_buf at one sector in ld->buf. */ - if (!(labeller = _find_lvm_header(dev, bb->data, BCACHE_BLOCK_SIZE_IN_SECTORS, label_buf, §or, block_sector, start_sector))) { + if (!(labeller = _find_lvm_header(dev, bb->data, IOM_BLOCK_SIZE_IN_SECTORS, label_buf, §or, block_sector, start_sector))) { /* * Non-PVs exit here @@ -461,165 +516,6 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, return ret; } -static int _scan_dev_open(struct device *dev) -{ - struct dm_list *name_list; - struct dm_str_list *name_sl; - const char *name; - struct stat sbuf; - int retried = 0; - int flags = 0; - int fd; - - if (!dev) - return 0; - - if (dev->flags & DEV_IN_BCACHE) { - /* Shouldn't happen */ - log_error("Device open %s has DEV_IN_BCACHE already set", dev_name(dev)); - dev->flags &= ~DEV_IN_BCACHE; - } - - if (dev->bcache_fd > 0) { - /* Shouldn't happen */ - log_error("Device open %s already open with fd %d", - dev_name(dev), dev->bcache_fd); - return 0; - } - - /* - * All the names for this device (major:minor) are kept on - * dev->aliases, the first one is the primary/preferred name. - */ - if (!(name_list = dm_list_first(&dev->aliases))) { - /* Shouldn't happen */ - log_error("Device open %s %d:%d has no path names.", - dev_name(dev), (int)MAJOR(dev->dev), (int)MINOR(dev->dev)); - return 0; - } - name_sl = dm_list_item(name_list, struct dm_str_list); - name = name_sl->str; - - flags |= O_DIRECT; - flags |= O_NOATIME; - - /* - * FIXME: udev is a train wreck when we open RDWR and close, so we - * need to only use RDWR when we actually need to write, and use - * RDONLY otherwise. Fix, disable or scrap udev nonsense so we can - * just open with RDWR by default. - */ - - if (dev->flags & DEV_BCACHE_EXCL) { - flags |= O_EXCL; - flags |= O_RDWR; - } else if (dev->flags & DEV_BCACHE_WRITE) { - flags |= O_RDWR; - } else { - flags |= O_RDONLY; - } - -retry_open: - - fd = open(name, flags, 0777); - - if (fd < 0) { - if ((errno == EBUSY) && (flags & O_EXCL)) { - log_error("Can't open %s exclusively. Mounted filesystem?", - dev_name(dev)); - } else { - int major, minor; - - /* - * Shouldn't happen, if it does, print stat info to help figure - * out what's wrong. 
- */ - - major = (int)MAJOR(dev->dev); - minor = (int)MINOR(dev->dev); - - log_error("Device open %s %d:%d failed errno %d", name, major, minor, errno); - - if (stat(name, &sbuf)) { - log_debug_devs("Device open %s %d:%d stat failed errno %d", - name, major, minor, errno); - } else if (sbuf.st_rdev != dev->dev) { - log_debug_devs("Device open %s %d:%d stat %d:%d does not match.", - name, major, minor, - (int)MAJOR(sbuf.st_rdev), (int)MINOR(sbuf.st_rdev)); - } - - if (!retried) { - /* - * FIXME: remove this, the theory for this retry is that - * there may be a udev race that we can sometimes mask by - * retrying. This is here until we can figure out if it's - * needed and if so fix the real problem. - */ - usleep(5000); - log_debug_devs("Device open %s retry", dev_name(dev)); - retried = 1; - goto retry_open; - } - } - return 0; - } - - dev->flags |= DEV_IN_BCACHE; - dev->bcache_fd = fd; - return 1; -} - -static int _scan_dev_close(struct device *dev) -{ - if (!(dev->flags & DEV_IN_BCACHE)) - log_error("scan_dev_close %s no DEV_IN_BCACHE set", dev_name(dev)); - - dev->flags &= ~DEV_IN_BCACHE; - dev->flags &= ~DEV_BCACHE_EXCL; - - if (dev->bcache_fd < 0) { - log_error("scan_dev_close %s already closed", dev_name(dev)); - return 0; - } - - if (close(dev->bcache_fd)) - log_warn("close %s errno %d", dev_name(dev), errno); - dev->bcache_fd = -1; - return 1; -} - -static void _drop_bad_aliases(struct device *dev) -{ - struct dm_str_list *strl, *strl2; - const char *name; - struct stat sbuf; - int major = (int)MAJOR(dev->dev); - int minor = (int)MINOR(dev->dev); - int bad; - - dm_list_iterate_items_safe(strl, strl2, &dev->aliases) { - name = strl->str; - bad = 0; - - if (stat(name, &sbuf)) { - bad = 1; - log_debug_devs("Device path check %d:%d %s stat failed errno %d", - major, minor, name, errno); - } else if (sbuf.st_rdev != dev->dev) { - bad = 1; - log_debug_devs("Device path check %d:%d %s stat %d:%d does not match.", - major, minor, name, - (int)MAJOR(sbuf.st_rdev), (int)MINOR(sbuf.st_rdev)); - } - - if (bad) { - log_debug_devs("Device path check %d:%d dropping path %s.", major, minor, name); - dev_cache_failed_path(dev, name); - } - } -} - /* * Read or reread label/metadata from selected devs. 
* @@ -639,7 +535,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, struct dm_list reopen_devs; struct device_list *devl, *devl2; struct block *bb; - int retried_open = 0; int scan_read_errors = 0; int scan_process_errors = 0; int scan_failed_count = 0; @@ -657,7 +552,7 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, log_debug_devs("Scanning %d devices for VG info", dm_list_size(devs)); scan_more: - rem_prefetches = bcache_max_prefetches(scan_bcache); + rem_prefetches = io_max_prefetches(lvm_iom); submit_count = 0; dm_list_iterate_items_safe(devl, devl2, devs) { @@ -672,16 +567,17 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, if (!rem_prefetches) break; - if (!_in_bcache(devl->dev)) { - if (!_scan_dev_open(devl->dev)) { - log_debug_devs("Scan failed to open %s.", dev_name(devl->dev)); - dm_list_del(&devl->list); - dm_list_add(&reopen_devs, &devl->list); - continue; - } - } + if (!_get_dev(devl->dev, EF_READ_ONLY)) + break; - bcache_prefetch(scan_bcache, devl->dev->bcache_fd, 0); + /* + * Prefetch the first block of the disk which holds the label + * and pv header, the mda header, and some the metadata text + * (if current metadata text is further into the metadata area, + * it will not be in this block and will require reading another + * block or more later.) + */ + io_prefetch_block(lvm_iom, devl->dev->iodev, 0); rem_prefetches--; submit_count++; @@ -698,18 +594,18 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, scan_failed = 0; is_lvm_device = 0; - if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) { + if (!io_get_block(lvm_iom, devl->dev->iodev, 0, 0, &bb)) { log_debug_devs("Scan failed to read %s error %d.", dev_name(devl->dev), error); scan_failed = 1; scan_read_errors++; scan_failed_count++; lvmcache_del_dev(devl->dev); } else { - log_debug_devs("Processing data from device %s %d:%d fd %d block %p", + log_debug_devs("Processing data from device %s %d:%d block %p", dev_name(devl->dev), (int)MAJOR(devl->dev->dev), (int)MINOR(devl->dev->dev), - devl->dev->bcache_fd, bb); + bb); ret = _process_block(cmd, f, devl->dev, bb, 0, 0, &is_lvm_device); @@ -722,18 +618,27 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, } if (bb) - bcache_put(bb); + io_put_block(bb); /* - * Keep the bcache block of lvm devices we have processed so - * that the vg_read phase can reuse it. If bcache failed to - * read the block, or the device does not belong to lvm, then - * drop it from bcache. + * If iom failed to read the block, or the device does not + * belong to lvm, then drop it from iom. */ - if (scan_failed || !is_lvm_device) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); - _scan_dev_close(devl->dev); - } + if (scan_failed || !is_lvm_device) + io_invalidate_dev(lvm_iom, devl->dev->iodev); + + /* + * Allow io manager to drop this dev (close the fd and + * invalidate the cached block) if needed due to a full cache. + * If the common case, the cache should not be full and fds do + * not reach the max, so the dev will remain open in iomanager + * and the block we've read will remain cached, and when + * vg_read() comes to reading the metadata again, no new open + * or read will be needed. In the uncommon case, vg_read() + * will trigger a new open() and rereading the data from disk. 
+ */ + io_put_dev(devl->dev->iodev); + devl->dev->iodev = NULL; dm_list_del(&devl->list); dm_list_add(&done_devs, &devl->list); @@ -742,51 +647,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, if (!dm_list_empty(devs)) goto scan_more; - /* - * We're done scanning all the devs. If we failed to open any of them - * the first time through, refresh device paths and retry. We failed - * to open the devs on the reopen_devs list. - * - * FIXME: it's not clear if or why this helps. - */ - if (!dm_list_empty(&reopen_devs)) { - if (retried_open) { - /* Don't try again. */ - scan_failed_count += dm_list_size(&reopen_devs); - dm_list_splice(&done_devs, &reopen_devs); - goto out; - } - retried_open = 1; - - dm_list_iterate_items_safe(devl, devl2, &reopen_devs) { - _drop_bad_aliases(devl->dev); - - if (dm_list_empty(&devl->dev->aliases)) { - log_warn("WARNING: Scan ignoring device %d:%d with no paths.", - (int)MAJOR(devl->dev->dev), - (int)MINOR(devl->dev->dev)); - - dm_list_del(&devl->list); - lvmcache_del_dev(devl->dev); - scan_failed_count++; - } - } - - /* - * This will search the system's /dev for new path names and - * could help us reopen the device if it finds a new preferred - * path name for this dev's major:minor. It does that by - * inserting a new preferred path name on dev->aliases. open - * uses the first name from that list. - */ - log_debug_devs("Scanning refreshing device paths."); - dev_cache_scan(); - - /* Put devs that failed to open back on the original list to retry. */ - dm_list_splice(devs, &reopen_devs); - goto scan_more; - } -out: log_debug_devs("Scanned devices: read errors %d process errors %d failed %d", scan_read_errors, scan_process_errors, scan_failed_count); @@ -800,50 +660,52 @@ out: /* * We don't know ahead of time if we will find some VG metadata - * that is larger than the total size of the bcache, which would + * that is larger than the total size of the iom, which would * prevent us from reading/writing the VG since we do not dynamically - * increase the bcache size when we find it's too small. In these + * increase the iom size when we find it's too small. In these * cases the user would need to set io_memory_size to be larger * than the max VG metadata size (lvm does not impose any limit on * the metadata size.) 
*/ -#define MIN_BCACHE_BLOCKS 32 /* 4MB (32 * 128KB) */ -#define MAX_BCACHE_BLOCKS 4096 /* 512MB (4096 * 128KB) */ +#define MIN_IOM_BLOCKS 32 /* 4MB (128 * 32KB) */ +#define MAX_IOM_BLOCKS 16384 /* 512MB (16384 * 32KB) */ -static int _setup_bcache(void) +#define IOM_MAX_DEVS 4096 + +static int _setup_io_manager(void) { - struct io_engine_ *ioe = NULL; + struct io_engine *ioe = NULL; int iomem_kb = io_memory_size(); - int block_size_kb = (BCACHE_BLOCK_SIZE_IN_SECTORS * 512) / 1024; + int block_size_kb = (IOM_BLOCK_SIZE_IN_SECTORS * 512) / 1024; int cache_blocks; cache_blocks = iomem_kb / block_size_kb; - if (cache_blocks < MIN_BCACHE_BLOCKS) - cache_blocks = MIN_BCACHE_BLOCKS; + if (cache_blocks < MIN_IOM_BLOCKS) + cache_blocks = MIN_IOM_BLOCKS; - if (cache_blocks > MAX_BCACHE_BLOCKS) - cache_blocks = MAX_BCACHE_BLOCKS; + if (cache_blocks > MAX_IOM_BLOCKS) + cache_blocks = MAX_IOM_BLOCKS; - _current_bcache_size_bytes = cache_blocks * BCACHE_BLOCK_SIZE_IN_SECTORS * 512; + _current_io_size_bytes = cache_blocks * IOM_BLOCK_SIZE_IN_SECTORS * 512; if (use_aio()) { - if (!(ioe = create_async_io_engine_())) { + if (!(ioe = create_async_io_engine(true))) { log_warn("Failed to set up async io, using sync io."); init_use_aio(0); } } if (!ioe) { - if (!(ioe = create_sync_io_engine_())) { + if (!(ioe = create_sync_io_engine(true))) { log_error("Failed to set up sync io."); return 0; } } - if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) { - log_error("Failed to create bcache with %d cache blocks.", cache_blocks); + if (!(lvm_iom = io_manager_create(IOM_BLOCK_SIZE_IN_SECTORS, cache_blocks, IOM_MAX_DEVS, ioe))) { + log_error("Failed to create io-manager with %d cache blocks.", cache_blocks); return 0; } @@ -939,6 +801,14 @@ int label_scan(struct cmd_context *cmd) dm_list_init(&scan_devs); dm_list_init(&hints_list); + if (lvm_iom) + io_invalidate_all(lvm_iom); + + if (!lvm_iom) { + if (!_setup_io_manager()) + return 0; + } + /* * dev_cache_scan() creates a list of devices on the system * (saved in in dev-cache) which we can iterate through to @@ -990,23 +860,9 @@ int label_scan(struct cmd_context *cmd) continue; devl->dev = dev; dm_list_add(&all_devs, &devl->list); - - /* - * label_scan should not generally be called a second time, - * so this will usually not be true. - */ - if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - } }; dev_iter_destroy(iter); - if (!scan_bcache) { - if (!_setup_bcache()) - return 0; - } - /* * In some common cases we can avoid scanning all devices * by using hints which tell us which devices are PVs, which @@ -1039,35 +895,37 @@ int label_scan(struct cmd_context *cmd) */ _prepare_open_file_limit(cmd, dm_list_size(&scan_devs)); + io_data_ready = 1; + /* * Do the main scan. */ _scan_list(cmd, cmd->filter, &scan_devs, NULL); /* - * Metadata could be larger than total size of bcache, and bcache + * Metadata could be larger than total size of iom, and iom * cannot currently be resized during the command. If this is the * case (or within reach), warn that io_memory_size needs to be * set larger. * - * Even if bcache out of space did not cause a failure during scan, it + * Even if iom out of space did not cause a failure during scan, it * may cause a failure during the next vg_read phase or during vg_write. * - * If there was an error during scan, we could recreate bcache here + * If there was an error during scan, we could recreate iom here * with a larger size and then restart label_scan. 
But, this does not - * address the problem of writing new metadata that excedes the bcache + * address the problem of writing new metadata that excedes the iom * size and failing, which would often be hit first, i.e. we'll fail * to write new metadata exceding the max size before we have a chance * to read any metadata with that size, unless we find an existing vg * that has been previously created with the larger size. * - * If the largest metadata is within 1MB of the bcache size, then start + * If the largest metadata is within 1MB of the iom size, then start * warning. */ max_metadata_size_bytes = lvmcache_max_metadata_size(); - if (max_metadata_size_bytes + (1024 * 1024) > _current_bcache_size_bytes) { - /* we want bcache to be 1MB larger than the max metadata seen */ + if (max_metadata_size_bytes + (1024 * 1024) > _current_io_size_bytes) { + /* we want io-manager to be 1MB larger than the max metadata seen */ uint64_t want_size_kb = (max_metadata_size_bytes / 1024) + 1024; uint64_t remainder; if ((remainder = (want_size_kb % 1024))) @@ -1154,7 +1012,7 @@ int label_scan(struct cmd_context *cmd) * Scan and cache lvm data from the listed devices. If a device is already * scanned and cached, this replaces the previously cached lvm data for the * device. This is called when vg_read() wants to guarantee that it is using - * the latest data from the devices in the VG (since the scan populated bcache + * the latest data from the devices in the VG (since the scan populated iom * without a lock.) */ @@ -1162,16 +1020,16 @@ int label_scan_devs(struct cmd_context *cmd, struct dev_filter *f, struct dm_lis { struct device_list *devl; - if (!scan_bcache) { - if (!_setup_bcache()) + if (!lvm_iom) { + if (!_setup_io_manager()) return 0; } dm_list_iterate_items(devl, devs) { - if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); - _scan_dev_close(devl->dev); - } + if (!_get_dev(devl->dev, EF_READ_ONLY)) + continue; + + io_invalidate_dev(lvm_iom, devl->dev->iodev); } _scan_list(cmd, f, devs, NULL); @@ -1189,21 +1047,16 @@ int label_scan_devs_rw(struct cmd_context *cmd, struct dev_filter *f, struct dm_ { struct device_list *devl; - if (!scan_bcache) { - if (!_setup_bcache()) + if (!lvm_iom) { + if (!_setup_io_manager()) return 0; } dm_list_iterate_items(devl, devs) { - if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); - _scan_dev_close(devl->dev); - } - /* - * With this flag set, _scan_dev_open() done by - * _scan_list() will do open RW - */ - devl->dev->flags |= DEV_BCACHE_WRITE; + if (!_get_dev(devl->dev, 0)) + continue; + + io_invalidate_dev(lvm_iom, devl->dev->iodev); } _scan_list(cmd, f, devs, NULL); @@ -1217,15 +1070,10 @@ int label_scan_devs_excl(struct dm_list *devs) int failed = 0; dm_list_iterate_items(devl, devs) { - if (_in_bcache(devl->dev)) { - bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); - _scan_dev_close(devl->dev); - } - /* - * With this flag set, _scan_dev_open() done by - * _scan_list() will do open EXCL - */ - devl->dev->flags |= DEV_BCACHE_EXCL; + if (!_get_dev(devl->dev, EF_EXCL)) + continue; + + io_invalidate_dev(lvm_iom, devl->dev->iodev); } _scan_list(NULL, NULL, devs, &failed); @@ -1237,15 +1085,15 @@ int label_scan_devs_excl(struct dm_list *devs) void label_scan_invalidate(struct device *dev) { - if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - } + if (!_get_dev(dev, EF_READ_ONLY)) + return; + io_invalidate_dev(lvm_iom, dev->iodev); + 
_put_dev(dev); } /* * If a PV is stacked on an LV, then the LV is kept open - * in bcache, and needs to be closed so the open fd doesn't + * in iom, and needs to be closed so the open fd doesn't * interfere with processing the LV. */ @@ -1264,44 +1112,34 @@ void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv } /* - * Empty the bcache of all blocks and close all open fds, - * but keep the bcache set up. + * Empty the iom of all blocks and close all open fds, + * but keep the iom set up. */ void label_scan_drop(struct cmd_context *cmd) { - struct dev_iter *iter; - struct device *dev; - - if (!(iter = dev_iter_create(NULL, 0))) - return; - - while ((dev = dev_iter_get(cmd, iter))) { - if (_in_bcache(dev)) - _scan_dev_close(dev); - } - dev_iter_destroy(iter); + if (lvm_iom) + io_invalidate_all(lvm_iom); } /* - * Close devices that are open because bcache is holding blocks for them. - * Destroy the bcache. + * Close devices that are open because iom is holding blocks for them. + * Destroy the iom. */ void label_scan_destroy(struct cmd_context *cmd) { - if (!scan_bcache) + if (!lvm_iom) return; - label_scan_drop(cmd); - - bcache_destroy(scan_bcache); - scan_bcache = NULL; + io_invalidate_all(lvm_iom); + io_manager_destroy(lvm_iom); + lvm_iom = NULL; } /* * Read (or re-read) and process (or re-process) the data for a device. This - * will reset (clear and repopulate) the bcache and lvmcache info for this + * will reset (clear and repopulate) the iom and lvmcache info for this * device. There are only a couple odd places that want to reread a specific * device, this is not a commonly used function. */ @@ -1319,11 +1157,13 @@ int label_read(struct device *dev) dm_list_init(&one_dev); dm_list_add(&one_dev, &devl->list); - if (_in_bcache(dev)) { - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); + if (!_get_dev(dev, EF_READ_ONLY)) { + log_error("No io device available for %s", dev_name(devl->dev)); + return 0; } + io_invalidate_dev(lvm_iom, dev->iodev); + _scan_list(NULL, NULL, &one_dev, &failed); free(devl); @@ -1333,264 +1173,166 @@ int label_read(struct device *dev) return 1; } -int label_scan_setup_bcache(void) +int label_scan_setup_io_manager(void) { - if (!scan_bcache) { - if (!_setup_bcache()) + if (!lvm_iom) { + if (!_setup_io_manager()) return 0; } return 1; } -/* - * This is needed to write to a new non-lvm device. - * Scanning that dev would not keep it open or in - * bcache, but to use bcache_write we need the dev - * to be open so we can use dev->bcache_fd to write. - */ - +/* FIXME: probably not needed, read will do it */ int label_scan_open(struct device *dev) { - if (!_in_bcache(dev)) - return _scan_dev_open(dev); - return 1; + return _get_dev(dev, EF_READ_ONLY); } int label_scan_open_excl(struct device *dev) { - if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_EXCL)) { - /* FIXME: avoid tossing out bcache blocks just to replace fd. */ - log_debug("Close and reopen excl %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - } - dev->flags |= DEV_BCACHE_EXCL; - dev->flags |= DEV_BCACHE_WRITE; - return label_scan_open(dev); + return _get_dev(dev, EF_EXCL); } +/* FIXME: probably not needed, write will do it */ int label_scan_open_rw(struct device *dev) { - if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) { - /* FIXME: avoid tossing out bcache blocks just to replace fd. 
*/ - log_debug("Close and reopen rw %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - } - dev->flags |= DEV_BCACHE_WRITE; - return label_scan_open(dev); + return _get_dev(dev, 0); } bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data) { - if (!scan_bcache) { + int put = !dev->iodev; + + if (!lvm_iom) { /* Should not happen */ - log_error("dev_read bcache not set up %s", dev_name(dev)); + log_error("dev_read io manager not set up %s", dev_name(dev)); return false; } - if (dev->bcache_fd <= 0) { - /* This is not often needed. */ - if (!label_scan_open(dev)) { - log_error("Error opening device %s for reading at %llu length %u.", - dev_name(dev), (unsigned long long)start, (uint32_t)len); - return false; - } - } + if (!_get_dev(dev, EF_READ_ONLY)) + return false; - if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + if (!io_read_bytes(lvm_iom, dev->iodev, start, len, data)) { log_error("Error reading device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } + + if (put) + _put_dev(dev); return true; } bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data) { + int put = !dev->iodev; + if (test_mode()) return true; - if (!scan_bcache) { + if (!lvm_iom) { /* Should not happen */ - log_error("dev_write bcache not set up %s", dev_name(dev)); + log_error("dev_write io manager not set up %s", dev_name(dev)); return false; } - if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) { - /* FIXME: avoid tossing out bcache blocks just to replace fd. */ - log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - - dev->flags |= DEV_BCACHE_WRITE; - label_scan_open(dev); - } - - if (dev->bcache_fd <= 0) { - /* This is not often needed. */ - dev->flags |= DEV_BCACHE_WRITE; - if (!label_scan_open(dev)) { - log_error("Error opening device %s for writing at %llu length %u.", - dev_name(dev), (unsigned long long)start, (uint32_t)len); - return false; - } - } + if (!_get_dev(dev, 0)) + return false; - if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + if (!io_write_bytes(lvm_iom, dev->iodev, start, len, data)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } - if (!bcache_flush(scan_bcache)) { + if (!io_flush(lvm_iom)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } + + if (put) + _put_dev(dev); return true; } bool dev_write_zeros(struct device *dev, uint64_t start, size_t len) { + int put = !dev->iodev; + if (test_mode()) return true; - if (!scan_bcache) { - log_error("dev_write_zeros bcache not set up %s", dev_name(dev)); + if (!lvm_iom) { + log_error("dev_write_zeros io manager not set up %s", dev_name(dev)); return false; } - if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) { - /* FIXME: avoid tossing out bcache blocks just to replace fd. 
*/ - log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - - dev->flags |= DEV_BCACHE_WRITE; - label_scan_open(dev); - } - - if (dev->bcache_fd <= 0) { - /* This is not often needed. */ - dev->flags |= DEV_BCACHE_WRITE; - if (!label_scan_open(dev)) { - log_error("Error opening device %s for writing at %llu length %u.", - dev_name(dev), (unsigned long long)start, (uint32_t)len); - return false; - } - } - - dev_set_last_byte(dev, start + len); + if (!_get_dev(dev, 0)) + return false; - if (!bcache_zero_bytes(scan_bcache, dev->bcache_fd, start, len)) { + if (!io_zero_bytes(lvm_iom, dev->iodev, start, len)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - dev_unset_last_byte(dev); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } - if (!bcache_flush(scan_bcache)) { + if (!io_flush(lvm_iom)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - dev_unset_last_byte(dev); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } - dev_unset_last_byte(dev); + + if (put) + _put_dev(dev); return true; } bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val) { + int put = !dev->iodev; + if (test_mode()) return true; - if (!scan_bcache) { - log_error("dev_set_bytes bcache not set up %s", dev_name(dev)); + if (!lvm_iom) { + log_error("dev_set_bytes io manager not set up %s", dev_name(dev)); return false; } - if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) { - /* FIXME: avoid tossing out bcache blocks just to replace fd. */ - log_debug("Close and reopen to write %s", dev_name(dev)); - bcache_invalidate_fd(scan_bcache, dev->bcache_fd); - _scan_dev_close(dev); - /* goes to label_scan_open() since bcache_fd < 0 */ - } - - if (dev->bcache_fd <= 0) { - /* This is not often needed. */ - dev->flags |= DEV_BCACHE_WRITE; - if (!label_scan_open(dev)) { - log_error("Error opening device %s for writing at %llu length %u.", - dev_name(dev), (unsigned long long)start, (uint32_t)len); - return false; - } - } - - dev_set_last_byte(dev, start + len); + if (!_get_dev(dev, 0)) + return false; - if (!bcache_set_bytes(scan_bcache, dev->bcache_fd, start, len, val)) { + if (!io_set_bytes(lvm_iom, dev->iodev, start, len, val)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - dev_unset_last_byte(dev); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } - if (!bcache_flush(scan_bcache)) { + if (!io_flush(lvm_iom)) { log_error("Error writing device %s at %llu length %u.", dev_name(dev), (unsigned long long)start, (uint32_t)len); - dev_unset_last_byte(dev); - label_scan_invalidate(dev); + if (put) + _put_dev(dev); return false; } - dev_unset_last_byte(dev); + if (put) + _put_dev(dev); return true; } -void dev_set_last_byte(struct device *dev, uint64_t offset) -{ - unsigned int physical_block_size = 0; - unsigned int logical_block_size = 0; - unsigned int bs; - - if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) { - stack; - return; /* FIXME: error path ? 
*/ - } - - if ((physical_block_size == 512) && (logical_block_size == 512)) - bs = 512; - else if ((physical_block_size == 4096) && (logical_block_size == 4096)) - bs = 4096; - else if ((physical_block_size == 512) || (logical_block_size == 512)) { - log_debug("Set last byte mixed block sizes physical %u logical %u using 512", - physical_block_size, logical_block_size); - bs = 512; - } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) { - log_debug("Set last byte mixed block sizes physical %u logical %u using 4096", - physical_block_size, logical_block_size); - bs = 4096; - } else { - log_debug("Set last byte mixed block sizes physical %u logical %u using 512", - physical_block_size, logical_block_size); - bs = 512; - } - - bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs); -} - -void dev_unset_last_byte(struct device *dev) -{ - bcache_unset_last_byte(scan_bcache, dev->bcache_fd); -} - diff --git a/lib/label/label.h b/lib/label/label.h index f06b7df63..b60f9b5e1 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -18,7 +18,7 @@ #include "lib/uuid/uuid.h" #include "lib/device/device.h" -#include "lib/device/bcache.h" +#include "lib/device/io-manager.h" #define LABEL_ID "LABELONE" #define LABEL_SIZE SECTOR_SIZE /* Think very carefully before changing this */ @@ -100,7 +100,8 @@ int label_write(struct device *dev, struct label *label); struct label *label_create(struct labeller *labeller); void label_destroy(struct label *label); -extern struct bcache *scan_bcache; +extern struct io_manager *lvm_iom; +extern int io_data_ready; /* set once io manager is populated with device blocks */ int label_scan(struct cmd_context *cmd); int label_scan_devs(struct cmd_context *cmd, struct dev_filter *f, struct dm_list *devs); @@ -113,20 +114,17 @@ void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev); int label_read_sector(struct device *dev, uint64_t scan_sector); void label_scan_confirm(struct device *dev); -int label_scan_setup_bcache(void); +int label_scan_setup_io_manager(void); int label_scan_open(struct device *dev); int label_scan_open_excl(struct device *dev); int label_scan_open_rw(struct device *dev); /* - * Wrappers around bcache equivalents. - * (these make it easier to disable bcache and revert to direct rw if needed) + * Wrappers around io-manager equivalents. 
 */
 bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data);
 bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data);
 bool dev_write_zeros(struct device *dev, uint64_t start, size_t len);
 bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val);
-void dev_set_last_byte(struct device *dev, uint64_t offset);
-void dev_unset_last_byte(struct device *dev);
 
 #endif
diff --git a/lib/metadata/mirror.c b/lib/metadata/mirror.c
index 845945728..73489a91e 100644
--- a/lib/metadata/mirror.c
+++ b/lib/metadata/mirror.c
@@ -302,14 +302,10 @@ static int _write_log_header(struct cmd_context *cmd, struct logical_volume *lv)
 		return 0;
 	}
 
-	dev_set_last_byte(dev, sizeof(log_header));
-
 	if (!dev_write_bytes(dev, UINT64_C(0), sizeof(log_header), &log_header)) {
-		dev_unset_last_byte(dev);
 		log_error("Failed to write log header to %s.", name);
 		return 0;
 	}
-	dev_unset_last_byte(dev);
 
 	label_scan_invalidate(dev);
 
diff --git a/tools/pvck.c b/tools/pvck.c
index af2dd8e8a..a59240938 100644
--- a/tools/pvck.c
+++ b/tools/pvck.c
@@ -1126,7 +1126,7 @@ static int _dump_headers(struct cmd_context *cmd,
 		return ECMD_FAILED;
 	}
 
-	label_scan_setup_bcache();
+	label_scan_setup_io_manager();
 
 	if (!_dump_label_and_pv_header(cmd, 1, dev, NULL,
 			&mda1_offset, &mda1_size, &mda2_offset, &mda2_size))
@@ -1199,7 +1199,7 @@ static int _dump_metadata(struct cmd_context *cmd,
 		return ECMD_FAILED;
 	}
 
-	label_scan_setup_bcache();
+	label_scan_setup_io_manager();
 
 	if (!_dump_label_and_pv_header(cmd, 0, dev, NULL,
 			&mda1_offset, &mda1_size, &mda2_offset, &mda2_size))
@@ -1324,7 +1324,7 @@ int pvck(struct cmd_context *cmd, int argc, char **argv)
 	if (arg_is_set(cmd, labelsector_ARG))
 		labelsector = arg_uint64_value(cmd, labelsector_ARG, UINT64_C(0));
 
-	label_scan_setup_bcache();
+	label_scan_setup_io_manager();
 
 	for (i = 0; i < argc; i++) {
 		pv_name = argv[i];
-- 
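
A minimal sketch (not part of the commit) of the get/use/put calling pattern this patch introduces, using only io-manager calls that appear in the diff above: io_get_dev()/io_put_dev(), io_read_bytes()/io_write_bytes(), io_flush(), the lvm_iom global and the EF_READ_ONLY flag. The helper name, the offsets and the trimmed error handling are illustrative assumptions, not code from the patch; it presumes the lib/label/label.h and lib/device/io-manager.h headers from the tree above.

/*
 * Sketch only: the pattern that the new dev_read_bytes()/dev_write_bytes()
 * wrappers follow after this change.  "example_rewrite_sector" is a
 * hypothetical caller, not a function from the patch.
 */
static bool example_rewrite_sector(struct device *dev, void *buf, size_t len)
{
	struct io_dev *iodev;

	/*
	 * Take a reference on the io-manager device (opens the fd if needed).
	 * Flags 0 means a default read-write open, as used by dev_write_bytes();
	 * EF_READ_ONLY is used for scan-only access.
	 */
	if (!(iodev = io_get_dev(lvm_iom, dev_name(dev), 0)))
		return false;

	/* Reads go through the io-manager 32K block cache. */
	if (!io_read_bytes(lvm_iom, iodev, 0, len, buf)) {
		io_put_dev(iodev);
		return false;
	}

	/* ... modify buf ... */

	/* Write back and flush so the data reaches the device. */
	if (!io_write_bytes(lvm_iom, iodev, 0, len, buf) || !io_flush(lvm_iom)) {
		io_put_dev(iodev);
		return false;
	}

	/*
	 * Drop the reference; the io-manager may then close the fd and reclaim
	 * the cached blocks if its cache or fd limit is reached.
	 */
	io_put_dev(iodev);
	return true;
}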