summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2019-09-06 16:20:00 -0500
committerDavid Teigland <teigland@redhat.com>2019-09-06 16:20:00 -0500
commit055d3ecec1d9c449a8586860f570433a110f0125 (patch)
tree6b68e683bb9245e41cf71e06c5643731994ffeb6
parent13c702f862008192a36ee171e546ba0d247bcf9f (diff)
downloadlvm2-dev-dct-lvm-io-manager.tar.gz
io-manager: use from lvmdev-dct-lvm-io-manager
-rw-r--r--include/Makefile.in2
-rw-r--r--lib/cache/lvmcache.c3
-rw-r--r--lib/device/dev-io.c125
-rw-r--r--lib/device/dev-luks.c2
-rw-r--r--lib/device/dev-md.c2
-rw-r--r--lib/device/dev-swap.c2
-rw-r--r--lib/device/dev-type.c3
-rw-r--r--lib/device/device.h10
-rw-r--r--lib/device/io-manager.c27
-rw-r--r--lib/filters/filter-signature.c2
-rw-r--r--lib/format_text/format-text.c16
-rw-r--r--lib/label/hints.c1
-rw-r--r--lib/label/label.c754
-rw-r--r--lib/label/label.h12
-rw-r--r--lib/metadata/mirror.c4
-rw-r--r--tools/pvck.c6
16 files changed, 329 insertions, 642 deletions
diff --git a/include/Makefile.in b/include/Makefile.in
index d6c30d15b..b82dc1667 100644
--- a/include/Makefile.in
+++ b/include/Makefile.in
@@ -24,7 +24,7 @@ CLEAN_TARGETS += \
.symlinks_created \
activate.h \
archiver.h \
- bcache.h \
+ io-manager.h \
btree.h \
clvm.h \
config-util.h \
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
index 29d6446a6..e0a7b0764 100644
--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -913,7 +913,8 @@ int lvmcache_label_scan(struct cmd_context *cmd)
* an info struct already exists in lvmcache for
* the device.
*/
- label_scan(cmd);
+ if (!label_scan(cmd))
+ return_0;
/*
* _choose_duplicates() returns:
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
index dd65c3d2e..6c6600672 100644
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@@ -58,13 +58,6 @@ static int _dev_get_size_file(struct device *dev, uint64_t *size)
const char *name = dev_name(dev);
struct stat info;
- if (dev->size_seqno == _dev_size_seqno) {
- log_very_verbose("%s: using cached size %" PRIu64 " sectors",
- name, dev->size);
- *size = dev->size;
- return 1;
- }
-
if (stat(name, &info)) {
log_sys_error("stat", name);
return 0;
@@ -73,52 +66,12 @@ static int _dev_get_size_file(struct device *dev, uint64_t *size)
*size = info.st_size;
*size >>= SECTOR_SHIFT; /* Convert to sectors */
dev->size = *size;
- dev->size_seqno = _dev_size_seqno;
log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
return 1;
}
-static int _dev_get_size_dev(struct device *dev, uint64_t *size)
-{
- const char *name = dev_name(dev);
- int fd = dev->bcache_fd;
- int do_close = 0;
-
- if (dev->size_seqno == _dev_size_seqno) {
- log_very_verbose("%s: using cached size %" PRIu64 " sectors",
- name, dev->size);
- *size = dev->size;
- return 1;
- }
-
- if (fd <= 0) {
- if (!dev_open_readonly(dev))
- return_0;
- fd = dev_fd(dev);
- do_close = 1;
- }
-
- if (ioctl(fd, BLKGETSIZE64, size) < 0) {
- log_sys_error("ioctl BLKGETSIZE64", name);
- if (do_close && !dev_close_immediate(dev))
- log_sys_error("close", name);
- return 0;
- }
-
- *size >>= BLKSIZE_SHIFT; /* Convert to sectors */
- dev->size = *size;
- dev->size_seqno = _dev_size_seqno;
-
- log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
-
- if (do_close && !dev_close_immediate(dev))
- log_sys_error("close", name);
-
- return 1;
-}
-
static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead)
{
long read_ahead_long;
@@ -182,55 +135,24 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
int dev_get_direct_block_sizes(struct device *dev, unsigned int *physical_block_size,
unsigned int *logical_block_size)
{
- int fd = dev->bcache_fd;
- int do_close = 0;
- unsigned int pbs = 0;
- unsigned int lbs = 0;
-
- if (dev->physical_block_size || dev->logical_block_size) {
- *physical_block_size = dev->physical_block_size;
- *logical_block_size = dev->logical_block_size;
- return 1;
- }
+ int put = !dev->iodev;
+ int ret = 1;
- if (fd <= 0) {
- if (!dev_open_readonly(dev))
- return 0;
- fd = dev_fd(dev);
- do_close = 1;
- }
+ if (!dev->iodev)
+ dev->iodev = io_get_dev(lvm_iom, dev_name(dev), EF_READ_ONLY);
-#ifdef BLKPBSZGET /* not defined before kernel version 2.6.32 (e.g. rhel5) */
- /*
- * BLKPBSZGET from kernel comment for blk_queue_physical_block_size:
- * "the lowest possible sector size that the hardware can operate on
- * without reverting to read-modify-write operations"
- */
- if (ioctl(fd, BLKPBSZGET, &pbs)) {
- stack;
- pbs = 0;
- }
-#endif
-
- /*
- * BLKSSZGET from kernel comment for blk_queue_logical_block_size:
- * "the lowest possible block size that the storage device can address."
- */
- if (ioctl(fd, BLKSSZGET, &lbs)) {
- stack;
- lbs = 0;
- }
-
- dev->physical_block_size = pbs;
- dev->logical_block_size = lbs;
+ if (!dev->iodev)
+ return 1;
- *physical_block_size = pbs;
- *logical_block_size = lbs;
+ if (!io_dev_block_sizes(dev->iodev, physical_block_size, logical_block_size))
+ ret = 0;
- if (do_close && !dev_close_immediate(dev))
- stack;
+ if (put) {
+ io_put_dev(dev->iodev);
+ dev->iodev = NULL;
+ }
- return 1;
+ return ret;
}
/*-----------------------------------------------------------------
@@ -243,13 +165,32 @@ void dev_size_seqno_inc(void)
int dev_get_size(struct device *dev, uint64_t *size)
{
+ int put = !dev->iodev;
+ int ret = 1;
+
if (!dev)
return 0;
if ((dev->flags & DEV_REGULAR))
return _dev_get_size_file(dev, size);
- return _dev_get_size_dev(dev, size);
+ if (!dev->iodev)
+ dev->iodev = io_get_dev(lvm_iom, dev_name(dev), EF_READ_ONLY);
+
+ if (!dev->iodev)
+ return 0;
+
+ if (!io_dev_size(dev->iodev, size))
+ ret = 0;
+ else
+ dev->size = *size;
+
+ if (put) {
+ io_put_dev(dev->iodev);
+ dev->iodev = NULL;
+ }
+
+ return ret;
}
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead)
diff --git a/lib/device/dev-luks.c b/lib/device/dev-luks.c
index 344a55432..30b6745a2 100644
--- a/lib/device/dev-luks.c
+++ b/lib/device/dev-luks.c
@@ -23,7 +23,7 @@ int dev_is_luks(struct device *dev, uint64_t *offset_found, int full)
char buf[LUKS_SIGNATURE_SIZE];
int ret = -1;
- if (!scan_bcache)
+ if (!io_data_ready)
return -EAGAIN;
if (offset_found)
diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c
index 9d0a36363..9b47746a4 100644
--- a/lib/device/dev-md.c
+++ b/lib/device/dev-md.c
@@ -134,7 +134,7 @@ static int _native_dev_is_md_component(struct device *dev, uint64_t *offset_foun
uint64_t size, sb_offset;
int ret;
- if (!scan_bcache)
+ if (!io_data_ready)
return -EAGAIN;
if (!dev_get_size(dev, &size)) {
diff --git a/lib/device/dev-swap.c b/lib/device/dev-swap.c
index 564bb58ec..c6a500f4a 100644
--- a/lib/device/dev-swap.c
+++ b/lib/device/dev-swap.c
@@ -42,7 +42,7 @@ int dev_is_swap(struct device *dev, uint64_t *offset_found, int full)
unsigned page;
int ret = 0;
- if (!scan_bcache)
+ if (!io_data_ready)
return -EAGAIN;
if (!dev_get_size(dev, &size)) {
diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c
index ebe0f99df..d352e3340 100644
--- a/lib/device/dev-type.c
+++ b/lib/device/dev-type.c
@@ -19,7 +19,6 @@
#include "lib/mm/xlate.h"
#include "lib/config/config.h"
#include "lib/metadata/metadata.h"
-#include "lib/device/bcache.h"
#include "lib/label/label.h"
#ifdef BLKID_WIPING_SUPPORT
@@ -512,7 +511,7 @@ static int _native_dev_is_partitioned(struct dev_types *dt, struct device *dev)
{
int r;
- if (!scan_bcache)
+ if (!io_data_ready)
return -EAGAIN;
if (!_is_partitionable(dt, dev))
diff --git a/lib/device/device.h b/lib/device/device.h
index bd3b35557..ab59a58e4 100644
--- a/lib/device/device.h
+++ b/lib/device/device.h
@@ -30,11 +30,8 @@
#define DEV_USED_FOR_LV 0x00000100 /* Is device used for an LV */
#define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */
#define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */
-#define DEV_IN_BCACHE 0x00000800 /* dev fd is open and used in bcache */
-#define DEV_BCACHE_EXCL 0x00001000 /* bcache_fd should be open EXCL */
#define DEV_FILTER_AFTER_SCAN 0x00002000 /* apply filter after bcache has data */
#define DEV_FILTER_OUT_SCAN 0x00004000 /* filtered out during label scan */
-#define DEV_BCACHE_WRITE 0x00008000 /* bcache_fd is open with RDWR */
#define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */
#define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */
#define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */
@@ -64,15 +61,14 @@ struct device {
struct dm_list aliases; /* struct dm_str_list */
dev_t dev;
+ struct io_dev *iodev; /* from io_get_dev() */
+ unsigned iom_flags; /* last used in io_get_dev() */
+
/* private */
int fd;
int open_count;
- int physical_block_size; /* From BLKPBSZGET: lowest possible sector size that the hardware can operate on without reverting to read-modify-write operations */
- int logical_block_size; /* From BLKSSZGET: lowest possible block size that the storage device can address */
int read_ahead;
- int bcache_fd;
uint32_t flags;
- unsigned size_seqno;
uint64_t size;
uint64_t end;
struct dev_ext ext;
diff --git a/lib/device/io-manager.c b/lib/device/io-manager.c
index e903b224d..1fd6372de 100644
--- a/lib/device/io-manager.c
+++ b/lib/device/io-manager.c
@@ -246,6 +246,18 @@ static bool _fallback_issue(struct async_engine *ae, enum dir d, int fd,
return false;
}
+#if 1
+ if (d == DIR_READ) {
+ log_debug("io R off %llu bytes %llu",
+ (unsigned long long)where,
+ (unsigned long long)len);
+ } else {
+ log_debug("io W off %llu bytes %llu",
+ (unsigned long long)where,
+ (unsigned long long)len);
+ }
+#endif
+
while (len) {
do {
if (d == DIR_READ)
@@ -256,7 +268,8 @@ static bool _fallback_issue(struct async_engine *ae, enum dir d, int fd,
} while ((r < 0) && ((r == EINTR) || (r == EAGAIN)));
if (r < 0) {
- log_warn("io failed %d", r);
+ log_warn("%s failed fd %d %d errno %d",
+ (d == DIR_READ) ? "read" : "write", fd, r, errno);
return false;
}
@@ -296,6 +309,18 @@ static bool _async_issue_(struct async_engine *e, enum dir d, int fd,
cb->cb.u.c.nbytes = (se - sb) << SECTOR_SHIFT;
cb->cb.aio_lio_opcode = (d == DIR_READ) ? IO_CMD_PREAD : IO_CMD_PWRITE;
+#if 1
+ if (d == DIR_READ) {
+ log_debug("io RA off %llu bytes %llu",
+ (unsigned long long)cb->cb.u.c.offset,
+ (unsigned long long)cb->cb.u.c.nbytes);
+ } else {
+ log_debug("io WA off %llu bytes %llu",
+ (unsigned long long)cb->cb.u.c.offset,
+ (unsigned long long)cb->cb.u.c.nbytes);
+ }
+#endif
+
cb_array[0] = &cb->cb;
do {
r = io_submit(e->aio_context, 1, cb_array);
diff --git a/lib/filters/filter-signature.c b/lib/filters/filter-signature.c
index 6a81203a9..bc9c70eba 100644
--- a/lib/filters/filter-signature.c
+++ b/lib/filters/filter-signature.c
@@ -27,7 +27,7 @@ static int _ignore_signature(struct cmd_context *cmd, struct dev_filter *f __att
char buf[BUFSIZE];
int ret = 0;
- if (!scan_bcache) {
+ if (!io_data_ready) {
/* let pass, call again after scan */
log_debug_devs("filter signature deferred %s", dev_name(dev));
dev->flags |= DEV_FILTER_AFTER_SCAN;
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
index 2a5c8ece1..9ace66a52 100644
--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -247,14 +247,10 @@ static int _raw_write_mda_header(const struct format_type *fmt,
MDA_HEADER_SIZE -
sizeof(mdah->checksum_xl)));
- dev_set_last_byte(dev, start_byte + MDA_HEADER_SIZE);
-
if (!dev_write_bytes(dev, start_byte, MDA_HEADER_SIZE, mdah)) {
- dev_unset_last_byte(dev);
- log_error("Failed to write mda header to %s fd %d", dev_name(dev), dev->bcache_fd);
+ log_error("Failed to write mda header to %s", dev_name(dev));
return 0;
}
- dev_unset_last_byte(dev);
return 1;
}
@@ -950,8 +946,6 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
goto out;
}
- dev_set_last_byte(mdac->area.dev, mda_start + mdah->size);
-
log_debug_metadata("VG %s %u metadata write at %llu size %llu (wrap %llu)",
vg->name, vg->seqno,
(unsigned long long)write1_start,
@@ -959,8 +953,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
(unsigned long long)write2_size);
if (!dev_write_bytes(mdac->area.dev, write1_start, (size_t)write1_size, write_buf)) {
- log_error("Failed to write metadata to %s fd %d", devname, mdac->area.dev->bcache_fd);
- dev_unset_last_byte(mdac->area.dev);
+ log_error("Failed to write metadata to %s", devname);
goto out;
}
@@ -972,14 +965,11 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
if (!dev_write_bytes(mdac->area.dev, write2_start, write2_size,
write_buf + new_size - new_wrap)) {
- log_error("Failed to write metadata wrap to %s fd %d", devname, mdac->area.dev->bcache_fd);
- dev_unset_last_byte(mdac->area.dev);
+ log_error("Failed to write metadata wrap to %s", devname);
goto out;
}
}
- dev_unset_last_byte(mdac->area.dev);
-
rlocn_new->checksum = calc_crc(INITIAL_CRC,
(uint8_t *)write_buf,
(uint32_t)(new_size - new_wrap));
diff --git a/lib/label/hints.c b/lib/label/hints.c
index 6510fcf74..b87ab317c 100644
--- a/lib/label/hints.c
+++ b/lib/label/hints.c
@@ -141,7 +141,6 @@
#include "lib/misc/crc.h"
#include "lib/mm/xlate.h"
#include "lib/cache/lvmcache.h"
-#include "lib/device/bcache.h"
#include "lib/commands/toolcontext.h"
#include "lib/activate/activate.h"
#include "lib/label/hints.h"
diff --git a/lib/label/label.c b/lib/label/label.c
index fe59394b0..7673d0347 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -19,7 +19,7 @@
#include "lib/misc/crc.h"
#include "lib/mm/xlate.h"
#include "lib/cache/lvmcache.h"
-#include "lib/device/bcache.h"
+#include "lib/device/io-manager.h"
#include "lib/commands/toolcontext.h"
#include "lib/activate/activate.h"
#include "lib/label/hints.h"
@@ -31,9 +31,73 @@
#include <sys/types.h>
#include <sys/resource.h>
-/* FIXME Allow for larger labels? Restricted to single sector currently */
+int io_data_ready;
-static uint64_t _current_bcache_size_bytes;
+static uint64_t _current_io_size_bytes;
+
+static int _get_dev(struct device *dev, unsigned flags)
+{
+ if (dev->iodev) {
+ if (flags == dev->iom_flags)
+ return 1;
+
+ /* currently writable, want to read */
+ if ((flags & EF_READ_ONLY) && !dev->iom_flags)
+ return 1;
+
+ /* currently excl, want to read */
+ if ((flags & EF_READ_ONLY) && (dev->iom_flags & EF_EXCL))
+ return 1;
+
+ /* currently excl, want to write */
+ if (!flags && (dev->iom_flags & EF_EXCL))
+ return 1;
+
+ /* currently readonly, want to write */
+ if (!flags && (dev->iom_flags & EF_READ_ONLY)) {
+ log_print("dev reopen for writing %s", dev_name(dev));
+ io_put_dev(dev->iodev);
+ dev->iom_flags = 0;
+ goto get;
+ }
+
+ /* currently non-excl, want excl */
+ if ((flags & EF_EXCL) && !(dev->iom_flags & EF_EXCL)) {
+ log_print("dev reopen excl %s", dev_name(dev));
+ io_put_dev(dev->iodev);
+ dev->iom_flags = 0;
+ goto get;
+ }
+
+ /* Can this happen? */
+ log_print("dev reopen flags %x iom_flags %x %s", flags, dev->iom_flags, dev_name(dev));
+ io_put_dev(dev->iodev);
+ dev->iom_flags = 0;
+ }
+get:
+ dev->iodev = io_get_dev(lvm_iom, dev_name(dev), flags);
+
+ if (!dev->iodev) {
+ log_error("No io device available %s", dev_name(dev));
+ return 0;
+ }
+
+ dev->iom_flags = flags;
+
+ return 1;
+}
+
+static void _put_dev(struct device *dev)
+{
+ if (!dev->iodev) {
+ log_error("put_dev no iodev %s", dev_name(dev));
+ return;
+ }
+
+ io_put_dev(dev->iodev);
+ dev->iodev = NULL;
+ dev->iom_flags = 0;
+}
/*
* Internal labeller struct.
@@ -119,7 +183,7 @@ int label_remove(struct device *dev)
log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev));
- if (!label_scan_open_excl(dev)) {
+ if (!_get_dev(dev, EF_EXCL)) {
log_error("Failed to open device %s", dev_name(dev));
return 0;
}
@@ -169,6 +233,7 @@ int label_remove(struct device *dev)
}
}
+ _put_dev(dev);
return r;
}
@@ -207,22 +272,19 @@ int label_write(struct device *dev, struct label *label)
PRIu32 ".", dev_name(dev), label->sector,
xlate32(lh->offset_xl));
- if (!label_scan_open(dev)) {
+ if (!_get_dev(dev, 0)) {
log_error("Failed to open device %s", dev_name(dev));
return 0;
}
offset = label->sector << SECTOR_SHIFT;
- dev_set_last_byte(dev, offset + LABEL_SIZE);
-
if (!dev_write_bytes(dev, offset, LABEL_SIZE, buf)) {
log_debug_devs("Failed to write label to %s", dev_name(dev));
r = 0;
}
- dev_unset_last_byte(dev);
-
+ _put_dev(dev);
return r;
}
@@ -249,17 +311,10 @@ struct label *label_create(struct labeller *labeller)
}
-/* global variable for accessing the bcache populated by label scan */
-struct bcache *scan_bcache;
+/* global variable for accessing the io-manager populated by label scan */
+struct io_manager *lvm_iom;
-#define BCACHE_BLOCK_SIZE_IN_SECTORS 256 /* 256*512 = 128K */
-
-static bool _in_bcache(struct device *dev)
-{
- if (!dev)
- return NULL;
- return (dev->flags & DEV_IN_BCACHE) ? true : false;
-}
+#define IOM_BLOCK_SIZE_IN_SECTORS 64 /* 64*512 = 32K */
static struct labeller *_find_lvm_header(struct device *dev,
char *scan_buf,
@@ -284,8 +339,8 @@ static struct labeller *_find_lvm_header(struct device *dev,
sector += LABEL_SIZE >> SECTOR_SHIFT) {
/*
- * The scan_buf passed in is a bcache block, which is
- * BCACHE_BLOCK_SIZE_IN_SECTORS large. So if start_sector is
+	 * The scan_buf passed in is an iom block, which is
+ * IOM_BLOCK_SIZE_IN_SECTORS large. So if start_sector is
* one of the last couple sectors in that buffer, we need to
* break early.
*/
@@ -367,8 +422,8 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
/*
* The device may have signatures that exclude it from being processed.
- * If filters were applied before bcache data was available, some
- * filters may have deferred their check until the point where bcache
+ * If filters were applied before iom data was available, some
+ * filters may have deferred their check until the point where iom
* data had been read (here). They set this flag to indicate that the
* filters should be retested now that data from the device is ready.
*/
@@ -401,7 +456,7 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
* FIXME: we don't need to copy one sector from bb->data into label_buf,
* we can just point label_buf at one sector in ld->buf.
*/
- if (!(labeller = _find_lvm_header(dev, bb->data, BCACHE_BLOCK_SIZE_IN_SECTORS, label_buf, &sector, block_sector, start_sector))) {
+ if (!(labeller = _find_lvm_header(dev, bb->data, IOM_BLOCK_SIZE_IN_SECTORS, label_buf, &sector, block_sector, start_sector))) {
/*
* Non-PVs exit here
@@ -461,165 +516,6 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
return ret;
}
-static int _scan_dev_open(struct device *dev)
-{
- struct dm_list *name_list;
- struct dm_str_list *name_sl;
- const char *name;
- struct stat sbuf;
- int retried = 0;
- int flags = 0;
- int fd;
-
- if (!dev)
- return 0;
-
- if (dev->flags & DEV_IN_BCACHE) {
- /* Shouldn't happen */
- log_error("Device open %s has DEV_IN_BCACHE already set", dev_name(dev));
- dev->flags &= ~DEV_IN_BCACHE;
- }
-
- if (dev->bcache_fd > 0) {
- /* Shouldn't happen */
- log_error("Device open %s already open with fd %d",
- dev_name(dev), dev->bcache_fd);
- return 0;
- }
-
- /*
- * All the names for this device (major:minor) are kept on
- * dev->aliases, the first one is the primary/preferred name.
- */
- if (!(name_list = dm_list_first(&dev->aliases))) {
- /* Shouldn't happen */
- log_error("Device open %s %d:%d has no path names.",
- dev_name(dev), (int)MAJOR(dev->dev), (int)MINOR(dev->dev));
- return 0;
- }
- name_sl = dm_list_item(name_list, struct dm_str_list);
- name = name_sl->str;
-
- flags |= O_DIRECT;
- flags |= O_NOATIME;
-
- /*
- * FIXME: udev is a train wreck when we open RDWR and close, so we
- * need to only use RDWR when we actually need to write, and use
- * RDONLY otherwise. Fix, disable or scrap udev nonsense so we can
- * just open with RDWR by default.
- */
-
- if (dev->flags & DEV_BCACHE_EXCL) {
- flags |= O_EXCL;
- flags |= O_RDWR;
- } else if (dev->flags & DEV_BCACHE_WRITE) {
- flags |= O_RDWR;
- } else {
- flags |= O_RDONLY;
- }
-
-retry_open:
-
- fd = open(name, flags, 0777);
-
- if (fd < 0) {
- if ((errno == EBUSY) && (flags & O_EXCL)) {
- log_error("Can't open %s exclusively. Mounted filesystem?",
- dev_name(dev));
- } else {
- int major, minor;
-
- /*
- * Shouldn't happen, if it does, print stat info to help figure
- * out what's wrong.
- */
-
- major = (int)MAJOR(dev->dev);
- minor = (int)MINOR(dev->dev);
-
- log_error("Device open %s %d:%d failed errno %d", name, major, minor, errno);
-
- if (stat(name, &sbuf)) {
- log_debug_devs("Device open %s %d:%d stat failed errno %d",
- name, major, minor, errno);
- } else if (sbuf.st_rdev != dev->dev) {
- log_debug_devs("Device open %s %d:%d stat %d:%d does not match.",
- name, major, minor,
- (int)MAJOR(sbuf.st_rdev), (int)MINOR(sbuf.st_rdev));
- }
-
- if (!retried) {
- /*
- * FIXME: remove this, the theory for this retry is that
- * there may be a udev race that we can sometimes mask by
- * retrying. This is here until we can figure out if it's
- * needed and if so fix the real problem.
- */
- usleep(5000);
- log_debug_devs("Device open %s retry", dev_name(dev));
- retried = 1;
- goto retry_open;
- }
- }
- return 0;
- }
-
- dev->flags |= DEV_IN_BCACHE;
- dev->bcache_fd = fd;
- return 1;
-}
-
-static int _scan_dev_close(struct device *dev)
-{
- if (!(dev->flags & DEV_IN_BCACHE))
- log_error("scan_dev_close %s no DEV_IN_BCACHE set", dev_name(dev));
-
- dev->flags &= ~DEV_IN_BCACHE;
- dev->flags &= ~DEV_BCACHE_EXCL;
-
- if (dev->bcache_fd < 0) {
- log_error("scan_dev_close %s already closed", dev_name(dev));
- return 0;
- }
-
- if (close(dev->bcache_fd))
- log_warn("close %s errno %d", dev_name(dev), errno);
- dev->bcache_fd = -1;
- return 1;
-}
-
-static void _drop_bad_aliases(struct device *dev)
-{
- struct dm_str_list *strl, *strl2;
- const char *name;
- struct stat sbuf;
- int major = (int)MAJOR(dev->dev);
- int minor = (int)MINOR(dev->dev);
- int bad;
-
- dm_list_iterate_items_safe(strl, strl2, &dev->aliases) {
- name = strl->str;
- bad = 0;
-
- if (stat(name, &sbuf)) {
- bad = 1;
- log_debug_devs("Device path check %d:%d %s stat failed errno %d",
- major, minor, name, errno);
- } else if (sbuf.st_rdev != dev->dev) {
- bad = 1;
- log_debug_devs("Device path check %d:%d %s stat %d:%d does not match.",
- major, minor, name,
- (int)MAJOR(sbuf.st_rdev), (int)MINOR(sbuf.st_rdev));
- }
-
- if (bad) {
- log_debug_devs("Device path check %d:%d dropping path %s.", major, minor, name);
- dev_cache_failed_path(dev, name);
- }
- }
-}
-
/*
* Read or reread label/metadata from selected devs.
*
@@ -639,7 +535,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
struct dm_list reopen_devs;
struct device_list *devl, *devl2;
struct block *bb;
- int retried_open = 0;
int scan_read_errors = 0;
int scan_process_errors = 0;
int scan_failed_count = 0;
@@ -657,7 +552,7 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
log_debug_devs("Scanning %d devices for VG info", dm_list_size(devs));
scan_more:
- rem_prefetches = bcache_max_prefetches(scan_bcache);
+ rem_prefetches = io_max_prefetches(lvm_iom);
submit_count = 0;
dm_list_iterate_items_safe(devl, devl2, devs) {
@@ -672,16 +567,17 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
if (!rem_prefetches)
break;
- if (!_in_bcache(devl->dev)) {
- if (!_scan_dev_open(devl->dev)) {
- log_debug_devs("Scan failed to open %s.", dev_name(devl->dev));
- dm_list_del(&devl->list);
- dm_list_add(&reopen_devs, &devl->list);
- continue;
- }
- }
+ if (!_get_dev(devl->dev, EF_READ_ONLY))
+ break;
- bcache_prefetch(scan_bcache, devl->dev->bcache_fd, 0);
+ /*
+ * Prefetch the first block of the disk which holds the label
+		 * and pv header, the mda header, and some of the metadata text
+ * (if current metadata text is further into the metadata area,
+ * it will not be in this block and will require reading another
+ * block or more later.)
+ */
+ io_prefetch_block(lvm_iom, devl->dev->iodev, 0);
rem_prefetches--;
submit_count++;
@@ -698,18 +594,18 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
scan_failed = 0;
is_lvm_device = 0;
- if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) {
+ if (!io_get_block(lvm_iom, devl->dev->iodev, 0, 0, &bb)) {
log_debug_devs("Scan failed to read %s error %d.", dev_name(devl->dev), error);
scan_failed = 1;
scan_read_errors++;
scan_failed_count++;
lvmcache_del_dev(devl->dev);
} else {
- log_debug_devs("Processing data from device %s %d:%d fd %d block %p",
+ log_debug_devs("Processing data from device %s %d:%d block %p",
dev_name(devl->dev),
(int)MAJOR(devl->dev->dev),
(int)MINOR(devl->dev->dev),
- devl->dev->bcache_fd, bb);
+ bb);
ret = _process_block(cmd, f, devl->dev, bb, 0, 0, &is_lvm_device);
@@ -722,18 +618,27 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
}
if (bb)
- bcache_put(bb);
+ io_put_block(bb);
/*
- * Keep the bcache block of lvm devices we have processed so
- * that the vg_read phase can reuse it. If bcache failed to
- * read the block, or the device does not belong to lvm, then
- * drop it from bcache.
+ * If iom failed to read the block, or the device does not
+ * belong to lvm, then drop it from iom.
*/
- if (scan_failed || !is_lvm_device) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
- _scan_dev_close(devl->dev);
- }
+ if (scan_failed || !is_lvm_device)
+ io_invalidate_dev(lvm_iom, devl->dev->iodev);
+
+ /*
+ * Allow io manager to drop this dev (close the fd and
+ * invalidate the cached block) if needed due to a full cache.
+		 * In the common case, the cache should not be full and fds do
+ * not reach the max, so the dev will remain open in iomanager
+ * and the block we've read will remain cached, and when
+ * vg_read() comes to reading the metadata again, no new open
+ * or read will be needed. In the uncommon case, vg_read()
+ * will trigger a new open() and rereading the data from disk.
+ */
+ io_put_dev(devl->dev->iodev);
+ devl->dev->iodev = NULL;
dm_list_del(&devl->list);
dm_list_add(&done_devs, &devl->list);
@@ -742,51 +647,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
if (!dm_list_empty(devs))
goto scan_more;
- /*
- * We're done scanning all the devs. If we failed to open any of them
- * the first time through, refresh device paths and retry. We failed
- * to open the devs on the reopen_devs list.
- *
- * FIXME: it's not clear if or why this helps.
- */
- if (!dm_list_empty(&reopen_devs)) {
- if (retried_open) {
- /* Don't try again. */
- scan_failed_count += dm_list_size(&reopen_devs);
- dm_list_splice(&done_devs, &reopen_devs);
- goto out;
- }
- retried_open = 1;
-
- dm_list_iterate_items_safe(devl, devl2, &reopen_devs) {
- _drop_bad_aliases(devl->dev);
-
- if (dm_list_empty(&devl->dev->aliases)) {
- log_warn("WARNING: Scan ignoring device %d:%d with no paths.",
- (int)MAJOR(devl->dev->dev),
- (int)MINOR(devl->dev->dev));
-
- dm_list_del(&devl->list);
- lvmcache_del_dev(devl->dev);
- scan_failed_count++;
- }
- }
-
- /*
- * This will search the system's /dev for new path names and
- * could help us reopen the device if it finds a new preferred
- * path name for this dev's major:minor. It does that by
- * inserting a new preferred path name on dev->aliases. open
- * uses the first name from that list.
- */
- log_debug_devs("Scanning refreshing device paths.");
- dev_cache_scan();
-
- /* Put devs that failed to open back on the original list to retry. */
- dm_list_splice(devs, &reopen_devs);
- goto scan_more;
- }
-out:
log_debug_devs("Scanned devices: read errors %d process errors %d failed %d",
scan_read_errors, scan_process_errors, scan_failed_count);
@@ -800,50 +660,52 @@ out:
/*
* We don't know ahead of time if we will find some VG metadata
- * that is larger than the total size of the bcache, which would
+ * that is larger than the total size of the iom, which would
* prevent us from reading/writing the VG since we do not dynamically
- * increase the bcache size when we find it's too small. In these
+ * increase the iom size when we find it's too small. In these
* cases the user would need to set io_memory_size to be larger
* than the max VG metadata size (lvm does not impose any limit on
* the metadata size.)
*/
-#define MIN_BCACHE_BLOCKS 32 /* 4MB (32 * 128KB) */
-#define MAX_BCACHE_BLOCKS 4096 /* 512MB (4096 * 128KB) */
+#define MIN_IOM_BLOCKS 32    /* 32 * 32KB = 1MB; NOTE(review): comment previously claimed 4MB (128 * 32KB) — if a 4MB minimum is intended (matching the old bcache minimum), this value should be 128; verify. */
+#define MAX_IOM_BLOCKS 16384 /* 512MB (16384 * 32KB) */
-static int _setup_bcache(void)
+#define IOM_MAX_DEVS 4096
+
+static int _setup_io_manager(void)
{
- struct io_engine_ *ioe = NULL;
+ struct io_engine *ioe = NULL;
int iomem_kb = io_memory_size();
- int block_size_kb = (BCACHE_BLOCK_SIZE_IN_SECTORS * 512) / 1024;
+ int block_size_kb = (IOM_BLOCK_SIZE_IN_SECTORS * 512) / 1024;
int cache_blocks;
cache_blocks = iomem_kb / block_size_kb;
- if (cache_blocks < MIN_BCACHE_BLOCKS)
- cache_blocks = MIN_BCACHE_BLOCKS;
+ if (cache_blocks < MIN_IOM_BLOCKS)
+ cache_blocks = MIN_IOM_BLOCKS;
- if (cache_blocks > MAX_BCACHE_BLOCKS)
- cache_blocks = MAX_BCACHE_BLOCKS;
+ if (cache_blocks > MAX_IOM_BLOCKS)
+ cache_blocks = MAX_IOM_BLOCKS;
- _current_bcache_size_bytes = cache_blocks * BCACHE_BLOCK_SIZE_IN_SECTORS * 512;
+ _current_io_size_bytes = cache_blocks * IOM_BLOCK_SIZE_IN_SECTORS * 512;
if (use_aio()) {
- if (!(ioe = create_async_io_engine_())) {
+ if (!(ioe = create_async_io_engine(true))) {
log_warn("Failed to set up async io, using sync io.");
init_use_aio(0);
}
}
if (!ioe) {
- if (!(ioe = create_sync_io_engine_())) {
+ if (!(ioe = create_sync_io_engine(true))) {
log_error("Failed to set up sync io.");
return 0;
}
}
- if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) {
- log_error("Failed to create bcache with %d cache blocks.", cache_blocks);
+ if (!(lvm_iom = io_manager_create(IOM_BLOCK_SIZE_IN_SECTORS, cache_blocks, IOM_MAX_DEVS, ioe))) {
+ log_error("Failed to create io-manager with %d cache blocks.", cache_blocks);
return 0;
}
@@ -939,6 +801,14 @@ int label_scan(struct cmd_context *cmd)
dm_list_init(&scan_devs);
dm_list_init(&hints_list);
+ if (lvm_iom)
+ io_invalidate_all(lvm_iom);
+
+ if (!lvm_iom) {
+ if (!_setup_io_manager())
+ return 0;
+ }
+
/*
* dev_cache_scan() creates a list of devices on the system
* (saved in in dev-cache) which we can iterate through to
@@ -990,23 +860,9 @@ int label_scan(struct cmd_context *cmd)
continue;
devl->dev = dev;
dm_list_add(&all_devs, &devl->list);
-
- /*
- * label_scan should not generally be called a second time,
- * so this will usually not be true.
- */
- if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
- }
};
dev_iter_destroy(iter);
- if (!scan_bcache) {
- if (!_setup_bcache())
- return 0;
- }
-
/*
* In some common cases we can avoid scanning all devices
* by using hints which tell us which devices are PVs, which
@@ -1039,35 +895,37 @@ int label_scan(struct cmd_context *cmd)
*/
_prepare_open_file_limit(cmd, dm_list_size(&scan_devs));
+ io_data_ready = 1;
+
/*
* Do the main scan.
*/
_scan_list(cmd, cmd->filter, &scan_devs, NULL);
/*
- * Metadata could be larger than total size of bcache, and bcache
+ * Metadata could be larger than total size of iom, and iom
* cannot currently be resized during the command. If this is the
* case (or within reach), warn that io_memory_size needs to be
* set larger.
*
- * Even if bcache out of space did not cause a failure during scan, it
+ * Even if iom out of space did not cause a failure during scan, it
* may cause a failure during the next vg_read phase or during vg_write.
*
- * If there was an error during scan, we could recreate bcache here
+ * If there was an error during scan, we could recreate iom here
* with a larger size and then restart label_scan. But, this does not
- * address the problem of writing new metadata that excedes the bcache
+	 * address the problem of writing new metadata that exceeds the iom
* size and failing, which would often be hit first, i.e. we'll fail
* to write new metadata exceding the max size before we have a chance
* to read any metadata with that size, unless we find an existing vg
* that has been previously created with the larger size.
*
- * If the largest metadata is within 1MB of the bcache size, then start
+ * If the largest metadata is within 1MB of the iom size, then start
* warning.
*/
max_metadata_size_bytes = lvmcache_max_metadata_size();
- if (max_metadata_size_bytes + (1024 * 1024) > _current_bcache_size_bytes) {
- /* we want bcache to be 1MB larger than the max metadata seen */
+ if (max_metadata_size_bytes + (1024 * 1024) > _current_io_size_bytes) {
+ /* we want io-manager to be 1MB larger than the max metadata seen */
uint64_t want_size_kb = (max_metadata_size_bytes / 1024) + 1024;
uint64_t remainder;
if ((remainder = (want_size_kb % 1024)))
@@ -1154,7 +1012,7 @@ int label_scan(struct cmd_context *cmd)
* Scan and cache lvm data from the listed devices. If a device is already
* scanned and cached, this replaces the previously cached lvm data for the
* device. This is called when vg_read() wants to guarantee that it is using
- * the latest data from the devices in the VG (since the scan populated bcache
+ * the latest data from the devices in the VG (since the scan populated iom
* without a lock.)
*/
@@ -1162,16 +1020,16 @@ int label_scan_devs(struct cmd_context *cmd, struct dev_filter *f, struct dm_lis
{
struct device_list *devl;
- if (!scan_bcache) {
- if (!_setup_bcache())
+ if (!lvm_iom) {
+ if (!_setup_io_manager())
return 0;
}
dm_list_iterate_items(devl, devs) {
- if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
- _scan_dev_close(devl->dev);
- }
+ if (!_get_dev(devl->dev, EF_READ_ONLY))
+ continue;
+
+ io_invalidate_dev(lvm_iom, devl->dev->iodev);
}
_scan_list(cmd, f, devs, NULL);
@@ -1189,21 +1047,16 @@ int label_scan_devs_rw(struct cmd_context *cmd, struct dev_filter *f, struct dm_
{
struct device_list *devl;
- if (!scan_bcache) {
- if (!_setup_bcache())
+ if (!lvm_iom) {
+ if (!_setup_io_manager())
return 0;
}
dm_list_iterate_items(devl, devs) {
- if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
- _scan_dev_close(devl->dev);
- }
- /*
- * With this flag set, _scan_dev_open() done by
- * _scan_list() will do open RW
- */
- devl->dev->flags |= DEV_BCACHE_WRITE;
+ if (!_get_dev(devl->dev, 0))
+ continue;
+
+ io_invalidate_dev(lvm_iom, devl->dev->iodev);
}
_scan_list(cmd, f, devs, NULL);
@@ -1217,15 +1070,10 @@ int label_scan_devs_excl(struct dm_list *devs)
int failed = 0;
dm_list_iterate_items(devl, devs) {
- if (_in_bcache(devl->dev)) {
- bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd);
- _scan_dev_close(devl->dev);
- }
- /*
- * With this flag set, _scan_dev_open() done by
- * _scan_list() will do open EXCL
- */
- devl->dev->flags |= DEV_BCACHE_EXCL;
+ if (!_get_dev(devl->dev, EF_EXCL))
+ continue;
+
+ io_invalidate_dev(lvm_iom, devl->dev->iodev);
}
_scan_list(NULL, NULL, devs, &failed);
@@ -1237,15 +1085,15 @@ int label_scan_devs_excl(struct dm_list *devs)
void label_scan_invalidate(struct device *dev)
{
- if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
- }
+ if (!_get_dev(dev, EF_READ_ONLY))
+ return;
+ io_invalidate_dev(lvm_iom, dev->iodev);
+ _put_dev(dev);
}
/*
* If a PV is stacked on an LV, then the LV is kept open
- * in bcache, and needs to be closed so the open fd doesn't
+ * in iom, and needs to be closed so the open fd doesn't
* interfere with processing the LV.
*/
@@ -1264,44 +1112,34 @@ void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv
}
/*
- * Empty the bcache of all blocks and close all open fds,
- * but keep the bcache set up.
+ * Empty the iom of all blocks and close all open fds,
+ * but keep the iom set up.
*/
void label_scan_drop(struct cmd_context *cmd)
{
- struct dev_iter *iter;
- struct device *dev;
-
- if (!(iter = dev_iter_create(NULL, 0)))
- return;
-
- while ((dev = dev_iter_get(cmd, iter))) {
- if (_in_bcache(dev))
- _scan_dev_close(dev);
- }
- dev_iter_destroy(iter);
+ if (lvm_iom)
+ io_invalidate_all(lvm_iom);
}
/*
- * Close devices that are open because bcache is holding blocks for them.
- * Destroy the bcache.
+ * Close devices that are open because iom is holding blocks for them.
+ * Destroy the iom.
*/
void label_scan_destroy(struct cmd_context *cmd)
{
- if (!scan_bcache)
+ if (!lvm_iom)
return;
- label_scan_drop(cmd);
-
- bcache_destroy(scan_bcache);
- scan_bcache = NULL;
+ io_invalidate_all(lvm_iom);
+ io_manager_destroy(lvm_iom);
+ lvm_iom = NULL;
}
/*
* Read (or re-read) and process (or re-process) the data for a device. This
- * will reset (clear and repopulate) the bcache and lvmcache info for this
+ * will reset (clear and repopulate) the iom and lvmcache info for this
* device. There are only a couple odd places that want to reread a specific
* device, this is not a commonly used function.
*/
@@ -1319,11 +1157,13 @@ int label_read(struct device *dev)
dm_list_init(&one_dev);
dm_list_add(&one_dev, &devl->list);
- if (_in_bcache(dev)) {
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
+ if (!_get_dev(dev, EF_READ_ONLY)) {
+ log_error("No io device available for %s", dev_name(devl->dev));
+ return 0;
}
+ io_invalidate_dev(lvm_iom, dev->iodev);
+
_scan_list(NULL, NULL, &one_dev, &failed);
free(devl);
@@ -1333,264 +1173,166 @@ int label_read(struct device *dev)
return 1;
}
-int label_scan_setup_bcache(void)
+int label_scan_setup_io_manager(void)
{
- if (!scan_bcache) {
- if (!_setup_bcache())
+ if (!lvm_iom) {
+ if (!_setup_io_manager())
return 0;
}
return 1;
}
-/*
- * This is needed to write to a new non-lvm device.
- * Scanning that dev would not keep it open or in
- * bcache, but to use bcache_write we need the dev
- * to be open so we can use dev->bcache_fd to write.
- */
-
+/* FIXME: probably not needed, read will do it */
int label_scan_open(struct device *dev)
{
- if (!_in_bcache(dev))
- return _scan_dev_open(dev);
- return 1;
+ return _get_dev(dev, EF_READ_ONLY);
}
int label_scan_open_excl(struct device *dev)
{
- if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_EXCL)) {
- /* FIXME: avoid tossing out bcache blocks just to replace fd. */
- log_debug("Close and reopen excl %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
- }
- dev->flags |= DEV_BCACHE_EXCL;
- dev->flags |= DEV_BCACHE_WRITE;
- return label_scan_open(dev);
+ return _get_dev(dev, EF_EXCL);
}
+/* FIXME: probably not needed, write will do it */
int label_scan_open_rw(struct device *dev)
{
- if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) {
- /* FIXME: avoid tossing out bcache blocks just to replace fd. */
- log_debug("Close and reopen rw %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
- }
- dev->flags |= DEV_BCACHE_WRITE;
- return label_scan_open(dev);
+ return _get_dev(dev, 0);
}
bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data)
{
- if (!scan_bcache) {
+ int put = !dev->iodev;
+
+ if (!lvm_iom) {
/* Should not happen */
- log_error("dev_read bcache not set up %s", dev_name(dev));
+ log_error("dev_read io manager not set up %s", dev_name(dev));
return false;
}
- if (dev->bcache_fd <= 0) {
- /* This is not often needed. */
- if (!label_scan_open(dev)) {
- log_error("Error opening device %s for reading at %llu length %u.",
- dev_name(dev), (unsigned long long)start, (uint32_t)len);
- return false;
- }
- }
+ if (!_get_dev(dev, EF_READ_ONLY))
+ return false;
- if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data)) {
+ if (!io_read_bytes(lvm_iom, dev->iodev, start, len, data)) {
log_error("Error reading device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
+
+ if (put)
+ _put_dev(dev);
return true;
}
bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data)
{
+ int put = !dev->iodev;
+
if (test_mode())
return true;
- if (!scan_bcache) {
+ if (!lvm_iom) {
/* Should not happen */
- log_error("dev_write bcache not set up %s", dev_name(dev));
+ log_error("dev_write io manager not set up %s", dev_name(dev));
return false;
}
- if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) {
- /* FIXME: avoid tossing out bcache blocks just to replace fd. */
- log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
-
- dev->flags |= DEV_BCACHE_WRITE;
- label_scan_open(dev);
- }
-
- if (dev->bcache_fd <= 0) {
- /* This is not often needed. */
- dev->flags |= DEV_BCACHE_WRITE;
- if (!label_scan_open(dev)) {
- log_error("Error opening device %s for writing at %llu length %u.",
- dev_name(dev), (unsigned long long)start, (uint32_t)len);
- return false;
- }
- }
+ if (!_get_dev(dev, 0))
+ return false;
- if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data)) {
+ if (!io_write_bytes(lvm_iom, dev->iodev, start, len, data)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
- if (!bcache_flush(scan_bcache)) {
+ if (!io_flush(lvm_iom)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
+
+ if (put)
+ _put_dev(dev);
return true;
}
bool dev_write_zeros(struct device *dev, uint64_t start, size_t len)
{
+ int put = !dev->iodev;
+
if (test_mode())
return true;
- if (!scan_bcache) {
- log_error("dev_write_zeros bcache not set up %s", dev_name(dev));
+ if (!lvm_iom) {
+ log_error("dev_write_zeros io manager not set up %s", dev_name(dev));
return false;
}
- if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) {
- /* FIXME: avoid tossing out bcache blocks just to replace fd. */
- log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
-
- dev->flags |= DEV_BCACHE_WRITE;
- label_scan_open(dev);
- }
-
- if (dev->bcache_fd <= 0) {
- /* This is not often needed. */
- dev->flags |= DEV_BCACHE_WRITE;
- if (!label_scan_open(dev)) {
- log_error("Error opening device %s for writing at %llu length %u.",
- dev_name(dev), (unsigned long long)start, (uint32_t)len);
- return false;
- }
- }
-
- dev_set_last_byte(dev, start + len);
+ if (!_get_dev(dev, 0))
+ return false;
- if (!bcache_zero_bytes(scan_bcache, dev->bcache_fd, start, len)) {
+ if (!io_zero_bytes(lvm_iom, dev->iodev, start, len)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- dev_unset_last_byte(dev);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
- if (!bcache_flush(scan_bcache)) {
+ if (!io_flush(lvm_iom)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- dev_unset_last_byte(dev);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
- dev_unset_last_byte(dev);
+
+ if (put)
+ _put_dev(dev);
return true;
}
bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
{
+ int put = !dev->iodev;
+
if (test_mode())
return true;
- if (!scan_bcache) {
- log_error("dev_set_bytes bcache not set up %s", dev_name(dev));
+ if (!lvm_iom) {
+ log_error("dev_set_bytes io manager not set up %s", dev_name(dev));
return false;
}
- if (_in_bcache(dev) && !(dev->flags & DEV_BCACHE_WRITE)) {
- /* FIXME: avoid tossing out bcache blocks just to replace fd. */
- log_debug("Close and reopen to write %s", dev_name(dev));
- bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
- _scan_dev_close(dev);
- /* goes to label_scan_open() since bcache_fd < 0 */
- }
-
- if (dev->bcache_fd <= 0) {
- /* This is not often needed. */
- dev->flags |= DEV_BCACHE_WRITE;
- if (!label_scan_open(dev)) {
- log_error("Error opening device %s for writing at %llu length %u.",
- dev_name(dev), (unsigned long long)start, (uint32_t)len);
- return false;
- }
- }
-
- dev_set_last_byte(dev, start + len);
+ if (!_get_dev(dev, 0))
+ return false;
- if (!bcache_set_bytes(scan_bcache, dev->bcache_fd, start, len, val)) {
+ if (!io_set_bytes(lvm_iom, dev->iodev, start, len, val)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- dev_unset_last_byte(dev);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
- if (!bcache_flush(scan_bcache)) {
+ if (!io_flush(lvm_iom)) {
log_error("Error writing device %s at %llu length %u.",
dev_name(dev), (unsigned long long)start, (uint32_t)len);
- dev_unset_last_byte(dev);
- label_scan_invalidate(dev);
+ if (put)
+ _put_dev(dev);
return false;
}
- dev_unset_last_byte(dev);
+ if (put)
+ _put_dev(dev);
return true;
}
-void dev_set_last_byte(struct device *dev, uint64_t offset)
-{
- unsigned int physical_block_size = 0;
- unsigned int logical_block_size = 0;
- unsigned int bs;
-
- if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size)) {
- stack;
- return; /* FIXME: error path ? */
- }
-
- if ((physical_block_size == 512) && (logical_block_size == 512))
- bs = 512;
- else if ((physical_block_size == 4096) && (logical_block_size == 4096))
- bs = 4096;
- else if ((physical_block_size == 512) || (logical_block_size == 512)) {
- log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
- physical_block_size, logical_block_size);
- bs = 512;
- } else if ((physical_block_size == 4096) || (logical_block_size == 4096)) {
- log_debug("Set last byte mixed block sizes physical %u logical %u using 4096",
- physical_block_size, logical_block_size);
- bs = 4096;
- } else {
- log_debug("Set last byte mixed block sizes physical %u logical %u using 512",
- physical_block_size, logical_block_size);
- bs = 512;
- }
-
- bcache_set_last_byte(scan_bcache, dev->bcache_fd, offset, bs);
-}
-
-void dev_unset_last_byte(struct device *dev)
-{
- bcache_unset_last_byte(scan_bcache, dev->bcache_fd);
-}
-
diff --git a/lib/label/label.h b/lib/label/label.h
index f06b7df63..b60f9b5e1 100644
--- a/lib/label/label.h
+++ b/lib/label/label.h
@@ -18,7 +18,7 @@
#include "lib/uuid/uuid.h"
#include "lib/device/device.h"
-#include "lib/device/bcache.h"
+#include "lib/device/io-manager.h"
#define LABEL_ID "LABELONE"
#define LABEL_SIZE SECTOR_SIZE /* Think very carefully before changing this */
@@ -100,7 +100,8 @@ int label_write(struct device *dev, struct label *label);
struct label *label_create(struct labeller *labeller);
void label_destroy(struct label *label);
-extern struct bcache *scan_bcache;
+extern struct io_manager *lvm_iom;
+extern int io_data_ready; /* set once io manager is populated with device blocks */
int label_scan(struct cmd_context *cmd);
int label_scan_devs(struct cmd_context *cmd, struct dev_filter *f, struct dm_list *devs);
@@ -113,20 +114,17 @@ void label_scan_destroy(struct cmd_context *cmd);
int label_read(struct device *dev);
int label_read_sector(struct device *dev, uint64_t scan_sector);
void label_scan_confirm(struct device *dev);
-int label_scan_setup_bcache(void);
+int label_scan_setup_io_manager(void);
int label_scan_open(struct device *dev);
int label_scan_open_excl(struct device *dev);
int label_scan_open_rw(struct device *dev);
/*
- * Wrappers around bcache equivalents.
- * (these make it easier to disable bcache and revert to direct rw if needed)
+ * Wrappers around io-manager equivalents.
*/
bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data);
bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data);
bool dev_write_zeros(struct device *dev, uint64_t start, size_t len);
bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val);
-void dev_set_last_byte(struct device *dev, uint64_t offset);
-void dev_unset_last_byte(struct device *dev);
#endif
diff --git a/lib/metadata/mirror.c b/lib/metadata/mirror.c
index 845945728..73489a91e 100644
--- a/lib/metadata/mirror.c
+++ b/lib/metadata/mirror.c
@@ -302,14 +302,10 @@ static int _write_log_header(struct cmd_context *cmd, struct logical_volume *lv)
return 0;
}
- dev_set_last_byte(dev, sizeof(log_header));
-
if (!dev_write_bytes(dev, UINT64_C(0), sizeof(log_header), &log_header)) {
- dev_unset_last_byte(dev);
log_error("Failed to write log header to %s.", name);
return 0;
}
- dev_unset_last_byte(dev);
label_scan_invalidate(dev);
diff --git a/tools/pvck.c b/tools/pvck.c
index af2dd8e8a..a59240938 100644
--- a/tools/pvck.c
+++ b/tools/pvck.c
@@ -1126,7 +1126,7 @@ static int _dump_headers(struct cmd_context *cmd,
return ECMD_FAILED;
}
- label_scan_setup_bcache();
+ label_scan_setup_io_manager();
if (!_dump_label_and_pv_header(cmd, 1, dev, NULL,
&mda1_offset, &mda1_size, &mda2_offset, &mda2_size))
@@ -1199,7 +1199,7 @@ static int _dump_metadata(struct cmd_context *cmd,
return ECMD_FAILED;
}
- label_scan_setup_bcache();
+ label_scan_setup_io_manager();
if (!_dump_label_and_pv_header(cmd, 0, dev, NULL,
&mda1_offset, &mda1_size, &mda2_offset, &mda2_size))
@@ -1324,7 +1324,7 @@ int pvck(struct cmd_context *cmd, int argc, char **argv)
if (arg_is_set(cmd, labelsector_ARG))
labelsector = arg_uint64_value(cmd, labelsector_ARG, UINT64_C(0));
- label_scan_setup_bcache();
+ label_scan_setup_io_manager();
for (i = 0; i < argc; i++) {
pv_name = argv[i];