summaryrefslogtreecommitdiff
path: root/device_mapper
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2019-11-20 16:07:27 -0600
committerDavid Teigland <teigland@redhat.com>2020-04-15 12:10:32 -0500
commitd9e8895a96539d75166c0f74e58f5ed4e729e551 (patch)
tree31a570fa5dfee258072f553af32cd66988eb16a8 /device_mapper
parentb6b4ad8e28eff7476cb04c4cb93312b06605b82f (diff)
downloadlvm2-d9e8895a96539d75166c0f74e58f5ed4e729e551.tar.gz
Allow dm-integrity to be used for raid images
dm-integrity stores checksums of the data written to an
LV, and returns an error if data read from the LV does
not match the previously saved checksum. When used on
raid images, dm-raid will correct the error by reading
the block from another image, and the device user sees
no error. The integrity metadata (checksums) are stored
on an internal LV allocated by lvm for each linear image.
The internal LV is allocated on the same PV as the image.

Create a raid LV with an integrity layer over each
raid image (for raid levels 1,4,5,6,10):

lvcreate --type raidN --raidintegrity y [options]

Add an integrity layer to images of an existing raid LV:

lvconvert --raidintegrity y LV

Remove the integrity layer from images of a raid LV:

lvconvert --raidintegrity n LV

Settings

Use --raidintegritymode journal|bitmap (journal is default)
to configure the method used by dm-integrity to ensure
crash consistency.

Initialization

When integrity is added to an LV, the kernel needs to
initialize the integrity metadata/checksums for all blocks
in the LV. The data corruption checking performed by
dm-integrity will only operate on areas of the LV that
are already initialized. The progress of integrity
initialization is reported by the "syncpercent" LV
reporting field (and under the Cpy%Sync lvs column.)

Example: create a raid1 LV with integrity:

$ lvcreate --type raid1 -m1 --raidintegrity y -n rr -L1G foo
  Creating integrity metadata LV rr_rimage_0_imeta with size 12.00 MiB.
  Logical volume "rr_rimage_0_imeta" created.
  Creating integrity metadata LV rr_rimage_1_imeta with size 12.00 MiB.
  Logical volume "rr_rimage_1_imeta" created.
  Logical volume "rr" created.

$ lvs -a foo
  LV                  VG  Attr       LSize  Origin              Cpy%Sync
  rr                  foo rwi-a-r---  1.00g                     4.93
  [rr_rimage_0]       foo gwi-aor---  1.00g [rr_rimage_0_iorig] 41.02
  [rr_rimage_0_imeta] foo ewi-ao---- 12.00m
  [rr_rimage_0_iorig] foo -wi-ao----  1.00g
  [rr_rimage_1]       foo gwi-aor---  1.00g [rr_rimage_1_iorig] 39.45
  [rr_rimage_1_imeta] foo ewi-ao---- 12.00m
  [rr_rimage_1_iorig] foo -wi-ao----  1.00g
  [rr_rmeta_0]        foo ewi-aor---  4.00m
  [rr_rmeta_1]        foo ewi-aor---  4.00m
Diffstat (limited to 'device_mapper')
-rw-r--r--device_mapper/all.h39
-rw-r--r--device_mapper/ioctl/libdm-iface.c31
-rw-r--r--device_mapper/ioctl/libdm-targets.h1
-rw-r--r--device_mapper/libdm-deptree.c154
-rw-r--r--device_mapper/libdm-targets.c27
5 files changed, 245 insertions, 7 deletions
diff --git a/device_mapper/all.h b/device_mapper/all.h
index b23485f00..f00b6a5dc 100644
--- a/device_mapper/all.h
+++ b/device_mapper/all.h
@@ -234,6 +234,7 @@ int dm_task_suppress_identical_reload(struct dm_task *dmt);
int dm_task_secure_data(struct dm_task *dmt);
int dm_task_retry_remove(struct dm_task *dmt);
int dm_task_deferred_remove(struct dm_task *dmt);
+void dm_task_skip_reload_params_compare(struct dm_task *dmt);
/*
* Record timestamp immediately after the ioctl returns.
@@ -392,6 +393,15 @@ struct dm_status_writecache {
int dm_get_status_writecache(struct dm_pool *mem, const char *params,
struct dm_status_writecache **status);
+/*
+ * Values parsed from an integrity target STATUS line by
+ * dm_get_status_integrity().  The three fields are filled, in this
+ * order, from the first three whitespace-separated tokens of the
+ * status params string.
+ */
+struct dm_status_integrity {
+ uint64_t number_of_mismatches;
+ uint64_t provided_data_sectors;
+ uint64_t recalc_sector; /* 0 when the status token begins with '-' */
+};
+
+int dm_get_status_integrity(struct dm_pool *mem, const char *params,
+ struct dm_status_integrity **status);
+
/*
* Parse params from STATUS call for snapshot target
*
@@ -970,6 +980,35 @@ int dm_tree_node_add_writecache_target(struct dm_tree_node *node,
uint32_t writecache_block_size,
struct writecache_settings *settings);
+/*
+ * Table parameters for an integrity segment, passed to
+ * dm_tree_node_add_integrity_target().
+ *
+ * The struct is copied by value into the segment, so internal_hash is
+ * copied as a pointer, not as a string.
+ * NOTE(review): the string it points to presumably has to remain valid
+ * until the table line has been emitted - confirm against callers.
+ */
+struct integrity_settings {
+ char mode[8]; /* emitted as a string word on the table line */
+ uint32_t tag_size; /* emitted on the table line before mode */
+ uint32_t block_size; /* optional table param always set by lvm */
+ const char *internal_hash; /* optional table param always set by lvm */
+
+ /*
+  * Optional table params.  Each value is emitted on the table line
+  * only when the matching *_set flag below is set.
+  */
+ uint32_t journal_sectors;
+ uint32_t interleave_sectors;
+ uint32_t buffer_sectors;
+ uint32_t journal_watermark;
+ uint32_t commit_time;
+ uint32_t bitmap_flush_interval;
+ uint64_t sectors_per_bit;
+
+ /* One flag per optional value above: 1 means "pass it to the kernel". */
+ unsigned journal_sectors_set:1;
+ unsigned interleave_sectors_set:1;
+ unsigned buffer_sectors_set:1;
+ unsigned journal_watermark_set:1;
+ unsigned commit_time_set:1;
+ unsigned bitmap_flush_interval_set:1;
+ unsigned sectors_per_bit_set:1;
+};
+
+int dm_tree_node_add_integrity_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *origin_uuid,
+ const char *meta_uuid,
+ struct integrity_settings *settings,
+ int recalculate);
/*
* VDO target
diff --git a/device_mapper/ioctl/libdm-iface.c b/device_mapper/ioctl/libdm-iface.c
index fe04af8bf..25e7d1a75 100644
--- a/device_mapper/ioctl/libdm-iface.c
+++ b/device_mapper/ioctl/libdm-iface.c
@@ -805,6 +805,11 @@ int dm_task_suppress_identical_reload(struct dm_task *dmt)
return 1;
}
+/*
+ * Set the task flag that relaxes the identical-table check done when
+ * reloads are suppressed: with the flag set, differing target params
+ * are only logged and do not by themselves count as a mismatch.
+ * Differences in start, length or target type still count as a
+ * mismatch.
+ */
+void dm_task_skip_reload_params_compare(struct dm_task *dmt)
+{
+ dmt->skip_reload_params_compare = 1;
+}
+
int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node)
{
switch (add_node) {
@@ -1575,11 +1580,29 @@ static int _reload_with_suppression_v4(struct dm_task *dmt)
len = strlen(t2->params);
while (len-- > 0 && t2->params[len] == ' ')
t2->params[len] = '\0';
- if ((t1->start != t2->start) ||
- (t1->length != t2->length) ||
- (strcmp(t1->type, t2->type)) ||
- (strcmp(t1->params, t2->params)))
+
+ if (t1->start != t2->start) {
+ log_debug("reload %u:%u start diff", task->major, task->minor);
+ goto no_match;
+ }
+ if (t1->length != t2->length) {
+ log_debug("reload %u:%u length diff", task->major, task->minor);
goto no_match;
+ }
+ if (strcmp(t1->type, t2->type)) {
+ log_debug("reload %u:%u type diff %s %s", task->major, task->minor, t1->type, t2->type);
+ goto no_match;
+ }
+ if (strcmp(t1->params, t2->params)) {
+ if (dmt->skip_reload_params_compare)
+ log_debug("reload %u:%u skip params ignore %s %s",
+ task->major, task->minor, t1->params, t2->params);
+ else {
+ log_debug("reload %u:%u params diff", task->major, task->minor);
+ goto no_match;
+ }
+ }
+
t1 = t1->next;
t2 = t2->next;
}
diff --git a/device_mapper/ioctl/libdm-targets.h b/device_mapper/ioctl/libdm-targets.h
index b5b20d5e9..9786a7eda 100644
--- a/device_mapper/ioctl/libdm-targets.h
+++ b/device_mapper/ioctl/libdm-targets.h
@@ -59,6 +59,7 @@ struct dm_task {
int skip_lockfs;
int query_inactive_table;
int suppress_identical_reload;
+ int skip_reload_params_compare;
dm_add_node_t add_node;
uint64_t existing_table_size;
int cookie_set;
diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c
index 7fac6ab20..9ba24cbbf 100644
--- a/device_mapper/libdm-deptree.c
+++ b/device_mapper/libdm-deptree.c
@@ -38,6 +38,7 @@ enum {
SEG_STRIPED,
SEG_ZERO,
SEG_WRITECACHE,
+ SEG_INTEGRITY,
SEG_THIN_POOL,
SEG_THIN,
SEG_VDO,
@@ -78,6 +79,7 @@ static const struct {
{ SEG_STRIPED, "striped" },
{ SEG_ZERO, "zero"},
{ SEG_WRITECACHE, "writecache"},
+ { SEG_INTEGRITY, "integrity"},
{ SEG_THIN_POOL, "thin-pool"},
{ SEG_THIN, "thin"},
{ SEG_VDO, "vdo" },
@@ -221,6 +223,11 @@ struct load_segment {
int writecache_pmem; /* writecache, 1 if pmem, 0 if ssd */
uint32_t writecache_block_size; /* writecache, in bytes */
struct writecache_settings writecache_settings; /* writecache */
+
+ uint64_t integrity_data_sectors; /* integrity (provided_data_sectors) */
+ struct dm_tree_node *integrity_meta_node; /* integrity */
+ struct integrity_settings integrity_settings; /* integrity */
+ int integrity_recalculate; /* integrity */
};
/* Per-device properties */
@@ -268,6 +275,16 @@ struct load_properties {
unsigned delay_resume_if_extended;
/*
+ * When comparing table lines to decide if a reload is
+ * needed, ignore any differences between the lvm device
+ * params and the kernel-reported device params.
+ * dm-integrity reports many internal parameters on the
+ * table line when lvm does not explicitly set them,
+ * causing lvm and the kernel to have differing params.
+ */
+ unsigned skip_reload_params_compare;
+
+ /*
* Call node_send_messages(), set to 2 if there are messages
* When != 0, it validates matching transaction id, thus thin-pools
* where transation_id is passed as 0 are never validated, this
@@ -2705,6 +2722,84 @@ static int _writecache_emit_segment_line(struct dm_task *dmt,
return 1;
}
+/*
+ * Build the table params for an integrity segment into params.
+ *
+ * Layout of the emitted line:
+ *   <origin dev> 0 <tag_size> <mode> <count> fix_padding block_size:N
+ *   internal_hash:S [meta_device:D] [recalculate] [optional settings]
+ *
+ * <count> is the number of option words that follow it, so every
+ * option emitted below must also be counted above, and vice versa.
+ * Option keywords must match the kernel's spelling exactly.
+ *
+ * Returns 1 on success, 0 if a device string cannot be built.
+ * NOTE(review): EMIT_PARAMS is defined elsewhere in this file and
+ * presumably also leaves the function when params overflows - confirm.
+ */
+static int _integrity_emit_segment_line(struct dm_task *dmt,
+					struct load_segment *seg,
+					char *params, size_t paramsize)
+{
+	struct integrity_settings *set = &seg->integrity_settings;
+	int pos = 0;
+	int count;
+	char origin_dev[DM_FORMAT_DEV_BUFSIZE];
+	char meta_dev[DM_FORMAT_DEV_BUFSIZE];
+
+	if (!_build_dev_string(origin_dev, sizeof(origin_dev), seg->origin))
+		return_0;
+
+	/* meta_dev is only filled in, and only used, when a meta node exists */
+	if (seg->integrity_meta_node &&
+	    !_build_dev_string(meta_dev, sizeof(meta_dev), seg->integrity_meta_node))
+		return_0;
+
+	count = 3; /* block_size, internal_hash, fix_padding options are always passed */
+
+	if (seg->integrity_meta_node)
+		count++;
+
+	if (seg->integrity_recalculate)
+		count++;
+
+	if (set->journal_sectors_set)
+		count++;
+	if (set->interleave_sectors_set)
+		count++;
+	if (set->buffer_sectors_set)
+		count++;
+	if (set->journal_watermark_set)
+		count++;
+	if (set->commit_time_set)
+		count++;
+	if (set->bitmap_flush_interval_set)
+		count++;
+	if (set->sectors_per_bit_set)
+		count++;
+
+	EMIT_PARAMS(pos, "%s 0 %u %s %d fix_padding block_size:%u internal_hash:%s",
+		    origin_dev,
+		    set->tag_size,
+		    set->mode,
+		    count,
+		    set->block_size,
+		    set->internal_hash);
+
+	if (seg->integrity_meta_node)
+		EMIT_PARAMS(pos, " meta_device:%s", meta_dev);
+
+	if (seg->integrity_recalculate)
+		EMIT_PARAMS(pos, " recalculate");
+
+	if (set->journal_sectors_set)
+		EMIT_PARAMS(pos, " journal_sectors:%u", set->journal_sectors);
+
+	/* The keyword was misspelled "ineterleave_sectors" before. */
+	if (set->interleave_sectors_set)
+		EMIT_PARAMS(pos, " interleave_sectors:%u", set->interleave_sectors);
+
+	if (set->buffer_sectors_set)
+		EMIT_PARAMS(pos, " buffer_sectors:%u", set->buffer_sectors);
+
+	if (set->journal_watermark_set)
+		EMIT_PARAMS(pos, " journal_watermark:%u", set->journal_watermark);
+
+	if (set->commit_time_set)
+		EMIT_PARAMS(pos, " commit_time:%u", set->commit_time);
+
+	if (set->bitmap_flush_interval_set)
+		EMIT_PARAMS(pos, " bitmap_flush_interval:%u", set->bitmap_flush_interval);
+
+	if (set->sectors_per_bit_set)
+		EMIT_PARAMS(pos, " sectors_per_bit:%llu", (unsigned long long)set->sectors_per_bit);
+
+	return 1;
+}
+
+
static int _thin_pool_emit_segment_line(struct dm_task *dmt,
struct load_segment *seg,
char *params, size_t paramsize)
@@ -2889,6 +2984,10 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
if (!_writecache_emit_segment_line(dmt, seg, params, paramsize))
return_0;
break;
+ case SEG_INTEGRITY:
+ if (!_integrity_emit_segment_line(dmt, seg, params, paramsize))
+ return_0;
+ break;
}
switch(seg->type) {
@@ -2901,6 +3000,7 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
case SEG_THIN:
case SEG_CACHE:
case SEG_WRITECACHE:
+ case SEG_INTEGRITY:
break;
case SEG_CRYPT:
case SEG_LINEAR:
@@ -3005,6 +3105,9 @@ static int _load_node(struct dm_tree_node *dnode)
if (!dm_task_suppress_identical_reload(dmt))
log_warn("WARNING: Failed to suppress reload of identical tables.");
+ if (dnode->props.skip_reload_params_compare)
+ dm_task_skip_reload_params_compare(dmt);
+
if ((r = dm_task_run(dmt))) {
r = dm_task_get_info(dmt, &dnode->info);
if (r && !dnode->info.inactive_table)
@@ -3023,8 +3126,8 @@ static int _load_node(struct dm_tree_node *dnode)
if (!existing_table_size && dnode->props.delay_resume_if_new)
dnode->props.size_changed = 0;
- log_debug_activation("Table size changed from %" PRIu64 " to %"
- PRIu64 " for %s.%s", existing_table_size,
+ log_debug_activation("Table size changed from %" PRIu64 " to %" PRIu64 " for %s.%s",
+ existing_table_size,
seg_start, _node_name(dnode),
dnode->props.size_changed ? "" : " (Ignoring.)");
@@ -3136,7 +3239,10 @@ int dm_tree_preload_children(struct dm_tree_node *dnode,
}
/* No resume for a device without parents or with unchanged or smaller size */
- if (!dm_tree_node_num_children(child, 1) || (child->props.size_changed <= 0))
+ if (!dm_tree_node_num_children(child, 1))
+ continue;
+
+ if (child->props.size_changed <= 0)
continue;
if (!child->info.inactive_table && !child->info.suspended)
@@ -3738,6 +3844,48 @@ int dm_tree_node_add_writecache_target(struct dm_tree_node *node,
return 1;
}
+/*
+ * Add an integrity segment of the given size to node.
+ *
+ * origin_uuid and meta_uuid name nodes that must already exist in the
+ * same tree; both are linked to node.  settings is copied by value
+ * into the segment (internal_hash is copied as a pointer only).
+ * recalculate is stored in the segment and makes the "recalculate"
+ * option appear on the table line.
+ *
+ * All arguments are validated and both nodes are looked up before a
+ * segment is added, so a bad argument or unknown uuid leaves node
+ * untouched.  On success the node is also marked so that its reload
+ * comparison ignores differences in target params.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_tree_node_add_integrity_target(struct dm_tree_node *node,
+				      uint64_t size,
+				      const char *origin_uuid,
+				      const char *meta_uuid,
+				      struct integrity_settings *settings,
+				      int recalculate)
+{
+	struct load_segment *seg;
+	struct dm_tree_node *meta_node;
+	struct dm_tree_node *origin_node;
+
+	if (!meta_uuid) {
+		log_error("No integrity meta uuid.");
+		return 0;
+	}
+
+	if (!origin_uuid) {
+		log_error("No integrity origin uuid.");
+		return 0;
+	}
+
+	if (!settings) {
+		log_error("No integrity settings.");
+		return 0;
+	}
+
+	if (!(meta_node = dm_tree_find_node_by_uuid(node->dtree, meta_uuid))) {
+		log_error("Missing integrity's meta uuid %s.", meta_uuid);
+		return 0;
+	}
+
+	if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) {
+		log_error("Missing integrity's origin uuid %s.", origin_uuid);
+		return 0;
+	}
+
+	if (!(seg = _add_segment(node, SEG_INTEGRITY, size)))
+		return_0;
+
+	seg->integrity_meta_node = meta_node;
+	if (!_link_tree_nodes(node, meta_node))
+		return_0;
+
+	seg->origin = origin_node;
+	if (!_link_tree_nodes(node, origin_node))
+		return_0;
+
+	seg->integrity_settings = *settings;
+
+	seg->integrity_recalculate = recalculate;
+
+	/* The kernel-reported params differ from what lvm sets. */
+	node->props.skip_reload_params_compare = 1;
+
+	return 1;
+}
+
+
int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
uint64_t size,
const char *rlog_uuid,
diff --git a/device_mapper/libdm-targets.c b/device_mapper/libdm-targets.c
index 86cb84713..bfe76c5ff 100644
--- a/device_mapper/libdm-targets.c
+++ b/device_mapper/libdm-targets.c
@@ -380,6 +380,33 @@ int dm_get_status_writecache(struct dm_pool *mem, const char *params,
return 1;
}
+/*
+ * Parse the params of an integrity target STATUS line.
+ *
+ * The first three whitespace-separated tokens are read as
+ * number_of_mismatches, provided_data_sectors and the recalc sector.
+ * A recalc token that begins with '-' is stored as 0.
+ *
+ * On success *status points to a struct allocated from mem and 1 is
+ * returned.  On allocation or parse failure 0 is returned and *status
+ * is not written.
+ */
+int dm_get_status_integrity(struct dm_pool *mem, const char *params,
+			    struct dm_status_integrity **status)
+{
+	struct dm_status_integrity *s;
+	unsigned long long mismatches;
+	unsigned long long data_sectors;
+	char recalc_str[32] = ""; /* fits any 64-bit decimal plus NUL */
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(*s))))
+		return_0;
+
+	/*
+	 * The %31s width keeps the token within recalc_str, and the
+	 * numbers go through correctly typed temporaries rather than
+	 * casting uint64_t pointers.
+	 */
+	if (sscanf(params, "%llu %llu %31s",
+		   &mismatches,
+		   &data_sectors,
+		   recalc_str) != 3) {
+		log_error("Failed to parse integrity params: %s.", params);
+		dm_pool_free(mem, s);
+		return 0;
+	}
+
+	s->number_of_mismatches = mismatches;
+	s->provided_data_sectors = data_sectors;
+
+	if (recalc_str[0] == '-')
+		s->recalc_sector = 0;
+	else
+		s->recalc_sector = strtoull(recalc_str, NULL, 0);
+
+	*status = s;
+	return 1;
+}
+
+
int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s)
{
int pos;