author     Heinz Mauelshagen <heinzm@redhat.com>  2017-02-24 01:57:04 +0100
committer  Heinz Mauelshagen <heinzm@redhat.com>  2017-02-24 05:20:58 +0100
commit     92691e345d12515d61e6e7748e5b9ddc4eff49c0
tree       46e45ad34f24a88dea6b9b77090f8caac13b16b8
parent     c1865b0a864e0623b2395c96e7853e4637e46772
download   lvm2-92691e345d12515d61e6e7748e5b9ddc4eff49c0.tar.gz
lvconvert: add infrastructure for RaidLV reshaping support
In order to support striped raid5/6/10 LV reshaping (change of LV type,
stripesize or number of legs), this patch introduces more local
infrastructure in raid_manip.c used by follow-up patches.

Changes:
- add lv_raid_data_copies() returning the raid-type-specific number of data
  copies; needed for raid10 with more than 2 data copies
- remove the _shift_and_rename_image_components() constraint in order to
  support more than 10 raid legs
- add a function to calculate the total rimage length used by out-of-place
  reshape space allocation
- add out-of-place reshape space alloc/relocate/free functions
- move _data_rimages_count() used by the reshape space alloc/relocate
  functions

Related: rhbz834579
Related: rhbz1191935
Related: rhbz1191978
-rw-r--r--   lib/metadata/lv_manip.c      2
-rw-r--r--   lib/metadata/raid_manip.c  470
-rw-r--r--   lib/raid/raid.c              2
3 files changed, 443 insertions, 31 deletions
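For orientation before the diff: the new lv_raid_data_copies() helper added to raid_manip.c below encodes a per-segment-type rule for the number of data copies, which the changed lines in lv_manip.c and raid.c then use as a default. A minimal standalone sketch of that rule, using an illustrative enum instead of lvm2's segment_type API (all names here are assumptions, not lvm2 identifiers):

#include <stdint.h>

/* Illustrative stand-in for lvm2 segment types; not the real API. */
enum raid_kind { RAID1, RAID10, RAID4_5, RAID6, STRIPED };

/* Sketch of the rule: raid10 is currently fixed at 2 copies, mirrors have
 * one copy per leg, striped parity raid has parity_devs + 1, else 1. */
static uint32_t data_copies(enum raid_kind kind, uint32_t area_count)
{
	switch (kind) {
	case RAID10:  return 2;           /* FIXME in the patch: make this variable */
	case RAID1:   return area_count;  /* mirrored: every leg holds a copy */
	case RAID4_5: return 1 + 1;       /* one parity device */
	case RAID6:   return 2 + 1;       /* two parity devices */
	default:      return 1;           /* striped/linear */
	}
}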
diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c
index f3cad0977..b98391b8e 100644
--- a/lib/metadata/lv_manip.c
+++ b/lib/metadata/lv_manip.c
@@ -957,7 +957,7 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
seg->stripe_size = stripe_size;
seg->area_count = area_count;
seg->area_len = area_len;
- seg->data_copies = data_copies ? : 0; // lv_raid_data_copies(segtype, area_count);
+ seg->data_copies = data_copies ? : lv_raid_data_copies(segtype, area_count);
seg->chunk_size = chunk_size;
seg->region_size = region_size;
seg->extents_copied = extents_copied;
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index 70e0a55bd..b2a557293 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -708,12 +708,11 @@ static int _reorder_raid10_near_seg_areas(struct lv_segment *seg, enum raid0_rai
*
* Returns: 1 on success, 0 on failure
*/
+static char *_generate_raid_name(struct logical_volume *lv,
+ const char *suffix, int count);
static int _shift_and_rename_image_components(struct lv_segment *seg)
{
- int len;
- char *shift_name;
uint32_t s, missing;
- struct cmd_context *cmd = seg->lv->vg->cmd;
/*
* All LVs must be properly named for their index before
@@ -724,21 +723,12 @@ static int _shift_and_rename_image_components(struct lv_segment *seg)
if (!seg_is_raid(seg))
return_0;
- if (seg->area_count > 10) {
- /*
- * FIXME: Handling more would mean I'd have
- * to handle double digits
- */
- log_error("Unable handle arrays with more than 10 devices.");
- return 0;
- }
-
log_very_verbose("Shifting images in %s.", display_lvname(seg->lv));
for (s = 0, missing = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) == AREA_UNASSIGNED) {
if (seg_metatype(seg, s) != AREA_UNASSIGNED) {
- log_error(INTERNAL_ERROR "Metadata segment area"
+ log_error(INTERNAL_ERROR "Metadata segment area."
" #%d should be AREA_UNASSIGNED.", s);
return 0;
}
@@ -753,24 +743,16 @@ static int _shift_and_rename_image_components(struct lv_segment *seg)
display_lvname(seg_lv(seg, s)), missing);
/* Alter rmeta name */
- shift_name = dm_pool_strdup(cmd->mem, seg_metalv(seg, s)->name);
- if (!shift_name) {
+ if (!(seg_metalv(seg, s)->name = _generate_raid_name(seg->lv, "rmeta", s - missing))) {
log_error("Memory allocation failed.");
return 0;
}
- len = strlen(shift_name) - 1;
- shift_name[len] -= missing;
- seg_metalv(seg, s)->name = shift_name;
/* Alter rimage name */
- shift_name = dm_pool_strdup(cmd->mem, seg_lv(seg, s)->name);
- if (!shift_name) {
+ if (!(seg_lv(seg, s)->name = _generate_raid_name(seg->lv, "rimage", s - missing))) {
log_error("Memory allocation failed.");
return 0;
}
- len = strlen(shift_name) - 1;
- shift_name[len] -= missing;
- seg_lv(seg, s)->name = shift_name;
seg->areas[s - missing] = seg->areas[s];
seg->meta_areas[s - missing] = seg->meta_areas[s];
@@ -1030,6 +1012,442 @@ uint32_t raid_rimage_extents(const struct segment_type *segtype,
return r > UINT_MAX ? 0 : (uint32_t) r;
}
+/* Return number of data copies for @segtype */
+uint32_t lv_raid_data_copies(const struct segment_type *segtype, uint32_t area_count)
+{
+ if (segtype_is_any_raid10(segtype))
+ /* FIXME: change for variable number of data copies */
+ return 2;
+ else if (segtype_is_mirrored(segtype))
+ return area_count;
+ else if (segtype_is_striped_raid(segtype))
+ return segtype->parity_devs + 1;
+ return 1;
+}
+
+
+/* Return data images count for @total_rimages depending on @seg's type */
+static uint32_t _data_rimages_count(const struct lv_segment *seg, const uint32_t total_rimages)
+{
+ if (!seg_is_thin(seg) && total_rimages <= seg->segtype->parity_devs)
+ return_0;
+
+ return total_rimages - seg->segtype->parity_devs;
+}
+
+/* Get total area len of @lv, i.e. sum of area_len of all segments */
+static uint32_t _lv_total_rimage_len(struct logical_volume *lv)
+{
+ uint32_t s;
+ struct lv_segment *seg = first_seg(lv);
+
+ if (seg_is_raid(seg)) {
+ for (s = 0; s < seg->area_count; s++)
+ if (seg_lv(seg, s))
+ return seg_lv(seg, s)->le_count;
+ } else
+ return lv->le_count;
+
+ return_0;
+}
+
+/*
+ * HM helper:
+ *
+ * Compare the raid levels in segtype @t1 and @t2
+ *
+ * Return 1 if same, else 0
+ */
+static int _cmp_level(const struct segment_type *t1, const struct segment_type *t2)
+{
+ if ((segtype_is_any_raid10(t1) && !segtype_is_any_raid10(t2)) ||
+ (!segtype_is_any_raid10(t1) && segtype_is_any_raid10(t2)))
+ return 0;
+
+ return !strncmp(t1->name, t2->name, 5);
+}
+
+/*
+ * HM Helper:
+ *
+ * Check for same raid levels in segtype @t1 and @t2
+ *
+ * Return 1 if same, else != 1
+ */
+__attribute__ ((__unused__))
+static int is_same_level(const struct segment_type *t1, const struct segment_type *t2)
+{
+ return _cmp_level(t1, t2);
+}
+
+/* Return # of reshape LEs per device for @seg */
+static uint32_t _reshape_len_per_dev(struct lv_segment *seg)
+{
+ return seg->reshape_len;
+}
+
+/* Return # of reshape LEs per @lv (sum of all sub LVs reshape LEs) */
+static uint32_t _reshape_len_per_lv(struct logical_volume *lv)
+{
+ struct lv_segment *seg = first_seg(lv);
+
+ return _reshape_len_per_dev(seg) * _data_rimages_count(seg, seg->area_count);
+}
+
+/*
+ * HM Helper:
+ *
+ * store the allocated reshape length per data image
+ * in the only segment of the top-level RAID @lv and
+ * in the first segment of each sub lv.
+ */
+static int _lv_set_reshape_len(struct logical_volume *lv, uint32_t reshape_len)
+{
+ uint32_t s;
+ struct lv_segment *data_seg, *seg = first_seg(lv);
+
+ if (reshape_len >= lv->le_count - 1)
+ return_0;
+
+ seg->reshape_len = reshape_len;
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (!seg_lv(seg, s))
+ return_0;
+
+ reshape_len = seg->reshape_len;
+ dm_list_iterate_items(data_seg, &seg_lv(seg, s)->segments) {
+ data_seg->reshape_len = reshape_len;
+ reshape_len = 0;
+ }
+ }
+
+ return 1;
+}
+
+/* HM Helper:
+ *
+ * correct segments logical start extents in all sub LVs of @lv
+ * after having reordered any segments in sub LVs e.g. because of
+ * reshape space (re)allocation.
+ */
+static int _lv_set_image_lvs_start_les(struct logical_volume *lv)
+{
+ uint32_t le, s;
+ struct lv_segment *data_seg, *seg = first_seg(lv);
+
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (!seg_lv(seg, s))
+ return_0;
+
+ le = 0;
+ dm_list_iterate_items(data_seg, &(seg_lv(seg, s)->segments)) {
+ data_seg->reshape_len = le ? 0 : seg->reshape_len;
+ data_seg->le = le;
+ le += data_seg->len;
+ }
+ }
+
+ /* Try merging rimage sub LV segments _after_ adjusting start LEs */
+ for (s = 0; s < seg->area_count; s++)
+ if (!lv_merge_segments(seg_lv(seg, s)))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Relocate @out_of_place_les_per_disk from @lv's data images begin <-> end depending on @where
+ *
+ * @where:
+ * alloc_begin: end -> begin
+ * alloc_end: begin -> end
+ */
+enum alloc_where { alloc_begin, alloc_end, alloc_anywhere, alloc_none };
+static int _lv_relocate_reshape_space(struct logical_volume *lv, enum alloc_where where)
+{
+ uint32_t le, begin, end, s;
+ struct logical_volume *dlv;
+ struct dm_list *insert;
+ struct lv_segment *data_seg, *seg = first_seg(lv);
+
+ if (!_reshape_len_per_dev(seg))
+ return_0;
+
+ /*
+ * Move the reshape LEs of each stripe (i.e. the data image sub lv)
+ * in the first/last segment(s) across to the opposite end of the
+ * address space
+ */
+ for (s = 0; s < seg->area_count; s++) {
+ if (!(dlv = seg_lv(seg, s)))
+ return_0;
+
+ switch (where) {
+ case alloc_begin:
+ /* Move to the beginning -> start moving to the beginning from "end - reshape LEs" to end */
+ begin = dlv->le_count - _reshape_len_per_dev(seg);
+ end = dlv->le_count;
+ break;
+ case alloc_end:
+ /* Move to the end -> start moving to the end from 0 and end with reshape LEs */
+ begin = 0;
+ end = _reshape_len_per_dev(seg);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "bogus reshape space reallocation request [%d]", where);
+ return 0;
+ }
+
+ /* Ensure segment boundary at begin/end of reshape space */
+ if (!lv_split_segment(dlv, begin ?: end))
+ return_0;
+
+ /* Select destination to move to (begin/end) */
+ insert = begin ? dlv->segments.n : &dlv->segments;
+ if (!(data_seg = find_seg_by_le(dlv, begin)))
+ return_0;
+
+ le = begin;
+ while (le < end) {
+ struct dm_list *n = data_seg->list.n;
+
+ le += data_seg->len;
+
+ dm_list_move(insert, &data_seg->list);
+
+ /* If moving to the begin, adjust insertion point so that we don't reverse order */
+ if (begin)
+ insert = data_seg->list.n;
+
+ data_seg = dm_list_item(n, struct lv_segment);
+ }
+
+ le = 0;
+ dm_list_iterate_items(data_seg, &dlv->segments) {
+ data_seg->reshape_len = le ? 0 : _reshape_len_per_dev(seg);
+ data_seg->le = le;
+ le += data_seg->len;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Check if we've got out-of-place reshape
+ * capacity in @lv and allocate it if necessary.
+ *
+ * We query the target's status interface to retrieve
+ * the current data_offset and the device size and
+ * compare that to the size of the component image LV
+ * to tell whether an extension of the LV is needed or
+ * existing space can just be used.
+ *
+ * Three different scenarios need to be covered:
+ *
+ * - we have to reshape forwards
+ * (true for adding disks to a raid set) ->
+ * add extent to each component image upfront
+ * or move an existing one at the end across;
+ * kernel will set component devs data_offset to
+ * the passed in one and new_data_offset to 0,
+ * i.e. the data starts at offset 0 after the reshape
+ *
+ * - we have to reshape backwards
+ * (true for removing disks from a raid set) ->
+ * add extent to each component image by the end
+ * or use already existing one from a previous reshape;
+ * kernel will leave the data_offset of each component dev
+ * at 0 and set new_data_offset to the passed in one,
+ * i.e. the data will be at offset new_data_offset != 0
+ * after the reshape
+ *
+ * - we are free to reshape either way
+ * (true for layout changes keeping number of disks) ->
+ * let the kernel identify free out of place reshape space
+ * and select the appropriate data_offset and reshape direction
+ *
+ * Kernel will always be told to put data offset
+ * on an extent boundary.
+ * When we convert to mappings outside MD ones such as linear,
+ * striped and mirror _and_ data_offset != 0, split the first segment
+ * and adjust the rest to remove the reshape space.
+ * If it's at the end, just lv_reduce() and set seg->reshape_len to 0.
+ *
+ * Does not write metadata!
+ */
+static int _lv_alloc_reshape_space(struct logical_volume *lv,
+ enum alloc_where where,
+ enum alloc_where *where_it_was,
+ struct dm_list *allocate_pvs)
+{
+ uint32_t out_of_place_les_per_disk;
+ uint64_t data_offset;
+ struct lv_segment *seg = first_seg(lv);
+
+ if (!seg->stripe_size)
+ return_0;
+
+ /* Ensure min out-of-place reshape space 1 MiB */
+ out_of_place_les_per_disk = max(2048U, (unsigned) seg->stripe_size);
+ out_of_place_les_per_disk = (uint32_t) max(out_of_place_les_per_disk / (unsigned long long) lv->vg->extent_size, 1ULL);
+
+ /* Get data_offset and dev_sectors from the kernel */
+ if (!lv_raid_data_offset(lv, &data_offset)) {
+ log_error("Can't get data offset and dev size for %s from kernel.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /*
+ * If we have reshape space allocated and it has to grow,
+ * relocate it to the end in case kernel says it is at the
+ * beginning in order to grow the LV.
+ */
+ if (_reshape_len_per_dev(seg)) {
+ if (out_of_place_les_per_disk > _reshape_len_per_dev(seg)) {
+ /* Kernel says data is at data_offset > 0 -> relocate reshape space at the begin to the end */
+ if (data_offset && !_lv_relocate_reshape_space(lv, alloc_end))
+ return_0;
+
+ data_offset = 0;
+ out_of_place_les_per_disk -= _reshape_len_per_dev(seg);
+ } else
+ out_of_place_les_per_disk = 0;
+ }
+
+ /*
+ * If we don't have reshape space allocated, extend the LV.
+ *
+ * first_seg(lv)->reshape_len (only segment of top level raid LV)
+ * is accounting for the data rimages so that unchanged
+ * lv_extend()/lv_reduce() can be used to allocate/free,
+ * because seg->len etc. still holds the whole size as before
+ * including the reshape space
+ */
+ if (out_of_place_les_per_disk) {
+ uint32_t data_rimages = _data_rimages_count(seg, seg->area_count);
+ uint32_t reshape_len = out_of_place_les_per_disk * data_rimages;
+ uint32_t prev_rimage_len = _lv_total_rimage_len(lv);
+ uint64_t lv_size = lv->size;
+
+ if (!lv_extend(lv, seg->segtype, data_rimages,
+ seg->stripe_size, 1, seg->region_size,
+ reshape_len /* # of reshape LEs to add */,
+ allocate_pvs, lv->alloc, 0)) {
+ log_error("Failed to allocate out-of-place reshape space for %s.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ lv->size = lv_size;
+ /* pay attention to lv_extend maybe having allocated more because of layout specific rounding */
+ if (!_lv_set_reshape_len(lv, _lv_total_rimage_len(lv) - prev_rimage_len))
+ return 0;
+ }
+
+ /* Preset data offset in case we fail relocating reshape space below */
+ seg->data_offset = 0;
+
+ /*
+ * Handle reshape space relocation
+ */
+ switch (where) {
+ case alloc_begin:
+ /* Kernel says data is at data_offset == 0 -> relocate reshape space at the end to the begin */
+ if (!data_offset && !_lv_relocate_reshape_space(lv, where))
+ return_0;
+ break;
+
+ case alloc_end:
+ /* Kernel says data is at data_offset > 0 -> relocate reshape space at the begin to the end */
+ if (data_offset && !_lv_relocate_reshape_space(lv, where))
+ return_0;
+ break;
+
+ case alloc_anywhere:
+ /* We don't care where the space is, kernel will just toggle data_offset accordingly */
+ break;
+
+ default:
+ log_error(INTERNAL_ERROR "Bogus reshape space allocation request.");
+ return 0;
+ }
+
+ if (where_it_was)
+ *where_it_was = data_offset ? alloc_begin : alloc_end;
+
+ /* Inform kernel about the reshape length in sectors */
+ seg->data_offset = _reshape_len_per_dev(seg) * lv->vg->extent_size;
+
+ return _lv_set_image_lvs_start_les(lv);
+}
+
+/* Remove any reshape space from the data LVs of @lv */
+static int _lv_free_reshape_space_with_status(struct logical_volume *lv, enum alloc_where *where_it_was)
+{
+ uint32_t total_reshape_len;
+ struct lv_segment *seg = first_seg(lv);
+
+ if ((total_reshape_len = _reshape_len_per_lv(lv))) {
+ enum alloc_where where;
+ /*
+ * raid10:
+ *
+ * the allocator will have added #data_copies times the stripes,
+ * so we need to lv_reduce() by a correspondingly smaller visible size.
+ */
+ if (seg_is_any_raid10(seg)) {
+ if (total_reshape_len % seg->data_copies)
+ return_0;
+
+ total_reshape_len /= seg->data_copies;
+ }
+
+ /*
+ * Got reshape space on request to free it.
+ *
+ * If it happens to be at the beginning of
+ * the data LVs, remap it to the end in order
+ * to be able to free it via lv_reduce().
+ */
+ if (!_lv_alloc_reshape_space(lv, alloc_end, &where, NULL))
+ return_0;
+
+ seg->extents_copied = first_seg(lv)->area_len;
+ if (!lv_reduce(lv, total_reshape_len))
+ return_0;
+
+ seg->extents_copied = first_seg(lv)->area_len;
+
+ if (!_lv_set_reshape_len(lv, 0))
+ return 0;
+
+ /*
+ * Only in case reshape space was freed at the beginning,
+ * which is indicated by "where == alloc_begin",
+ * tell kernel to adjust data_offsets on raid devices to 0.
+ *
+ * The special, unused value '1' for seg->data_offset will cause
+ * "data_offset 0" to be emitted in the segment line.
+ */
+ seg->data_offset = (where == alloc_begin) ? 1 : 0;
+
+ } else if (where_it_was)
+ *where_it_was = alloc_none;
+
+ return 1;
+}
+
+__attribute__ ((__unused__))
+static int _lv_free_reshape_space(struct logical_volume *lv)
+{
+ return _lv_free_reshape_space_with_status(lv, NULL);
+}
+
+
/*
* _alloc_rmeta_for_lv
* @lv
@@ -4088,12 +4506,6 @@ static takeover_fn_t _get_takeover_fn(const struct lv_segment *seg, const struct
return _takeover_fns[_segtype_ix(seg->segtype, seg->area_count)][_segtype_ix(new_segtype, new_image_count)];
}
-/* Number of data (not parity) rimages */
-static uint32_t _data_rimages_count(const struct lv_segment *seg, const uint32_t total_rimages)
-{
- return total_rimages - seg->segtype->parity_devs;
-}
-
/*
* Determine whether data_copies, stripes, stripe_size are
* possible for conversion from seg_from to new_segtype.
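Before moving on to raid.c: _lv_alloc_reshape_space() above reserves out-of-place reshape space per data image as the larger of 1 MiB (2048 sectors) and one stripe, rounded down to whole extents with a minimum of one extent. A small sketch of that sizing, assuming stripe_size and extent_size are given in 512-byte sectors as in lvm2 metadata (the function name is illustrative):

#include <stdint.h>

/* Sketch: out-of-place reshape space per data image, in logical extents.
 * stripe_size and extent_size are in 512-byte sectors. */
static uint32_t reshape_les_per_disk(uint32_t stripe_size, uint32_t extent_size)
{
	/* At least 1 MiB (2048 sectors), but never less than one full stripe */
	uint32_t sectors = stripe_size > 2048 ? stripe_size : 2048;

	/* Convert to extents, rounding down, but reserve at least one extent */
	uint32_t extents = sectors / extent_size;

	return extents ? extents : 1;
}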
diff --git a/lib/raid/raid.c b/lib/raid/raid.c
index 398e8b004..59db43793 100644
--- a/lib/raid/raid.c
+++ b/lib/raid/raid.c
@@ -171,7 +171,7 @@ static int _raid_text_import(struct lv_segment *seg,
}
if (seg->data_copies < 2)
- seg->data_copies = 0; // lv_raid_data_copies(seg->segtype, seg->area_count);
+ seg->data_copies = lv_raid_data_copies(seg->segtype, seg->area_count);
if (seg_is_any_raid0(seg))
seg->area_len /= seg->area_count;
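As a closing note on the 10-leg limit: the removed code in _shift_and_rename_image_components() renamed sub LVs by decrementing the last character of names such as "lv_rimage_9", which only works for single-digit indices. The replacement regenerates the whole name from the parent LV name, a suffix and the new index via _generate_raid_name(). A hedged sketch of that style of name generation, with plain malloc standing in for lvm2's memory pool allocator and an illustrative function name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sketch: build "<lv>_<suffix>_<index>", e.g. "lv_rimage_12".
 * Unlike rewriting the final character, this handles any index width. */
static char *generate_subname(const char *lv_name, const char *suffix, unsigned idx)
{
	/* two underscores, up to 10 digits for the index, trailing NUL */
	size_t len = strlen(lv_name) + strlen(suffix) + 13;
	char *name = malloc(len);

	if (!name)
		return NULL;

	snprintf(name, len, "%s_%s_%u", lv_name, suffix, idx);

	return name;
}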