summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ANNOUNCE-3.133
-rw-r--r--Assemble.c29
-rw-r--r--ChangeLog10
-rw-r--r--Create.c24
-rw-r--r--Detail.c9
-rw-r--r--Grow.c1693
-rw-r--r--Manage.c24
-rw-r--r--ReadMe.c11
-rwxr-xr-xinventory4
-rw-r--r--md_p.h2
-rw-r--r--mdadm.8133
-rw-r--r--mdadm.c134
-rw-r--r--mdadm.h18
-rw-r--r--mdadm.spec2
-rw-r--r--mdassemble.82
-rw-r--r--mdassemble.c6
-rw-r--r--mdmon.82
-rw-r--r--restripe.c459
-rw-r--r--super-ddf.c15
-rw-r--r--super-intel.c5
-rw-r--r--super0.c58
-rw-r--r--super1.c52
-rw-r--r--sysfs.c58
-rw-r--r--test6
-rw-r--r--tests/00linear14
-rw-r--r--tests/00multipath8
-rw-r--r--tests/00raid014
-rw-r--r--tests/00raid18
-rw-r--r--tests/00raid102
-rw-r--r--tests/00raid44
-rw-r--r--tests/00raid510
-rw-r--r--tests/00raid66
-rw-r--r--tests/02lineargrow10
-rw-r--r--tests/02r1grow2
-rw-r--r--tests/02r5grow6
-rw-r--r--tests/02r6grow6
-rw-r--r--tests/03r0assem35
-rw-r--r--tests/03r5assem4
-rw-r--r--tests/03r5assemV12
-rw-r--r--tests/04r0update4
-rw-r--r--tests/05r1-bitmapfile6
-rw-r--r--tests/05r1-grow-external4
-rw-r--r--tests/05r1-grow-internal6
-rw-r--r--tests/05r1-grow-internal-12
-rw-r--r--tests/05r1-internalbitmap2
-rw-r--r--tests/05r1-internalbitmap-v1a2
-rw-r--r--tests/05r1-internalbitmap-v1b2
-rw-r--r--tests/05r1-internalbitmap-v1c2
-rw-r--r--tests/05r1-n3-bitmapfile2
-rw-r--r--tests/05r1-re-add10
-rw-r--r--tests/05r5-bitmapfile6
-rw-r--r--tests/05r5-internalbitmap8
-rw-r--r--tests/05r6-bitmapfile6
-rw-r--r--tests/06r5swap2
-rw-r--r--tests/06update-uuid4
-rw-r--r--tests/06wrmostly6
-rw-r--r--tests/07autoassemble10
-rw-r--r--tests/07autodetect6
-rw-r--r--tests/07changelevelintr60
-rw-r--r--tests/07changelevels107
-rw-r--r--tests/07layouts91
-rw-r--r--tests/07reshape5intr4
-rw-r--r--tests/07testreshape514
-rw-r--r--tests/10ddf-create8
-rw-r--r--util.c71
65 files changed, 2723 insertions, 642 deletions
diff --git a/ANNOUNCE-3.1 b/ANNOUNCE-3.1
new file mode 100644
index 0000000..343b85d
--- /dev/null
+++ b/ANNOUNCE-3.1
@@ -0,0 +1,33 @@
+Subject: ANNOUNCE: mdadm 3.1 - A tool for managing Soft RAID under Linux
+
+Hot on the heels of 3.0.3 I am pleased to announce the availability of
+ mdadm version 3.1
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+It contains significant feature enhancements over 3.0.x
+
+The brief change log is:
+ - Support --grow to change the layout of RAID4/5/6
+ - Support --grow to change the chunksize of raid 4/5/6
+ - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
+ back.
+ - Support --grow to reduce the number of devices in RAID4/5/6.
+ - Support restart of these grow options when assembling an array
+ which is partially grown.
+ - Assorted tests of this code, and of different RAID6 layouts.
+
+Note that a 2.6.31 or later kernel is needed to have access to these.
+Reducing devices in a RAID4/5/6 requires 2.6.32.
+Changing RAID5 to RAID1 requires 2.6.33.
+
+You should only upgrade if you need to use, or wish to test, these
+features.
+
+NeilBrown 22nd October 2009
diff --git a/Assemble.c b/Assemble.c
index 311666c..014d644 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -683,7 +683,7 @@ int Assemble(struct supertype *st, char *mddev,
> devices[most_recent].i.events)
most_recent = devcnt;
}
- if (content->array.level == -4)
+ if (content->array.level == LEVEL_MULTIPATH)
/* with multipath, the raid_disk from the superblock is meaningless */
i = devcnt;
else
@@ -776,8 +776,8 @@ int Assemble(struct supertype *st, char *mddev,
/* note: we ignore error flags in multipath arrays
* as they don't make sense
*/
- if (content->array.level != -4)
- if (!(devices[j].i.disk.state & (1<<MD_DISK_SYNC))) {
+ if (content->array.level != LEVEL_MULTIPATH)
+ if (!(devices[j].i.disk.state & (1<<MD_DISK_ACTIVE))) {
if (!(devices[j].i.disk.state
& (1<<MD_DISK_FAULTY)))
sparecnt++;
@@ -990,6 +990,10 @@ int Assemble(struct supertype *st, char *mddev,
if (content->reshape_active) {
int err = 0;
int *fdlist = malloc(sizeof(int)* bestcnt);
+ if (verbose > 0)
+ fprintf(stderr, Name ":%s has an active reshape - checking "
+ "if critical section needs to be restored\n",
+ chosen_name);
for (i=0; i<bestcnt; i++) {
int j = best[i];
if (j >= 0) {
@@ -1004,13 +1008,15 @@ int Assemble(struct supertype *st, char *mddev,
fdlist[i] = -1;
}
if (!err)
- err = Grow_restart(st, content, fdlist, bestcnt, backup_file);
+ err = Grow_restart(st, content, fdlist, bestcnt, backup_file, verbose > 0);
while (i>0) {
i--;
if (fdlist[i]>=0) close(fdlist[i]);
}
if (err) {
fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
+ if (backup_file == NULL)
+ fprintf(stderr," Possibly you needed to specify the --backup-file\n");
close(mdfd);
return err;
}
@@ -1119,7 +1125,20 @@ int Assemble(struct supertype *st, char *mddev,
content->array.layout, clean, avail, okcnt) &&
(okcnt >= req_cnt || start_partial_ok)
))) {
- if (ioctl(mdfd, RUN_ARRAY, NULL)==0) {
+ /* This array is good-to-go.
+ * If a reshape is in progress then we might need to
+ * continue monitoring it. In that case we start
+ * it read-only and let the grow code make it writable.
+ */
+ int rv;
+#ifndef MDASSEMBLE
+ if (content->reshape_active &&
+ content->delta_disks <= 0)
+ rv = Grow_continue(mdfd, st, content, backup_file);
+ else
+#endif
+ rv = ioctl(mdfd, RUN_ARRAY, NULL);
+ if (rv == 0) {
if (verbose >= 0) {
fprintf(stderr, Name ": %s has been started with %d drive%s",
mddev, okcnt, okcnt==1?"":"s");
diff --git a/ChangeLog b/ChangeLog
index f2e60ee..1719420 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,16 @@
Please see git logs for detailed change log.
This file just contains highlight.
+Changes Prior to release 3.1
+ - Support --grow to change the layout of RAID4/5/6
+ - Support --grow to change the chunksize of raid 4/5/6
+ - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
+ back.
+ - Support --grow to reduce the number of devices in RAID4/5/6.
+ - Support restart of these grow options when assembling an array
+ which is partially grown.
+ - Assorted tests of this code, and of different RAID6 layouts.
+
Changes Prior to release 3.0.3
- Improvements for creating arrays giving just a name, like 'foo',
rather than the full '/dev/md/foo'.
diff --git a/Create.c b/Create.c
index c96b319..5b01b63 100644
--- a/Create.c
+++ b/Create.c
@@ -195,7 +195,7 @@ int Create(struct supertype *st, char *mddev,
if (st && st->ss->external && sparedisks) {
fprintf(stderr,
Name ": This metadata type does not support "
- "spare disks are create time\n");
+ "spare disks at create time\n");
return 1;
}
if (subdevs > raiddisks+sparedisks) {
@@ -234,8 +234,15 @@ int Create(struct supertype *st, char *mddev,
case 10:
case 6:
case 0:
- case LEVEL_LINEAR: /* linear */
if (chunk == 0) {
+ chunk = 512;
+ if (verbose > 0)
+ fprintf(stderr, Name ": chunk size defaults to 512K\n");
+ }
+ break;
+ case LEVEL_LINEAR:
+ /* a chunksize of zero is perfectly valid (and preferred) since 2.6.16 */
+ if (get_linux_version() < 2006016 && chunk == 0) {
chunk = 64;
if (verbose > 0)
fprintf(stderr, Name ": chunk size defaults to 64K\n");
@@ -325,7 +332,7 @@ int Create(struct supertype *st, char *mddev,
raiddisks,
chunk, size*2, dname,
&freesize,
- verbose > 0)) {
+ verbose >= 0)) {
fprintf(stderr,
Name ": %s is not suitable for "
@@ -368,6 +375,17 @@ int Create(struct supertype *st, char *mddev,
warn |= check_ext2(fd, dname);
warn |= check_reiser(fd, dname);
warn |= check_raid(fd, dname);
+ if (st && strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1 &&
+ did_default &&
+ level == 1) {
+ warn = 1;
+ fprintf(stderr, Name ": Note: this array has metadata at the start and\n"
+ " may not be suitable as a boot device. If you plan to\n"
+ " store '/' or '/boot' on this device please ensure that\n"
+ " your boot-loader understands md/v1.x metadata, or use\n"
+ " --metadata=1.0\n");
+ }
close(fd);
}
}
diff --git a/Detail.c b/Detail.c
index 544cfdb..0e47a05 100644
--- a/Detail.c
+++ b/Detail.c
@@ -325,6 +325,10 @@ int Detail(char *dev, int brief, int export, int test, char *homehost)
c = map_num(r5layout, array.layout);
printf(" Layout : %s\n", c?c:"-unknown-");
}
+ if (array.level == 6) {
+ c = map_num(r6layout, array.layout);
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (array.level == 10) {
printf(" Layout :");
print_r10_layout(array.layout);
@@ -378,6 +382,11 @@ This is pretty boring
printf(" New Layout : %s\n",
c?c:"-unknown-");
}
+ if (info.new_level == 6) {
+ char *c = map_num(r6layout, info.new_layout);
+ printf(" New Layout : %s\n",
+ c?c:"-unknown-");
+ }
if (info.new_level == 10) {
printf(" New Layout : near=%d, %s=%d\n",
info.new_layout&255,
diff --git a/Grow.c b/Grow.c
index 1805604..7764bdb 100644
--- a/Grow.c
+++ b/Grow.c
@@ -23,6 +23,7 @@
*/
#include "mdadm.h"
#include "dlink.h"
+#include <sys/mman.h>
#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
#error no endian defined
@@ -30,6 +31,10 @@
#include "md_u.h"
#include "md_p.h"
+#ifndef offsetof
+#define offsetof(t,f) ((size_t)&(((t*)0)->f))
+#endif
+
int Grow_Add_device(char *devname, int fd, char *newdev)
{
/* Add a device to an active array.
@@ -376,23 +381,28 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
/*
* When reshaping an array we might need to backup some data.
* This is written to all spares with a 'super_block' describing it.
- * The superblock goes 1K form the end of the used space on the
+ * The superblock goes 4K from the end of the used space on the
* device.
* It if written after the backup is complete.
* It has the following structure.
*/
-struct mdp_backup_super {
- char magic[16]; /* md_backup_data-1 */
+static struct mdp_backup_super {
+ char magic[16]; /* md_backup_data-1 or -2 */
__u8 set_uuid[16];
__u64 mtime;
/* start/sizes in 512byte sectors */
- __u64 devstart;
+ __u64 devstart; /* address on backup device/file of data */
__u64 arraystart;
__u64 length;
__u32 sb_csum; /* csum of preceeding bytes. */
- __u8 pad[512-68];
-} __attribute__((aligned(512))) bsb;
+ __u32 pad1;
+ __u64 devstart2; /* offset in to data of second section */
+ __u64 arraystart2;
+ __u64 length2;
+ __u32 sb_csum2; /* csum of preceeding bytes. */
+ __u8 pad[512-68-32];
+} __attribute__((aligned(512))) bsb, bsb2;
int bsb_csum(char *buf, int len)
{
@@ -403,32 +413,107 @@ int bsb_csum(char *buf, int len)
return __cpu_to_le32(csum);
}
+static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets);
+static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets);
+static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ unsigned long long start,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets);
+
+int freeze_array(struct mdinfo *sra)
+{
+ /* Try to freeze resync on this array.
+ * Return -1 if the array is busy,
+ * return 0 if this kernel doesn't support 'frozen'
+ * return 1 if it worked.
+ */
+ char buf[20];
+ if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
+ return 0;
+ if (strcmp(buf, "idle\n") != 0 &&
+ strcmp(buf, "frozen\n") != 0)
+ return -1;
+ if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0)
+ return 0;
+ return 1;
+}
+
+void unfreeze_array(struct mdinfo *sra, int frozen)
+{
+ /* If 'frozen' is 1, unfreeze the array */
+ if (frozen > 0)
+ sysfs_set_str(sra, NULL, "sync_action", "idle");
+}
+
+void wait_reshape(struct mdinfo *sra)
+{
+ int fd = sysfs_get_fd(sra, NULL, "sync_action");
+ char action[20];
+
+ do {
+ fd_set rfds;
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ select(fd+1, NULL, NULL, &rfds, NULL);
+
+ if (sysfs_fd_get_str(fd, action, 20) < 0) {
+ close(fd);
+ return;
+ }
+ } while (strncmp(action, "reshape", 7) == 0);
+}
+
+
int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
- int level, int layout, int chunksize, int raid_disks)
+ int level, char *layout_str, int chunksize, int raid_disks)
{
/* Make some changes in the shape of an array.
* The kernel must support the change.
- * Different reshapes have subtly different meaning for different
- * levels, so we need to check the current state of the array
- * and go from there.
+ *
+ * There are three different changes. Each can trigger
+ * a resync or recovery so we freeze that until we have
+ * requested everything (if kernel supports freezing - 2.6.30).
+ * The steps are:
+ * - change size (i.e. component_size)
+ * - change level
+ * - change layout/chunksize/ndisks
+ *
+ * The last can require a reshape. It is different on different
+ * levels so we need to check the level before actioning it.
+ * Sometimes the level change needs to be requested after the
+ * reshape (e.g. raid6->raid5, raid5->raid0)
+ *
*/
- struct mdu_array_info_s array;
+ struct mdu_array_info_s array, orig;
char *c;
-
+ int rv = 0;
struct supertype *st;
- int nlevel, olevel;
int nchunk, ochunk;
int nlayout, olayout;
int ndisks, odisks;
int ndata, odata;
- unsigned long long nstripe, ostripe, last_block;
+ int orig_level = UnSet;
+ char alt_layout[40];
int *fdlist;
unsigned long long *offsets;
- int d, i, spares;
+ int d, i;
int nrdisks;
int err;
+ int frozen;
+ unsigned long a,b, blocks, stripes;
+ int cache;
+ unsigned long long array_size;
+ int changed = 0;
+ int done;
struct mdinfo *sra;
struct mdinfo *sd;
@@ -438,127 +523,306 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
devname);
return 1;
}
+
+ if (size >= 0 &&
+ (chunksize || level!= UnSet || layout_str || raid_disks)) {
+ fprintf(stderr, Name ": cannot change component size at the same time "
+ "as other changes.\n"
+ " Change size first, then check data is intact before "
+ "making other changes.\n");
+ return 1;
+ }
+
+ if (raid_disks && raid_disks < array.raid_disks && array.level > 1 &&
+ get_linux_version() < 2006032 &&
+ !check_env("MDADM_FORCE_FEWER")) {
+ fprintf(stderr, Name ": reducing the number of devices is not safe before Linux 2.6.32\n"
+ " Please use a newer kernel\n");
+ return 1;
+ }
+ sra = sysfs_read(fd, 0, GET_LEVEL);
+ frozen = freeze_array(sra);
+ if (frozen < 0) {
+ fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
+ " be reshaped\n", devname);
+ return 1;
+ }
+
+ /* ========= set size =============== */
+ if (size >= 0 && (size == 0 || size != array.size)) {
+ array.size = size;
+ if (array.size != size) {
+ /* got truncated to 32bit, write to
+ * component_size instead
+ */
+ if (sra)
+ rv = sysfs_set_num(sra, NULL,
+ "component_size", size);
+ else
+ rv = -1;
+ } else
+ rv = ioctl(fd, SET_ARRAY_INFO, &array);
+ if (rv != 0) {
+ fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
+ devname, strerror(errno));
+ rv = 1;
+ goto release;
+ }
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ if (!quiet)
+ fprintf(stderr, Name ": component size of %s has been set to %dK\n",
+ devname, array.size);
+ changed = 1;
+ }
+
+ /* ======= set level =========== */
+ if (level != UnSet && level != array.level) {
+ /* Trying to change the level.
+ * We might need to change layout first and schedule a
+ * level change for later.
+ * Level changes that can happen immediately are:
+ * 0->4,5,6 1->5 4->5,6 5->1,6
+ * Level changes that need a layout change first are:
+ * 6->5,4,0 : need a -6 layout, or parity-last
+ * 5->4,0 : need parity-last
+ */
+ if ((array.level == 6 || array.level == 5) &&
+ (level == 5 || level == 4 || level == 0)) {
+ /* Don't change level yet, but choose intermediate
+ * layout
+ */
+ if (level == 5) {
+ if (layout_str == NULL)
+ switch (array.layout) {
+ case ALGORITHM_LEFT_ASYMMETRIC:
+ case ALGORITHM_LEFT_ASYMMETRIC_6:
+ case ALGORITHM_ROTATING_N_RESTART:
+ layout_str = "left-asymmetric-6";
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC:
+ case ALGORITHM_LEFT_SYMMETRIC_6:
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ layout_str = "left-symmetric-6";
+ break;
+ case ALGORITHM_RIGHT_ASYMMETRIC:
+ case ALGORITHM_RIGHT_ASYMMETRIC_6:
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ layout_str = "right-asymmetric-6";
+ break;
+ case ALGORITHM_RIGHT_SYMMETRIC:
+ case ALGORITHM_RIGHT_SYMMETRIC_6:
+ layout_str = "right-symmetric-6";
+ break;
+ case ALGORITHM_PARITY_0:
+ case ALGORITHM_PARITY_0_6:
+ layout_str = "parity-first-6";
+ break;
+ case ALGORITHM_PARITY_N:
+ layout_str = "parity-last";
+ break;
+ default:
+ fprintf(stderr, Name ": %s: cannot"
+ "convert layout to RAID5 equivalent\n",
+ devname);
+ rv = 1;
+ goto release;
+ }
+ else {
+ int l = map_name(r5layout, layout_str);
+ if (l == UnSet) {
+ fprintf(stderr, Name ": %s: layout '%s' not recognised\n",
+ devname, layout_str);
+ rv = 1;
+ goto release;
+ }
+ if (l != ALGORITHM_PARITY_N) {
+ /* need the -6 version */
+ char *ls = map_num(r5layout, l);
+ strcat(strcpy(alt_layout, ls),
+ "-6");
+ layout_str = alt_layout;
+ }
+ }
+ if (raid_disks)
+ /* The final raid6->raid5 conversion
+ * will reduce the number of disks,
+ * so now we need to aim higher
+ */
+ raid_disks++;
+ } else
+ layout_str = "parity-last";
+ } else {
+ c = map_num(pers, level);
+ if (c == NULL) {
+ rv = 1;/* not possible */
+ goto release;
+ }
+ err = sysfs_set_str(sra, NULL, "level", c);
+ if (err) {
+ fprintf(stderr, Name ": %s: could not set level to %s\n",
+ devname, c);
+ rv = 1;
+ goto release;
+ }
+ orig = array;
+ orig_level = orig.level;
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ if (layout_str == NULL &&
+ orig.level == 5 && level == 6 &&
+ array.layout != orig.layout)
+ layout_str = map_num(r5layout, orig.layout);
+ if (!quiet)
+ fprintf(stderr, Name " level of %s changed to %s\n",
+ devname, c);
+ changed = 1;
+ }
+ }
+
+ /* ========= set shape (chunk_size / layout / ndisks) ============== */
+ /* Check if layout change is a no-op */
+ if (layout_str) switch(array.level) {
+ case 5:
+ if (array.layout == map_name(r5layout, layout_str))
+ layout_str = NULL;
+ break;
+ case 6:
+ if (layout_str == NULL &&
+ ((chunksize && chunksize * 1024 != array.chunk_size) ||
+ (raid_disks && raid_disks != array.raid_disks)) &&
+ array.layout >= 16) {
+ fprintf(stderr, Name
+ ": %s has a non-standard layout. If you wish to preserve this\n"
+ " during the reshape, please specify --layout=preserve\n"
+ " If you want to change it, specify a layout or use --layout=normalise\n",
+ devname);
+ rv = 1;
+ goto release;
+ }
+ if (strcmp(layout_str, "normalise") == 0 ||
+ strcmp(layout_str, "normalize") == 0) {
+ char *hyphen;
+ strcpy(alt_layout, map_num(r6layout, array.layout));
+ hyphen = strrchr(alt_layout, '-');
+ if (hyphen && strcmp(hyphen, "-6") == 0) {
+ *hyphen = 0;
+ layout_str = alt_layout;
+ }
+ }
+
+ if (array.layout == map_name(r6layout, layout_str))
+ layout_str = NULL;
+ if (layout_str && strcmp(layout_str, "preserve") == 0)
+ layout_str = NULL;
+ break;
+ }
+ if (layout_str == NULL
+ && (chunksize == 0 || chunksize*1024 == array.chunk_size)
+ && (raid_disks == 0 || raid_disks == array.raid_disks)) {
+ rv = 0;
+ if (level != UnSet && level != array.level) {
+ /* Looks like this level change doesn't need
+ * a reshape after all.
+ */
+ c = map_num(pers, level);
+ if (c) {
+ rv = sysfs_set_str(sra, NULL, "level", c);
+ if (rv)
+ fprintf(stderr, Name ": %s: could not set level to %s\n",
+ devname, c);
+ }
+ } else if (!changed && !quiet)
+ fprintf(stderr, Name ": %s: no change requested\n",
+ devname);
+ goto release;
+ }
+
c = map_num(pers, array.level);
if (c == NULL) c = "-unknown-";
switch(array.level) {
default: /* raid0, linear, multipath cannot be reconfigured */
fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
c, devname);
- return 1;
+ rv = 1;
+ break;
case LEVEL_FAULTY: /* only 'layout' change is permitted */
- if (size >= 0) {
- fprintf(stderr, Name ": %s: Cannot change size of a 'faulty' array\n",
- devname);
- return 1;
- }
- if (level != UnSet && level != LEVEL_FAULTY) {
- fprintf(stderr, Name ": %s: Cannot change RAID level of a 'faulty' array\n",
- devname);
- return 1;
- }
if (chunksize || raid_disks) {
fprintf(stderr, Name ": %s: Cannot change chunksize or disks of a 'faulty' array\n",
devname);
- return 1;
+ rv = 1;
+ break;
+ }
+ if (layout_str == NULL)
+ break; /* nothing to do.... */
+
+ array.layout = parse_layout_faulty(layout_str);
+ if (array.layout < 0) {
+ int rv;
+ fprintf(stderr, Name ": %s: layout %s not understood for 'faulty' array\n",
+ devname, layout_str);
+ rv = 1;
+ break;
}
- if (layout == UnSet)
- return 0; /* nothing to do.... */
-
- array.layout = layout;
if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
devname, strerror(errno));
- return 1;
- }
- if (!quiet)
+ rv = 1;
+ } else if (!quiet)
printf("layout for %s set to %d\n", devname, array.layout);
- return 0;
+ break;
- case 1: /* raid_disks and size can each be changed. They are independant */
+ case 1: /* only raid_disks can each be changed. */
- if (level != UnSet && level != 1) {
- fprintf(stderr, Name ": %s: Cannot change RAID level of a RAID1 array.\n",
- devname);
- return 1;
- }
- if (chunksize || layout != UnSet) {
- fprintf(stderr, Name ": %s: Cannot change chunk size of layout for a RAID1 array.\n",
+ if (chunksize || layout_str != NULL) {
+ fprintf(stderr, Name ": %s: Cannot change chunk size or layout for a RAID1 array.\n",
devname);
- return 1;
+ rv = 1;
+ break;
}
-
- /* Each can trigger a resync/recovery which will block the
- * other from happening. Later we could block
- * resync for the duration via 'sync_action'...
- */
if (raid_disks > 0) {
array.raid_disks = raid_disks;
if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
}
}
- if (size >= 0) {
- array.size = size;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
- fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
- devname, strerror(errno));
- return 1;
- }
- }
- return 0;
+ break;
case 4:
case 5:
case 6:
- st = super_by_fd(fd);
- /* size can be changed independently.
- * layout/chunksize/raid_disks/level can be changed
+ /*
+ * layout/chunksize/raid_disks can be changed
* though the kernel may not support it all.
- * If 'suspend_lo' is not present in devfs, then
- * these cannot be changed.
*/
- if (size >= 0) {
- /* Cannot change other details as well.. */
- if (layout != UnSet ||
- chunksize != 0 ||
- raid_disks != 0 ||
- level != UnSet) {
- fprintf(stderr, Name ": %s: Cannot change shape as well as size of a %s array.\n",
- devname, c);
- return 1;
- }
- array.size = size;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
- fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
- devname, strerror(errno));
- return 1;
- }
- return 0;
- }
- /* Ok, just change the shape. This can be awkward.
- * There are three possibilities.
- * 1/ The array will shrink. We don't support this
- * possibility. Maybe one day...
- * 2/ The array will not change size. This is easy enough
- * to do, but not reliably. If the process is aborted
- * the array *will* be corrupted. So maybe we can allow
- * this but only if the user is really certain. e.g.
- * --really-risk-everything
- * 3/ The array will grow. This can be reliably achieved.
+ st = super_by_fd(fd);
+
+ /*
+ * There are three possibilities.
+ * 1/ The array will shrink.
+ * We need to ensure the reshape will pause before reaching
+ * the 'critical section'. We also need to fork and wait for
+ * that to happen. When it does we
+ * suspend/backup/complete/unfreeze
+ *
+ * 2/ The array will not change size.
+ * This requires that we keep a backup of a sliding window
+ * so that we can restore data after a crash. So we need
+ * to fork and monitor progress.
+ *
+ * 3/ The array will grow. This is relatively easy.
* However the kernel's restripe routines will cheerfully
* overwrite some early data before it is safe. So we
* need to make a backup of the early parts of the array
* and be ready to restore it if rebuild aborts very early.
*
- * We backup data by writing it to all spares (there must be
- * at least 1, so even raid6->raid5 requires a spare to be
- * present).
+ * We backup data by writing it to one spare, or to a
+ * file which was given on command line.
*
+ * [FOLLOWING IS OLD AND PARTLY WRONG]
* So: we enumerate the devices in the array and
* make sure we can open all of them.
* Then we freeze the early part of the array and
@@ -568,71 +832,123 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
* and finally invalidate the copied data and unfreeze the
* start of the array.
*
- * Before we can do this we need to decide:
- * - will the array grow? Just calculate size
- * - how much needs to be saved: count stripes.
- * - where to save data... good question.
- *
+ * In each case, we first make sure that storage is available
+ * for the required backup.
+ * Then we:
+ * - request the shape change.
+ * - fork to handle backup etc.
*/
- nlevel = olevel = array.level;
nchunk = ochunk = array.chunk_size;
nlayout = olayout = array.layout;
ndisks = odisks = array.raid_disks;
- if (level != UnSet) nlevel = level;
- if (chunksize) nchunk = chunksize;
- if (layout != UnSet) nlayout = layout;
+ if (chunksize) {
+ nchunk = chunksize * 1024;
+ if (array.size % chunksize) {
+ fprintf(stderr, Name ": component size %dK is not"
+ " a multiple of chunksize %dK\n",
+ array.size, chunksize);
+ break;
+ }
+ }
+ if (layout_str != NULL)
+ switch(array.level) {
+ case 4: /* ignore layout */
+ break;
+ case 5:
+ nlayout = map_name(r5layout, layout_str);
+ if (nlayout == UnSet) {
+ fprintf(stderr, Name ": layout %s not understood for raid5.\n",
+ layout_str);
+ rv = 1;
+ goto release;
+ }
+ break;
+
+ case 6:
+ nlayout = map_name(r6layout, layout_str);
+ if (nlayout == UnSet) {
+ fprintf(stderr, Name ": layout %s not understood for raid6.\n",
+ layout_str);
+ rv = 1;
+ goto release;
+ }
+ break;
+ }
if (raid_disks) ndisks = raid_disks;
odata = odisks-1;
- if (olevel == 6) odata--; /* number of data disks */
ndata = ndisks-1;
- if (nlevel == 6) ndata--;
+ if (array.level == 6) {
+ odata--; /* number of data disks */
+ ndata--;
+ }
- if (ndata < odata) {
- fprintf(stderr, Name ": %s: Cannot reduce number of data disks (yet).\n",
- devname);
- return 1;
+ if (odata == ndata &&
+ get_linux_version() < 2006032) {
+ fprintf(stderr, Name ": in-place reshape is not safe before 2.6.32, sorry.\n");
+ break;
}
- if (ndata == odata) {
- fprintf(stderr, Name ": %s: Cannot reshape array without increasing size (yet).\n",
- devname);
- return 1;
+
+ /* Check that we can hold all the data */
+ size = ndata * array.size;
+ get_dev_size(fd, NULL, &array_size);
+ if (size < (array_size/1024)) {
+ fprintf(stderr, Name ": this change will reduce the size of the array.\n"
+ " use --grow --array-size first to truncate array.\n"
+ " e.g. mdadm --grow %s --array-size %llu\n",
+ devname, size);
+ rv = 1;
+ break;
}
- /* Well, it is growing... so how much do we need to backup.
- * Need to backup a full number of new-stripes, such that the
- * last one does not over-write any place that it would be read
- * from
+
+ /* So how much do we need to backup.
+ * We need an amount of data which is both a whole number of
+ * old stripes and a whole number of new stripes.
+ * So LCM for (chunksize*datadisks).
*/
- nstripe = ostripe = 0;
- while (nstripe >= ostripe) {
- nstripe += nchunk/512;
- last_block = nstripe * ndata;
- ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
+ a = ochunk/512 * odata;
+ b = nchunk/512 * ndata;
+ /* Find GCD */
+ while (a != b) {
+ if (a < b)
+ b -= a;
+ if (b < a)
+ a -= b;
}
- fprintf(stderr, Name ": Need to backup %lluK of critical "
- "section..\n", last_block/2);
+ /* LCM == product / GCD */
+ blocks = ochunk/512 * nchunk/512 * odata * ndata / a;
+ sysfs_free(sra);
sra = sysfs_read(fd, 0,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
GET_CACHE);
+
+ if (ndata == odata) {
+ /* Make 'blocks' bigger for better throughput, but
+ * not so big that we reject it below.
+ * Try for 16 megabytes
+ */
+ while (blocks * 32 < sra->component_size &&
+ blocks < 16*1024*2)
+ blocks *= 2;
+ } else
+ fprintf(stderr, Name ": Need to backup %luK of critical "
+ "section..\n", blocks/2);
+
if (!sra) {
fprintf(stderr, Name ": %s: Cannot get array details from sysfs\n",
devname);
- return 1;
+ rv = 1;
+ break;
}
- if (last_block >= sra->component_size/2) {
+ if (blocks >= sra->component_size/2) {
fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
devname);
- return 1;
+ rv = 1;
+ break;
}
- if (sra->array.spare_disks == 0 && backup_file == NULL) {
- fprintf(stderr, Name ": %s: Cannot grow - need a spare or backup-file to backup critical section\n",
- devname);
- return 1;
- }
-
nrdisks = array.nr_disks + sra->array.spare_disks;
/* Now we need to open all these devices so we can read/write.
*/
@@ -640,7 +956,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
offsets = malloc((1+nrdisks) * sizeof(offsets[0]));
if (!fdlist || !offsets) {
fprintf(stderr, Name ": malloc failed: grow aborted\n");
- return 1;
+ rv = 1;
+ break;
}
for (d=0; d <= nrdisks; d++)
fdlist[d] = -1;
@@ -653,214 +970,635 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
sd->disk.minor, 1);
fdlist[sd->disk.raid_disk]
= dev_open(dn, O_RDONLY);
- offsets[sd->disk.raid_disk] = sd->data_offset;
+ offsets[sd->disk.raid_disk] = sd->data_offset*512;
if (fdlist[sd->disk.raid_disk] < 0) {
fprintf(stderr, Name ": %s: cannot open component %s\n",
devname, dn?dn:"-unknown-");
- goto abort;
+ rv = 1;
+ goto release;
}
- } else {
+ } else if (backup_file == NULL) {
/* spare */
char *dn = map_dev(sd->disk.major,
sd->disk.minor, 1);
fdlist[d] = dev_open(dn, O_RDWR);
- offsets[d] = sd->data_offset;
+ offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
if (fdlist[d]<0) {
fprintf(stderr, Name ": %s: cannot open component %s\n",
devname, dn?dn:"-unknown");
- goto abort;
+ rv = 1;
+ goto release;
}
d++;
}
}
- for (i=0 ; i<array.raid_disks; i++)
- if (fdlist[i] < 0) {
- fprintf(stderr, Name ": %s: failed to find device %d. Array might be degraded.\n"
- " --grow aborted\n", devname, i);
- goto abort;
+ if (backup_file == NULL) {
+ if (ndata <= odata) {
+ fprintf(stderr, Name ": %s: Cannot grow - need backup-file\n",
+ devname);
+ rv = 1;
+ break;
+ } else if (sra->array.spare_disks == 0) {
+ fprintf(stderr, Name ": %s: Cannot grow - need a spare or "
+ "backup-file to backup critical section\n",
+ devname);
+ rv = 1;
+ break;
+ }
+ if (d == array.raid_disks) {
+ fprintf(stderr, Name ": %s: No spare device for backup\n",
+ devname);
+ rv = 1;
+ break;
}
- spares = sra->array.spare_disks;
- if (backup_file) {
- fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL, S_IRUSR | S_IWUSR);
+ } else {
+ /* need to check backup file is large enough */
+ char buf[512];
+ fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
+ S_IRUSR | S_IWUSR);
+ offsets[d] = 8 * 512;
if (fdlist[d] < 0) {
fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
devname, backup_file, strerror(errno));
- goto abort;
+ rv = 1;
+ break;
+ }
+ memset(buf, 0, 512);
+ for (i=0; i < blocks + 1 ; i++) {
+ if (write(fdlist[d], buf, 512) != 512) {
+ fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+ devname, backup_file, strerror(errno));
+ rv = 1;
+ break;
+ }
+ }
+ if (fsync(fdlist[d]) != 0) {
+ fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+ devname, backup_file, strerror(errno));
+ rv = 1;
+ break;
}
- offsets[d] = 8;
d++;
- spares++;
}
- if (fdlist[array.raid_disks] < 0) {
- fprintf(stderr, Name ": %s: failed to find a spare and no backup-file given - --grow aborted\n",
- devname);
- goto abort;
+
+ /* lastly, check that the internal stripe cache is
+ * large enough, or it won't work.
+ */
+
+ cache = (nchunk < ochunk) ? ochunk : nchunk;
+ cache = cache * 4 / 4096;
+ if (cache < blocks / 8 / odisks + 16)
+ /* Make it big enough to hold 'blocks' */
+ cache = blocks / 8 / odisks + 16;
+ if (sra->cache_size < cache)
+ sysfs_set_num(sra, NULL, "stripe_cache_size",
+ cache+1);
+ /* Right, everything seems fine. Let's kick things off.
+ * If only changing raid_disks, use ioctl, else use
+ * sysfs.
+ */
+ if (ochunk == nchunk && olayout == nlayout) {
+ array.raid_disks = ndisks;
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ rv = 1;
+ fprintf(stderr, Name ": Cannot set device shape for %s: %s\n",
+ devname, strerror(errno));
+ if (ndisks < odisks &&
+ get_linux_version() < 2006030)
+ fprintf(stderr, Name ": linux 2.6.30 or later required\n");
+
+ break;
+ }
+ } else {
+ /* set them all just in case some old 'new_*' value
+ * persists from some earlier problem
+ */
+ if (sysfs_set_num(sra, NULL, "chunk_size", nchunk) < 0)
+ rv = 1;
+ if (sysfs_set_num(sra, NULL, "layout", nlayout) < 0)
+ rv = 1;
+ if (sysfs_set_num(sra, NULL, "raid_disks", ndisks) < 0)
+ rv = 1;
+ if (rv) {
+ fprintf(stderr, Name ": Cannot set device shape for %s\n",
+ devname);
+ if (get_linux_version() < 2006030)
+ fprintf(stderr, Name ": linux 2.6.30 or later required\n");
+ break;
+ }
+ }
+
+ if (ndisks == 2 && odisks == 2) {
+ /* No reshape is needed in this trivial case */
+ rv = 0;
+ break;
}
+ /* set up the backup-super-block. This requires the
+ * uuid from the array.
+ */
/* Find a superblock */
- if (st->ss->load_super(st, fdlist[0], NULL)) {
+ for (sd = sra->devs; sd; sd = sd->next) {
+ char *dn;
+ int devfd;
+ int ok;
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
+ continue;
+ dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+ devfd = dev_open(dn, O_RDONLY);
+ if (devfd < 0)
+ continue;
+ ok = st->ss->load_super(st, devfd, NULL);
+ close(devfd);
+ if (ok >= 0)
+ break;
+ }
+ if (!sd) {
fprintf(stderr, Name ": %s: Cannot find a superblock\n",
devname);
- goto abort;
+ rv = 1;
+ break;
}
-
+ memset(&bsb, 0, 512);
memcpy(bsb.magic, "md_backup_data-1", 16);
st->ss->uuid_from_super(st, (int*)&bsb.set_uuid);
bsb.mtime = __cpu_to_le64(time(0));
- bsb.arraystart = 0;
- bsb.length = __cpu_to_le64(last_block);
-
- /* Decide offset for the backup, llseek the spares, and write
- * a leading superblock 4K earlier.
+ bsb.devstart2 = blocks;
+ stripes = blocks / (ochunk/512) / odata;
+ /* Now we just need to kick off the reshape and watch, while
+ * handling backups of the data...
+ * This is all done by a forked background process.
*/
- for (i=array.raid_disks; i<d; i++) {
- char abuf[4096+512];
- char *buf = (char*)(((unsigned long)abuf+511)& ~511);
- if (i==d-1 && backup_file) {
- /* This is the backup file */
- offsets[i] = 8;
- } else
- offsets[i] += sra->component_size - last_block - 8;
- if (lseek64(fdlist[i], (offsets[i]<<9) - 4096, 0)
- != (offsets[i]<<9) - 4096) {
- fprintf(stderr, Name ": could not seek...\n");
- goto abort;
- }
- memset(buf, 0, 4096);
- bsb.devstart = __cpu_to_le64(offsets[i]);
- bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
- memcpy(buf, &bsb, sizeof(bsb));
- if (write(fdlist[i], buf, 4096) != 4096) {
- fprintf(stderr, Name ": could not write leading superblock\n");
- goto abort;
+ switch(fork()) {
+ case 0:
+ close(fd);
+ if (check_env("MDADM_GROW_VERIFY"))
+ fd = open(devname, O_RDONLY | O_DIRECT);
+ else
+ fd = -1;
+ mlockall(MCL_FUTURE);
+
+ if (odata < ndata)
+ done = child_grow(fd, sra, stripes,
+ fdlist, offsets,
+ odisks, ochunk, array.level, olayout, odata,
+ d - odisks, fdlist+odisks, offsets+odisks);
+ else if (odata > ndata)
+ done = child_shrink(fd, sra, stripes,
+ fdlist, offsets,
+ odisks, ochunk, array.level, olayout, odata,
+ d - odisks, fdlist+odisks, offsets+odisks);
+ else
+ done = child_same_size(fd, sra, stripes,
+ fdlist, offsets,
+ 0,
+ odisks, ochunk, array.level, olayout, odata,
+ d - odisks, fdlist+odisks, offsets+odisks);
+ if (backup_file && done)
+ unlink(backup_file);
+ if (level != UnSet && level != array.level) {
+ /* We need to wait for the reshape to finish
+ * (which will have happened unless odata < ndata)
+ * and then set the level
+ */
+
+ c = map_num(pers, level);
+ if (c == NULL)
+ exit(0);/* not possible */
+
+ if (odata < ndata)
+ wait_reshape(sra);
+ err = sysfs_set_str(sra, NULL, "level", c);
+ if (err)
+ fprintf(stderr, Name ": %s: could not set level to %s\n",
+ devname, c);
}
+ exit(0);
+ case -1:
+ fprintf(stderr, Name ": Cannot run child to monitor reshape: %s\n",
+ strerror(errno));
+ rv = 1;
+ break;
+ default:
+ /* The child will take care of unfreezing the array */
+ frozen = 0;
+ break;
}
- array.level = nlevel;
- array.raid_disks = ndisks;
- array.chunk_size = nchunk;
- array.layout = nlayout;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
- if (errno == ENOSPC) {
- /* stripe cache is not big enough.
- * It needs to be 4 times chunksize_size,
- * and we assume pagesize is 4K
- */
- if (sra->cache_size < 4 * (nchunk/4096)) {
- sysfs_set_num(sra, NULL,
- "stripe_cache_size",
- 4 * (nchunk/4096) +1);
- if (ioctl(fd, SET_ARRAY_INFO,
- &array) == 0)
- goto ok;
+ break;
+
+ }
+
+ release:
+ if (rv && orig_level != UnSet && sra) {
+ c = map_num(pers, orig_level);
+ if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
+ fprintf(stderr, Name ": aborting level change\n");
+ }
+ if (sra)
+ unfreeze_array(sra, frozen);
+ return rv;
+}
+
+/*
+ * We run a child process in the background which performs the following
+ * steps:
+ * - wait for resync to reach a certain point
+ * - suspend io to the following section
+ * - backup that section
+ * - allow resync to proceed further
+ * - resume io
+ * - discard the backup.
+ *
+ * These are combined in slightly different ways in the three cases.
+ * Grow:
+ * - suspend/backup/allow/wait/resume/discard
+ * Shrink:
+ * - allow/wait/suspend/backup/allow/wait/resume/discard
+ * same-size:
+ * - wait/resume/discard/suspend/backup/allow
+ *
+ * suspend/backup/allow always come together
+ * wait/resume/discard do too.
+ * For the same-size case we have two backups to improve flow.
+ *
+ */
+
+int grow_backup(struct mdinfo *sra,
+ unsigned long long offset, /* per device */
+ unsigned long stripes, /* per device */
+ int *sources, unsigned long long *offsets,
+ int disks, int chunk, int level, int layout,
+ int dests, int *destfd, unsigned long long *destoffsets,
+ int part, int *degraded,
+ char *buf)
+{
+ /* Backup 'blocks' sectors at 'offset' on each device of the array,
+ * to storage 'destfd' (offset 'destoffsets'), after first
+ * suspending IO. Then allow resync to continue
+ * over the suspended section.
+ * Use part 'part' of the backup-super-block.
+ */
+ int odata = disks;
+ int rv = 0;
+ int i;
+ unsigned long long new_degraded;
+ //printf("offset %llu\n", offset);
+ if (level >= 4)
+ odata--;
+ if (level == 6)
+ odata--;
+ sysfs_set_num(sra, NULL, "suspend_hi", (offset + stripes * chunk/512) * odata);
+ /* Check that array hasn't become degraded, else we might backup the wrong data */
+ sysfs_get_ll(sra, NULL, "degraded", &new_degraded);
+ if (new_degraded != *degraded) {
+ /* check each device to ensure it is still working */
+ struct mdinfo *sd;
+ for (sd = sra->devs ; sd ; sd = sd->next) {
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
+ continue;
+ if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+ char sbuf[20];
+ if (sysfs_get_str(sra, sd, "state", sbuf, 20) < 0 ||
+ strstr(sbuf, "faulty") ||
+ strstr(sbuf, "in_sync") == NULL) {
+ /* this device is dead */
+ sd->disk.state = (1<<MD_DISK_FAULTY);
+ if (sd->disk.raid_disk >= 0 &&
+ sources[sd->disk.raid_disk] >= 0) {
+ close(sources[sd->disk.raid_disk]);
+ sources[sd->disk.raid_disk] = -1;
+ }
}
}
- fprintf(stderr, Name ": Cannot set device size/shape for %s: %s\n",
- devname, strerror(errno));
- goto abort;
}
- ok: ;
-
- /* suspend the relevant region */
- sysfs_set_num(sra, NULL, "suspend_hi", 0); /* just in case */
- if (sysfs_set_num(sra, NULL, "suspend_lo", 0) < 0 ||
- sysfs_set_num(sra, NULL, "suspend_hi", last_block) < 0) {
- fprintf(stderr, Name ": %s: failed to suspend device.\n",
- devname);
- goto abort_resume;
+ *degraded = new_degraded;
+ }
+ if (part) {
+ bsb.arraystart2 = __cpu_to_le64(offset * odata);
+ bsb.length2 = __cpu_to_le64(stripes * chunk/512 * odata);
+ } else {
+ bsb.arraystart = __cpu_to_le64(offset * odata);
+ bsb.length = __cpu_to_le64(stripes * chunk/512 * odata);
+ }
+ if (part)
+ bsb.magic[15] = '2';
+ for (i = 0; i < dests; i++)
+ if (part)
+ lseek64(destfd[i], destoffsets[i] + __le64_to_cpu(bsb.devstart2)*512, 0);
+ else
+ lseek64(destfd[i], destoffsets[i], 0);
+
+ rv = save_stripes(sources, offsets,
+ disks, chunk, level, layout,
+ dests, destfd,
+ offset*512*odata, stripes * chunk * odata,
+ buf);
+
+ if (rv)
+ return rv;
+ bsb.mtime = __cpu_to_le64(time(0));
+ for (i = 0; i < dests; i++) {
+ bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
+
+ bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+ if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
+ bsb.sb_csum2 = bsb_csum((char*)&bsb,
+ ((char*)&bsb.sb_csum2)-((char*)&bsb));
+
+ lseek64(destfd[i], destoffsets[i] - 4096, 0);
+ write(destfd[i], &bsb, 512);
+ if (destoffsets[i] > 4096) {
+ lseek64(destfd[i], destoffsets[i]+stripes*chunk*odata, 0);
+ write(destfd[i], &bsb, 512);
}
+ fsync(destfd[i]);
+ }
+ return 0;
+}
- err = save_stripes(fdlist, offsets,
- odisks, ochunk, olevel, olayout,
- spares, fdlist+odisks,
- 0ULL, last_block*512);
+/* in 2.6.30, the value reported by sync_completed can be
+ * less than it should be by one stripe.
+ * This only happens when reshape hits sync_max and pauses.
+ * So allow wait_backup to either extend sync_max further
+ * than strictly necessary, or return before the
+ * sync has got quite as far as we would really like.
+ * This is what 'blocks2' is for.
+ * The various callers give appropriate values so that
+ * everything works.
+ */
+int wait_backup(struct mdinfo *sra,
+ unsigned long long offset, /* per device */
+ unsigned long long blocks, /* per device */
+ unsigned long long blocks2, /* per device - hack */
+ int dests, int *destfd, unsigned long long *destoffsets,
+ int part)
+{
+ /* Wait for resync to pass the section that was backed up
+ * then erase the backup and allow IO
+ */
+ int fd = sysfs_get_fd(sra, NULL, "sync_completed");
+ unsigned long long completed;
+ int i;
- /* abort if there was an error */
- if (err < 0) {
- fprintf(stderr, Name ": %s: failed to save critical region\n",
- devname);
- goto abort_resume;
+ if (fd < 0)
+ return -1;
+ sysfs_set_num(sra, NULL, "sync_max", offset + blocks + blocks2);
+ if (offset == 0)
+ sysfs_set_str(sra, NULL, "sync_action", "reshape");
+ do {
+ char action[20];
+ fd_set rfds;
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ select(fd+1, NULL, NULL, &rfds, NULL);
+ if (sysfs_fd_get_ll(fd, &completed) < 0) {
+ close(fd);
+ return -1;
}
+ if (sysfs_get_str(sra, NULL, "sync_action",
+ action, 20) > 0 &&
+ strncmp(action, "reshape", 7) != 0)
+ break;
+ } while (completed < offset + blocks);
+ close(fd);
+
+ if (part) {
+ bsb.arraystart2 = __cpu_to_le64(0);
+ bsb.length2 = __cpu_to_le64(0);
+ } else {
+ bsb.arraystart = __cpu_to_le64(0);
+ bsb.length = __cpu_to_le64(0);
+ }
+ bsb.mtime = __cpu_to_le64(time(0));
+ for (i = 0; i < dests; i++) {
+ bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
+ bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+ if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
+ bsb.sb_csum2 = bsb_csum((char*)&bsb,
+ ((char*)&bsb.sb_csum2)-((char*)&bsb));
+ lseek64(destfd[i], destoffsets[i]-4096, 0);
+ write(destfd[i], &bsb, 512);
+ fsync(destfd[i]);
+ }
+ return 0;
+}
- for (i=odisks; i<d ; i++) {
- bsb.devstart = __cpu_to_le64(offsets[i]);
- bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
- if (lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0) < 0 ||
- write(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb) ||
- fsync(fdlist[i]) != 0) {
- fprintf(stderr, Name ": %s: failed to save metadata for critical region backups.\n",
- devname);
- goto abort_resume;
- }
- }
+static void fail(char *msg)
+{
+ write(2, msg, strlen(msg));
+ write(2, "\n", 1);
+ exit(1);
+}
- /* start the reshape happening */
- if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
- fprintf(stderr, Name ": %s: failed to initiate reshape\n",
- devname);
- goto abort_resume;
+static char *abuf, *bbuf;
+static int abuflen;
+static void validate(int afd, int bfd, unsigned long long offset)
+{
+ /* check that the data in the backup against the array.
+ * This is only used for regression testing and should not
+ * be used while the array is active
+ */
+ if (afd < 0)
+ return;
+ lseek64(bfd, offset - 4096, 0);
+ if (read(bfd, &bsb2, 512) != 512)
+ fail("cannot read bsb");
+ if (bsb2.sb_csum != bsb_csum((char*)&bsb2,
+ ((char*)&bsb2.sb_csum)-((char*)&bsb2)))
+ fail("first csum bad");
+ if (memcmp(bsb2.magic, "md_backup_data", 14) != 0)
+ fail("magic is bad");
+ if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 &&
+ bsb2.sb_csum2 != bsb_csum((char*)&bsb2,
+ ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
+ fail("second csum bad");
+
+ if (__le64_to_cpu(bsb2.devstart)*512 != offset)
+ fail("devstart is wrong");
+
+ if (bsb2.length) {
+ unsigned long long len = __le64_to_cpu(bsb2.length)*512;
+
+ if (abuflen < len) {
+ free(abuf);
+ free(bbuf);
+ abuflen = len;
+ posix_memalign((void**)&abuf, 4096, abuflen);
+ posix_memalign((void**)&bbuf, 4096, abuflen);
}
- /* wait for reshape to pass the critical region */
- while(1) {
- unsigned long long comp;
- if (sysfs_get_ll(sra, NULL, "sync_completed", &comp)<0) {
- sleep(5);
- break;
- }
- if (comp >= nstripe)
- break;
- if (comp == 0) {
- /* Maybe it finished already */
- char action[20];
- if (sysfs_get_str(sra, NULL, "sync_action",
- action, 20) > 0 &&
- strncmp(action, "reshape", 7) != 0)
+ lseek64(bfd, offset, 0);
+ if (read(bfd, bbuf, len) != len) {
+ printf("len %llu\n", len);
+ fail("read first backup failed");
+ }
+ lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
+ if (read(afd, abuf, len) != len)
+ fail("read first from array failed");
+ if (memcmp(bbuf, abuf, len) != 0) {
+ int i;
+ printf("offset=%llu len=%llu\n",
+ __le64_to_cpu(bsb2.arraystart)*512, len);
+ for (i=0; i<len; i++)
+ if (bbuf[i] != abuf[i]) {
+ printf("first diff byte %d\n", i);
break;
- }
- sleep(1);
+ }
+ fail("data1 compare failed");
}
-
- /* invalidate superblocks */
- memset(&bsb, 0, sizeof(bsb));
- for (i=odisks; i<d ; i++) {
- lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
- if (write(fdlist[i], &bsb, sizeof(bsb)) < 0) {
- fprintf(stderr, Name ": %s: failed to invalidate metadata for raid disk %d\n",
- devname, i);
- }
+ }
+ if (bsb2.length2) {
+ unsigned long long len = __le64_to_cpu(bsb2.length2)*512;
+
+ if (abuflen < len) {
+ free(abuf);
+ free(bbuf);
+ abuflen = len;
+ abuf = malloc(abuflen);
+ bbuf = malloc(abuflen);
}
- /* unsuspend. */
- sysfs_set_num(sra, NULL, "suspend_lo", last_block);
-
- for (i=0; i<d; i++)
- if (fdlist[i] >= 0)
- close(fdlist[i]);
- free(fdlist);
- free(offsets);
- if (backup_file)
- unlink(backup_file);
-
- fprintf(stderr, Name ": ... critical section passed.\n");
- break;
+ lseek64(bfd, offset+__le64_to_cpu(bsb2.devstart2)*512, 0);
+ if (read(bfd, bbuf, len) != len)
+ fail("read second backup failed");
+ lseek64(afd, __le64_to_cpu(bsb2.arraystart2)*512, 0);
+ if (read(afd, abuf, len) != len)
+ fail("read second from array failed");
+ if (memcmp(bbuf, abuf, len) != 0)
+ fail("data2 compare failed");
}
- return 0;
+}
+static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
+ int *fds, unsigned long long *offsets,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets)
+{
+ char *buf;
+ int degraded = 0;
+
+ posix_memalign((void**)&buf, 4096, disks * chunk);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ grow_backup(sra, 0, stripes,
+ fds, offsets, disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ 0, &degraded, buf);
+ validate(afd, destfd[0], destoffsets[0]);
+ wait_backup(sra, 0, stripes * chunk / 512, stripes * chunk / 512,
+ dests, destfd, destoffsets,
+ 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", (stripes * chunk/512) * data);
+ free(buf);
+ /* FIXME this should probably be numeric */
+ sysfs_set_str(sra, NULL, "sync_max", "max");
+ return 1;
+}
- abort_resume:
- sysfs_set_num(sra, NULL, "suspend_lo", last_block);
- abort:
- for (i=0; i<array.nr_disks; i++)
- if (fdlist[i] >= 0)
- close(fdlist[i]);
- free(fdlist);
- free(offsets);
- if (backup_file)
- unlink(backup_file);
+static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
+ int *fds, unsigned long long *offsets,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets)
+{
+ char *buf;
+ unsigned long long start;
+ int rv;
+ int degraded = 0;
+
+ posix_memalign((void**)&buf, 4096, disks * chunk);
+ start = sra->component_size - stripes * chunk/512;
+ sysfs_set_num(sra, NULL, "sync_max", start);
+ sysfs_set_str(sra, NULL, "sync_action", "reshape");
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+ rv = wait_backup(sra, 0, start - stripes * chunk/512, stripes * chunk/512,
+ dests, destfd, destoffsets, 0);
+ if (rv < 0)
+ return 0;
+ grow_backup(sra, 0, stripes,
+ fds, offsets,
+ disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ 0, &degraded, buf);
+ validate(afd, destfd[0], destoffsets[0]);
+ wait_backup(sra, start, stripes*chunk/512, 0,
+ dests, destfd, destoffsets, 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", (stripes * chunk/512) * data);
+ free(buf);
+ /* FIXME this should probably be numeric */
+ sysfs_set_str(sra, NULL, "sync_max", "max");
return 1;
+}
+static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
+ int *fds, unsigned long long *offsets,
+ unsigned long long start,
+ int disks, int chunk, int level, int layout, int data,
+ int dests, int *destfd, unsigned long long *destoffsets)
+{
+ unsigned long long size;
+ unsigned long tailstripes = stripes;
+ int part;
+ char *buf;
+ unsigned long long speed;
+ int degraded = 0;
+
+
+ posix_memalign((void**)&buf, 4096, disks * chunk);
+
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+
+ sysfs_get_ll(sra, NULL, "sync_speed_min", &speed);
+ sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
+
+ grow_backup(sra, start, stripes,
+ fds, offsets,
+ disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ 0, &degraded, buf);
+ grow_backup(sra, (start + stripes) * chunk/512, stripes,
+ fds, offsets,
+ disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ 1, &degraded, buf);
+ validate(afd, destfd[0], destoffsets[0]);
+ part = 0;
+ start += stripes * 2; /* where to read next */
+ size = sra->component_size / (chunk/512);
+ while (start < size) {
+ if (wait_backup(sra, (start-stripes*2)*chunk/512,
+ stripes*chunk/512, 0,
+ dests, destfd, destoffsets,
+ part) < 0)
+ return 0;
+ sysfs_set_num(sra, NULL, "suspend_lo", start*chunk/512 * data);
+ if (start + stripes > size)
+ tailstripes = (size - start);
+
+ grow_backup(sra, start*chunk/512, tailstripes,
+ fds, offsets,
+ disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ part, &degraded, buf);
+ start += stripes;
+ part = 1 - part;
+ validate(afd, destfd[0], destoffsets[0]);
+ }
+ if (wait_backup(sra, (start-stripes*2) * chunk/512, stripes * chunk/512, 0,
+ dests, destfd, destoffsets,
+ part) < 0)
+ return 0;
+ sysfs_set_num(sra, NULL, "suspend_lo", ((start-stripes)*chunk/512) * data);
+ wait_backup(sra, (start-stripes) * chunk/512, tailstripes * chunk/512, 0,
+ dests, destfd, destoffsets,
+ 1-part);
+ sysfs_set_num(sra, NULL, "suspend_lo", (size*chunk/512) * data);
+ sysfs_set_num(sra, NULL, "sync_speed_min", speed);
+ free(buf);
+ return 1;
}
/*
@@ -868,27 +1606,35 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
* write that data into the array and update the super blocks with
* the new reshape_progress
*/
-int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt, char *backup_file)
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt,
+ char *backup_file, int verbose)
{
int i, j;
int old_disks;
unsigned long long *offsets;
- unsigned long long nstripe, ostripe, last_block;
+ unsigned long long nstripe, ostripe;
int ndata, odata;
- if (info->delta_disks < 0)
- return 1; /* cannot handle a shrink */
- if (info->new_level != info->array.level ||
- info->new_layout != info->array.layout ||
- info->new_chunk != info->array.chunk_size)
- return 1; /* Can only handle change in disks */
+ if (info->new_level != info->array.level)
+ return 1; /* Cannot handle level changes (they are instantaneous) */
+
+ odata = info->array.raid_disks - info->delta_disks - 1;
+ if (info->array.level == 6) odata--; /* number of data disks */
+ ndata = info->array.raid_disks - 1;
+ if (info->new_level == 6) ndata--;
old_disks = info->array.raid_disks - info->delta_disks;
+ if (info->delta_disks <= 0)
+ /* Didn't grow, so the backup file must have
+ * been used
+ */
+ old_disks = cnt;
for (i=old_disks-(backup_file?1:0); i<cnt; i++) {
struct mdinfo dinfo;
- char buf[4096];
int fd;
+ int bsbsize;
+ char *devname, namebuf[20];
/* This was a spare and may have some saved data on it.
* Load the superblock, find and load the
@@ -899,8 +1645,12 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
*/
if (i == old_disks-1) {
fd = open(backup_file, O_RDONLY);
- if (fd<0)
+ if (fd<0) {
+ fprintf(stderr, Name ": backup file %s inaccessible: %s\n",
+ backup_file, strerror(errno));
continue;
+ }
+ devname = backup_file;
} else {
fd = fdlist[i];
if (fd < 0)
@@ -913,35 +1663,98 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
if (lseek64(fd,
(dinfo.data_offset + dinfo.component_size - 8) <<9,
- 0) < 0)
+ 0) < 0) {
+ fprintf(stderr, Name ": Cannot seek on device %d\n", i);
continue; /* Cannot seek */
+ }
+ sprintf(namebuf, "device-%d", i);
+ devname = namebuf;
}
- if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb))
+ if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb)) {
+ if (verbose)
+ fprintf(stderr, Name ": Cannot read from %s\n", devname);
continue; /* Cannot read */
- if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
+ }
+ if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0 &&
+ memcmp(bsb.magic, "md_backup_data-2", 16) != 0) {
+ if (verbose)
+ fprintf(stderr, Name ": No backup metadata on %s\n", devname);
continue;
- if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
+ }
+ if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) {
+ if (verbose)
+ fprintf(stderr, Name ": Bad backup-metadata checksum on %s\n", devname);
continue; /* bad checksum */
- if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
+ }
+ if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0 &&
+ bsb.sb_csum2 != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb))) {
+ if (verbose)
+ fprintf(stderr, Name ": Bad backup-metadata checksum2 on %s\n", devname);
+ continue; /* Bad second checksum */
+ }
+ if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) {
+ if (verbose)
+ fprintf(stderr, Name ": Wrong uuid on backup-metadata on %s\n", devname);
continue; /* Wrong uuid */
+ }
- if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
- info->array.utime < __le64_to_cpu(bsb.mtime))
+ if (info->array.utime > __le64_to_cpu(bsb.mtime) + 10*60 ||
+ info->array.utime < __le64_to_cpu(bsb.mtime) - 10*60) {
+ if (verbose)
+ fprintf(stderr, Name ": too-old timestamp on backup-metadata on %s\n", devname);
continue; /* time stamp is too bad */
+ }
- if (__le64_to_cpu(bsb.arraystart) != 0)
- continue; /* Can only handle backup from start of array */
- if (__le64_to_cpu(bsb.length) <
- info->reshape_progress)
- continue; /* No new data here */
-
- if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0)
+ if (bsb.magic[15] == '1') {
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
+ info->reshape_progress) {
+ nonew:
+ if (verbose)
+ fprintf(stderr, Name ": backup-metadata found on %s but is not needed\n", devname);
+ continue; /* No new data here */
+ }
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ }
+ } else {
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if (__le64_to_cpu(bsb.arraystart) + __le64_to_cpu(bsb.length) <
+ info->reshape_progress &&
+ __le64_to_cpu(bsb.arraystart2) + __le64_to_cpu(bsb.length2) <
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress &&
+ __le64_to_cpu(bsb.arraystart2) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ }
+ }
+ if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) {
+ second_fail:
+ if (verbose)
+ fprintf(stderr, Name ": Failed to verify secondary backup-metadata block on %s\n",
+ devname);
continue; /* Cannot seek */
+ }
/* There should be a duplicate backup superblock 4k before here */
if (lseek64(fd, -4096, 1) < 0 ||
- read(fd, buf, 4096) != 4096 ||
- memcmp(buf, &bsb, sizeof(bsb)) != 0)
- continue; /* Cannot find leading superblock */
+ read(fd, &bsb2, 4096) != 4096)
+ goto second_fail; /* Cannot find leading superblock */
+ if (bsb.magic[15] == '1')
+ bsbsize = offsetof(struct mdp_backup_super, pad1);
+ else
+ bsbsize = offsetof(struct mdp_backup_super, pad);
+ if (memcmp(&bsb2, &bsb, bsbsize) != 0)
+ goto second_fail; /* Cannot find leading superblock */
/* Now need the data offsets for all devices. */
offsets = malloc(sizeof(*offsets)*info->array.raid_disks);
@@ -953,7 +1766,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
continue;
st->ss->getinfo_super(st, &dinfo);
st->ss->free_super(st);
- offsets[j] = dinfo.data_offset;
+ offsets[j] = dinfo.data_offset * 512;
}
printf(Name ": restoring critical section\n");
@@ -963,47 +1776,263 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
info->new_level,
info->new_layout,
fd, __le64_to_cpu(bsb.devstart)*512,
- 0, __le64_to_cpu(bsb.length)*512)) {
+ __le64_to_cpu(bsb.arraystart)*512,
+ __le64_to_cpu(bsb.length)*512)) {
/* didn't succeed, so giveup */
+ if (verbose)
+ fprintf(stderr, Name ": Error restoring backup from %s\n",
+ devname);
return 1;
}
+
+ if (bsb.magic[15] == '2' &&
+ restore_stripes(fdlist, offsets,
+ info->array.raid_disks,
+ info->new_chunk,
+ info->new_level,
+ info->new_layout,
+ fd, __le64_to_cpu(bsb.devstart)*512 +
+ __le64_to_cpu(bsb.devstart2)*512,
+ __le64_to_cpu(bsb.arraystart2)*512,
+ __le64_to_cpu(bsb.length2)*512)) {
+ /* didn't succeed, so giveup */
+ if (verbose)
+ fprintf(stderr, Name ": Error restoring second backup from %s\n",
+ devname);
+ return 1;
+ }
+
/* Ok, so the data is restored. Let's update those superblocks. */
+ if (info->delta_disks >= 0) {
+ info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
+ __le64_to_cpu(bsb.length);
+ if (bsb.magic[15] == '2') {
+ unsigned long long p2 = __le64_to_cpu(bsb.arraystart2) +
+ __le64_to_cpu(bsb.length2);
+ if (p2 > info->reshape_progress)
+ info->reshape_progress = p2;
+ }
+ } else {
+ info->reshape_progress = __le64_to_cpu(bsb.arraystart);
+ if (bsb.magic[15] == '2') {
+ unsigned long long p2 = __le64_to_cpu(bsb.arraystart2);
+ if (p2 < info->reshape_progress)
+ info->reshape_progress = p2;
+ }
+ }
for (j=0; j<info->array.raid_disks; j++) {
if (fdlist[j] < 0) continue;
if (st->ss->load_super(st, fdlist[j], NULL))
continue;
st->ss->getinfo_super(st, &dinfo);
- dinfo.reshape_progress = __le64_to_cpu(bsb.length);
+ dinfo.reshape_progress = info->reshape_progress;
st->ss->update_super(st, &dinfo,
"_reshape_progress",
NULL,0, 0, NULL);
st->ss->store_super(st, fdlist[j]);
st->ss->free_super(st);
}
-
- /* And we are done! */
return 0;
}
/* Didn't find any backup data, try to see if any
* was needed.
*/
- nstripe = ostripe = 0;
- odata = info->array.raid_disks - info->delta_disks - 1;
- if (info->array.level == 6) odata--; /* number of data disks */
- ndata = info->array.raid_disks - 1;
- if (info->new_level == 6) ndata--;
- last_block = 0;
- while (nstripe >= ostripe) {
- nstripe += info->new_chunk / 512;
- last_block = nstripe * ndata;
- ostripe = last_block / odata / (info->array.chunk_size/512) *
- (info->array.chunk_size/512);
+ if (info->delta_disks < 0) {
+ /* When shrinking, the critical section is at the end.
+ * So see if we are before the critical section.
+ */
+ unsigned long long first_block;
+ nstripe = ostripe = 0;
+ first_block = 0;
+ while (ostripe >= nstripe) {
+ ostripe += info->array.chunk_size / 512;
+ first_block = ostripe * odata;
+ nstripe = first_block / ndata / (info->new_chunk/512) *
+ (info->new_chunk/512);
+ }
+
+ if (info->reshape_progress >= first_block)
+ return 0;
}
+ if (info->delta_disks > 0) {
+ /* See if we are beyond the critical section. */
+ unsigned long long last_block;
+ nstripe = ostripe = 0;
+ last_block = 0;
+ while (nstripe >= ostripe) {
+ nstripe += info->new_chunk / 512;
+ last_block = nstripe * ndata;
+ ostripe = last_block / odata / (info->array.chunk_size/512) *
+ (info->array.chunk_size/512);
+ }
- if (info->reshape_progress >= last_block)
- return 0;
+ if (info->reshape_progress >= last_block)
+ return 0;
+ }
/* needed to recover critical section! */
+ if (verbose)
+ fprintf(stderr, Name ": Failed to find backup of critical section\n");
return 1;
}
+
+int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
+ char *backup_file)
+{
+ /* Array is assembled and ready to be started, but
+ * monitoring is probably required.
+ * So:
+ * - start read-only
+ * - set upper bound for resync
+ * - initialise the 'suspend' boundaries
+ * - switch to read-write
+ * - fork and continue monitoring
+ */
+ int err;
+ int backup_list[1];
+ unsigned long long backup_offsets[1];
+ int odisks, ndisks, ochunk, nchunk,odata,ndata;
+ unsigned long a,b,blocks,stripes;
+ int backup_fd;
+ int *fds;
+ unsigned long long *offsets;
+ int d;
+ struct mdinfo *sra, *sd;
+ int rv;
+ int cache;
+ int done = 0;
+
+ err = sysfs_set_str(info, NULL, "array_state", "readonly");
+ if (err)
+ return err;
+
+ /* make sure reshape doesn't progress until we are ready */
+ sysfs_set_str(info, NULL, "sync_max", "0");
+ sysfs_set_str(info, NULL, "array_state", "active"); /* FIXME or clean */
+
+ /* ndisks is not growing, so raid_disks is old and +delta is new */
+ odisks = info->array.raid_disks;
+ ndisks = odisks + info->delta_disks;
+ odata = odisks - 1;
+ ndata = ndisks - 1;
+ if (info->array.level == 6) {
+ odata--;
+ ndata--;
+ }
+ ochunk = info->array.chunk_size;
+ nchunk = info->new_chunk;
+
+
+ a = ochunk/512 * odata;
+ b = nchunk/512 * ndata;
+ /* Find GCD */
+ while (a != b) {
+ if (a < b)
+ b -= a;
+ if (b < a)
+ a -= b;
+ }
+ /* LCM == product / GCD */
+ blocks = ochunk/512 * nchunk/512 * odata * ndata / a;
+
+ sra = sysfs_read(-1, devname2devnum(info->sys_name),
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+ GET_CACHE);
+
+
+ if (ndata == odata)
+ while (blocks * 32 < sra->component_size &&
+ blocks < 16*1024*2)
+ blocks *= 2;
+ stripes = blocks / (info->array.chunk_size/512) / odata;
+
+ /* check that the internal stripe cache is
+ * large enough, or it won't work.
+ */
+ cache = (nchunk < ochunk) ? ochunk : nchunk;
+ cache = cache * 4 / 4096;
+ if (cache < blocks / 8 / odisks + 16)
+ /* Make it big enough to hold 'blocks' */
+ cache = blocks / 8 / odisks + 16;
+ if (sra->cache_size < cache)
+ sysfs_set_num(sra, NULL, "stripe_cache_size",
+ cache+1);
+
+ memset(&bsb, 0, 512);
+ memcpy(bsb.magic, "md_backup_data-1", 16);
+ memcpy(&bsb.set_uuid, info->uuid, 16);
+ bsb.mtime = __cpu_to_le64(time(0));
+ bsb.devstart2 = blocks;
+
+ backup_fd = open(backup_file, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
+ backup_list[0] = backup_fd;
+ backup_offsets[0] = 8 * 512;
+ fds = malloc(odisks * sizeof(fds[0]));
+ offsets = malloc(odisks * sizeof(offsets[0]));
+ for (d=0; d<odisks; d++)
+ fds[d] = -1;
+
+ for (sd = sra->devs; sd; sd = sd->next) {
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
+ continue;
+ if (sd->disk.state & (1<<MD_DISK_SYNC)) {
+ char *dn = map_dev(sd->disk.major,
+ sd->disk.minor, 1);
+ fds[sd->disk.raid_disk]
+ = dev_open(dn, O_RDONLY);
+ offsets[sd->disk.raid_disk] = sd->data_offset*512;
+ if (fds[sd->disk.raid_disk] < 0) {
+ fprintf(stderr, Name ": %s: cannot open component %s\n",
+ info->sys_name, dn?dn:"-unknown-");
+ rv = 1;
+ goto release;
+ }
+ free(dn);
+ }
+ }
+
+ switch(fork()) {
+ case 0:
+ close(mdfd);
+ mlockall(MCL_FUTURE);
+ if (info->delta_disks < 0)
+ done = child_shrink(-1, info, stripes,
+ fds, offsets,
+ info->array.raid_disks,
+ info->array.chunk_size,
+ info->array.level, info->array.layout,
+ odata,
+ 1, backup_list, backup_offsets);
+ else if (info->delta_disks == 0) {
+ /* The 'start' is a per-device stripe number.
+ * reshape_progress is a per-array sector number.
+ * So divide by ndata * chunk_size
+ */
+ unsigned long long start = info->reshape_progress / ndata;
+ start /= (info->array.chunk_size/512);
+ done = child_same_size(-1, info, stripes,
+ fds, offsets,
+ start,
+ info->array.raid_disks,
+ info->array.chunk_size,
+ info->array.level, info->array.layout,
+ odata,
+ 1, backup_list, backup_offsets);
+ }
+ if (backup_file && done)
+ unlink(backup_file);
+ /* FIXME should I intuit a level change */
+ exit(0);
+ case -1:
+ fprintf(stderr, Name ": Cannot run child to continue monitoring reshape: %s\n",
+ strerror(errno));
+ return 1;
+ default:
+ break;
+ }
+release:
+ return 0;
+}
+
+
diff --git a/Manage.c b/Manage.c
index 84eb3ab..fb9b972 100644
--- a/Manage.c
+++ b/Manage.c
@@ -306,24 +306,6 @@ int Manage_resize(char *devname, int fd, long long size, int raid_disks)
return 0;
}
-int Manage_reconfig(char *devname, int fd, int layout)
-{
- mdu_array_info_t info;
- if (ioctl(fd, GET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot get array information for %s: %s\n",
- devname, strerror(errno));
- return 1;
- }
- info.layout = layout;
- printf("layout set to %d\n", info.layout);
- if (ioctl(fd, SET_ARRAY_INFO, &info) != 0) {
- fprintf(stderr, Name ": Cannot set layout for %s: %s\n",
- devname, strerror(errno));
- return 1;
- }
- return 0;
-}
-
int Manage_subdevs(char *devname, int fd,
mddev_dev_t devlist, int verbose)
{
@@ -596,6 +578,12 @@ int Manage_subdevs(char *devname, int fd,
/* fall back on normal-add */
}
}
+ if (dv->re_add) {
+ fprintf(stderr, Name
+ ": --re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return 1;
+ }
} else {
/* non-persistent. Must ensure that new drive
* is at least array.size big.
diff --git a/ReadMe.c b/ReadMe.c
index 1c118f2..fc069d3 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -24,7 +24,7 @@
#include "mdadm.h"
-char Version[] = Name " - v3.0.3 - 22nd October 2009\n";
+char Version[] = Name " - v3.1 - 22nd October 2009\n";
/*
* File: ReadMe.c
@@ -86,11 +86,11 @@ char Version[] = Name " - v3.0.3 - 22nd October 2009\n";
* At the time if writing, there is only minimal support.
*/
-char short_options[]="-ABCDEFGIQhVXWvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
+char short_options[]="-ABCDEFGIQhVXWZvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
char short_bitmap_options[]=
- "-ABCDEFGIQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
+ "-ABCDEFGIQhVXWZvqb:c:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
char short_bitmap_auto_options[]=
- "-ABCDEFGIQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:";
+ "-ABCDEFGIQhVXWZvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:";
struct option long_options[] = {
{"manage", 0, 0, '@'},
@@ -185,6 +185,7 @@ struct option long_options[] = {
{"syslog", 0, 0, 'y'},
/* For Grow */
{"backup-file", 1,0, BackupFile},
+ {"array-size", 1, 0, 'Z'},
/* For Incremental */
{"rebuild-map", 0, 0, 'r'},
@@ -529,6 +530,8 @@ char Help_grow[] =
" --backup-file= file : A file on a differt device to store data for a\n"
" : short time while increasing raid-devices on a\n"
" : RAID4/5/6 array. Not needed when a spare is present.\n"
+" --array-size= -Z : Change visible size of array. This does not change\n"
+" : any data on the device, and is not stable across restarts.\n"
;
char Help_incr[] =
diff --git a/inventory b/inventory
index f0f7dac..e3a6924 100755
--- a/inventory
+++ b/inventory
@@ -3,6 +3,7 @@ ANNOUNCE-3.0
ANNOUNCE-3.0.1
ANNOUNCE-3.0.2
ANNOUNCE-3.0.3
+ANNOUNCE-3.1
Assemble.c
bitmap.c
bitmap.h
@@ -121,6 +122,9 @@ tests/06update-uuid
tests/06wrmostly
tests/07autoassemble
tests/07autodetect
+tests/07changelevelintr
+tests/07changelevels
+tests/07layouts
tests/07reshape5intr
tests/07testreshape5
tests/08imsm-overlap
diff --git a/md_p.h b/md_p.h
index a9f0204..4594a36 100644
--- a/md_p.h
+++ b/md_p.h
@@ -75,7 +75,7 @@
* Device "operational" state bits
*/
#define MD_DISK_FAULTY 0 /* disk is faulty / operational */
-#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */
+#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
diff --git a/mdadm.8 b/mdadm.8
index d79f660..93615f9 100644
--- a/mdadm.8
+++ b/mdadm.8
@@ -5,7 +5,7 @@
.\" the Free Software Foundation; either version 2 of the License, or
.\" (at your option) any later version.
.\" See file COPYING in distribution for details.
-.TH MDADM 8 "" v3.0.3
+.TH MDADM 8 "" v3.1
.SH NAME
mdadm \- manage MD devices
.I aka
@@ -123,7 +123,9 @@ missing, spare, or failed drives, so there is nothing to monitor.
Grow (or shrink) an array, or otherwise reshape it in some way.
Currently supported growth options including changing the active size
of component devices and changing the number of active devices in RAID
-levels 1/4/5/6, as well as adding or removing a write-intent bitmap.
+levels 1/4/5/6, changing the RAID level between 1, 5, and 6, changing
+the chunk size and layout for RAID5 and RAID6, as well as adding or
+removing a write-intent bitmap.
.TP
.B "Incremental Assembly"
@@ -309,7 +311,7 @@ says to get a list of array devices from
.TP
.BR \-e ", " \-\-metadata=
Declare the style of RAID metadata (superblock) to be used. The
-default is 0.90 for
+default is 1.1 for
.BR \-\-create ,
and to guess for other operations.
The default can be overridden by setting the
@@ -321,15 +323,16 @@ keyword in
Options are:
.RS
-.IP "0, 0.90, default"
+.IP "0, 0.90"
Use the original 0.90 format superblock. This format limits arrays to
28 component devices and limits component devices of levels 1 and
greater to 2 terabytes.
-.IP "1, 1.0, 1.1, 1.2"
+.IP "1, 1.0, 1.1, 1.2, default"
Use the new version-1 format superblock. This has few restrictions.
The different sub-versions store the superblock at different locations
on the device, either at the end (for 1.0), at the start (for 1.1) or
-4K from the start (for 1.2).
+4K from the start (for 1.2). '1' is equivalent to '1.0', 'default' is
+equivalent to '1.1'.
.IP ddf
Use the "Industry Standard" DDF (Disk Data Format) format defined by
SNIA.
@@ -422,8 +425,25 @@ This value can not be used with
metadata such as DDF and IMSM.
.TP
+.BR \-Z ", " \-\-array-size=
+This is only meaningful with
+.B \-\-grow
+and its effect is not persistent: when the array is stopped and
+restarted the default array size will be restored.
+
+Setting the array-size causes the array to appear smaller to programs
+that access the data. This is particularly needed before reshaping an
+array so that it will be smaller. As the reshape is not reversible,
+but setting the size with
+.B \-\-array-size
+is, it is required that the array size is reduced as appropriate
+before the number of devices in the array is reduced.
+
+.TP
.BR \-c ", " \-\-chunk=
-Specify chunk size of kibibytes. The default is 64.
+Specify chunk size in kibibytes. The default when creating an
+array is 512KB. To ensure compatibility with earlier versions, the
+default when Building an array with no persistent metadata is 64KB.
This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
.TP
@@ -433,7 +453,8 @@ component will be rounded down to a multiple of this size.
This is a synonym for
.B \-\-chunk
but highlights the different meaning for Linear as compared to other
-RAID levels.
+RAID levels. The default is 64K if a kernel earlier than 2.6.16 is in
+use, and is 0K (i.e. no rounding) in later kernels.
.TP
.BR \-l ", " \-\-level=
@@ -520,11 +541,6 @@ option to set subsequent failure modes.
"clear" or "none" will remove any pending or periodic failure modes,
and "flush" will clear any persistent faults.
-To set the parity with
-.BR \-\-grow ,
-the level of the array ("faulty")
-must be specified before the fault mode is specified.
-
Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
by a small number. The default is 'n2'. The supported options are:
@@ -550,6 +566,18 @@ devices in the array. It does not need to divide evenly into that
number (e.g. it is perfectly legal to have an 'n2' layout for an array
with an odd number of devices).
+When an array is converted between RAID5 and RAID6 an intermediate
+RAID6 layout is used in which the second parity block (Q) is always on
+the last device. To convert a RAID5 to RAID6 and leave it in this new
+layout (which does not require re-striping) use
+.BR \-\-layout=preserve .
+This will try to avoid any restriping.
+
+The converse of this is
+.B \-\-layout=normalise
+which will change a non-standard RAID6 layout into a more standard
+arrangement.
+
.TP
.BR \-\-parity=
same as
@@ -586,8 +614,8 @@ When using a file based bitmap, the default is to use the smallest
size that is at-least 4 and requires no more than 2^21 chunks.
When using an
.B internal
-bitmap, the chunksize is automatically determined to make best use of
-available space.
+bitmap, the chunksize defaults to 64Meg, or larger if necessary to
+fit the bitmap into the available space.
.TP
.BR \-W ", " \-\-write\-mostly
@@ -632,6 +660,21 @@ should be stored on a separate device, not on the RAID array being
reshaped.
.TP
+.BR \-\-array-size= ", " \-Z
+Set the size of the array which is seen by users of the device such as
+filesystems. This can be less than the real size, but never greater.
+The size set this way does not persist across restarts of the array.
+
+This is most useful when reducing the number of devices in a RAID5 or
+RAID6. Such arrays require the array-size to be reduced before a
+reshape can be performed that reduces the real size.
+
+A value of
+.B max
+restores the apparent size of the array to be whatever the real
+amount of available space is.
+
+.TP
.BR \-N ", " \-\-name=
Set a
.B name
@@ -722,6 +765,7 @@ number, and there is no entry in /dev for that number and with a
non-standard name. Names that are not in 'standard' format are only
allowed in "/dev/md/".
+.ig XX
.\".TP
.\".BR \-\-symlink = no
.\"Normally when
@@ -743,6 +787,7 @@ allowed in "/dev/md/".
.\"to enforce this even if it is suppressing
.\".IR mdadm.conf .
.\"
+.XX
.SH For assemble:
@@ -1950,7 +1995,12 @@ Currently the only support available is to
change the "size" attribute
for RAID1, RAID5 and RAID6.
.IP \(bu 4
-increase the "raid\-devices" attribute of RAID1, RAID5, and RAID6.
+increase or decrease the "raid\-devices" attribute of RAID1, RAID5,
+and RAID6.
+.IP \(bu 4
+change the chunk-size and layout of RAID5 and RAID6.
+.IP \(bu 4
+convert between RAID1 and RAID5, and between RAID5 and RAID6.
.IP \(bu 4
add a write-intent bitmap to any array which supports these bitmaps, or
remove a write-intent bitmap from such an array.
@@ -1993,10 +2043,22 @@ devices which were in those slots must be failed and removed.
When the number of devices is increased, any hot spares that are
present will be activated immediately.
-Increasing the number of active devices in a RAID5 is much more
+Changing the number of active devices in a RAID5 or RAID6 is much more
effort. Every block in the array will need to be read and written
-back to a new location. From 2.6.17, the Linux Kernel is able to do
-this safely, including restarting an interrupted "reshape".
+back to a new location. From 2.6.17, the Linux Kernel is able to
+increase the number of devices in a RAID5 safely, including restarting
+an interrupted "reshape". From 2.6.31, the Linux Kernel is able to
+increase or decrease the number of devices in a RAID5 or RAID6.
+
+When decreasing the number of devices, the size of the array will also
+decrease. If there was data in the array, it could get destroyed and
+this is not reversible. To help prevent accidents,
+.I mdadm
+requires that the size of the array be decreased first with
+.BR "mdadm --grow --array-size" .
+This is a reversible change which simply makes the end of the array
+inaccessible. The integrity of any data can then be checked before
+the non-reversible reduction in the number of devices is requested.
When relocating the first few stripes on a RAID5, it is not possible
to keep the data on disk completely consistent and crash-proof. To
@@ -2011,6 +2073,31 @@ critical period, the same file must be passed to
.B \-\-assemble
to restore the backup and reassemble the array.
+.SS LEVEL CHANGES
+
+Changing the RAID level of any array happens instantaneously. However
+in the RAID5 to RAID6 case this requires a non-standard layout of the
+RAID6 data, and in the RAID6 to RAID5 case that non-standard layout is
+required before the change can be accomplished. So while the level
+change is instant, the accompanying layout change can take quite a
+long time.
+
+.SS CHUNK-SIZE AND LAYOUT CHANGES
+
+Changing the chunk-size or layout without also changing the number of
+devices at the same time will involve re-writing all blocks in-place.
+To ensure against data loss in the case of a crash, a
+.B --backup-file
+must be provided for these changes. Small sections of the array will
+be copied to the backup file while they are being rearranged.
+
+If the reshape is interrupted for any reason, this backup file must be
+made available to
+.B "mdadm --assemble"
+so the array can be reassembled. Consequently the file cannot be
+stored on the device being reshaped.
+
+
.SS BITMAP CHANGES
A write-intent bitmap can be added to, or removed from, an active
@@ -2285,6 +2372,14 @@ can be started.
Any devices which are components of /dev/md4 will be marked as faulty
and then remove from the array.
+.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4"
+.br
+The array
+.B /dev/md4
+which is currently a RAID5 array will be converted to RAID6. There
+should normally already be a spare drive attached to the array as a
+RAID6 needs one more drive than a matching RAID5.
+
.B " mdadm --create /dev/md/ddf --metadata=ddf --raid-disks 6 /dev/sd[a-f]"
.br
Create a DDF array over 6 devices.
diff --git a/mdadm.c b/mdadm.c
index 4651e73..3dc8be9 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -41,8 +41,10 @@ int main(int argc, char *argv[])
int chunk = 0;
long long size = -1;
+ long long array_size = -1;
int level = UnSet;
int layout = UnSet;
+ char *layout_str = NULL;
int raiddisks = 0;
int max_disks = MD_SB_DISKS; /* just a default */
int sparedisks = 0;
@@ -102,7 +104,6 @@ int main(int argc, char *argv[])
int rebuild_map = 0;
int auto_update_home = 0;
- int copies;
int print_help = 0;
FILE *outf;
@@ -324,6 +325,7 @@ int main(int argc, char *argv[])
* could depend on the mode */
#define O(a,b) ((a<<8)|b)
switch (O(mode,opt)) {
+ case O(GROW,'c'):
case O(CREATE,'c'):
case O(BUILD,'c'): /* chunk or rounding */
if (chunk) {
@@ -386,16 +388,36 @@ int main(int argc, char *argv[])
if (strcmp(optarg, "max")==0)
size = 0;
else {
- size = strtoll(optarg, &c, 10);
- if (!optarg[0] || *c || size < 4) {
+ size = parse_size(optarg);
+ if (size < 8) {
fprintf(stderr, Name ": invalid size: %s\n",
optarg);
exit(2);
}
+ /* convert sectors to K */
+ size /= 2;
}
continue;
- case O(GROW,'l'): /* hack - needed to understand layout */
+ case O(GROW,'Z'): /* array size */
+ if (array_size >= 0) {
+ fprintf(stderr, Name ": array-size may only be specified once. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ if (strcmp(optarg, "max") == 0)
+ array_size = 0;
+ else {
+ array_size = parse_size(optarg);
+ if (array_size <= 0) {
+ fprintf(stderr, Name ": invalid array size: %s\n",
+ optarg);
+ exit(2);
+ }
+ }
+ continue;
+
+ case O(GROW,'l'):
case O(CREATE,'l'):
case O(BUILD,'l'): /* set raid level*/
if (level != UnSet) {
@@ -425,9 +447,18 @@ int main(int argc, char *argv[])
ident.level = level;
continue;
+ case O(GROW, 'p'): /* new layout */
+ if (layout_str) {
+ fprintf(stderr,Name ": layout may only be sent once. "
+ "Second value was %s\n", optarg);
+ exit(2);
+ }
+ layout_str = optarg;
+ /* 'Grow' will parse the value */
+ continue;
+
case O(CREATE,'p'): /* raid5 layout */
case O(BUILD,'p'): /* faulty layout */
- case O(GROW, 'p'): /* faulty reconfig */
if (layout != UnSet) {
fprintf(stderr,Name ": layout may only be sent once. "
"Second value was %s\n", optarg);
@@ -460,38 +491,23 @@ int main(int argc, char *argv[])
break;
case 10:
- /* 'f', 'o' or 'n' followed by a number <= raid_disks */
- if ((optarg[0] != 'n' && optarg[0] != 'f' && optarg[0] != 'o') ||
- (copies = strtoul(optarg+1, &cp, 10)) < 1 ||
- copies > 200 ||
- *cp) {
+ layout = parse_layout_10(optarg);
+ if (layout < 0) {
fprintf(stderr, Name ": layout for raid10 must be 'nNN', 'oNN' or 'fNN' where NN is a number, not %s\n", optarg);
exit(2);
}
- if (optarg[0] == 'n')
- layout = 256 + copies;
- else if (optarg[0] == 'o')
- layout = 0x10000 + (copies<<8) + 1;
- else
- layout = 1 + (copies<<8);
break;
- case -5: /* Faulty
- * modeNNN
- */
-
- {
- int ln = strcspn(optarg, "0123456789");
- char *m = strdup(optarg);
- int mode;
- m[ln] = 0;
- mode = map_name(faultylayout, m);
- if (mode == UnSet) {
+ case LEVEL_FAULTY:
+ /* Faulty
+ * modeNNN
+ */
+ layout = parse_layout_faulty(optarg);
+ if (layout == -1) {
fprintf(stderr, Name ": layout %s not understood for faulty.\n",
optarg);
exit(2);
}
- layout = mode | (atoi(optarg+ln)<< ModeShift);
- }
+ break;
}
continue;
@@ -1398,11 +1414,42 @@ int main(int argc, char *argv[])
break;
case GROW:
+ if (array_size >= 0) {
+ /* alway impose array size first, independent of
+ * anything else
+ * Do not allow level or raid_disks changes at the
+ * same time as that can be irreversibly destructive.
+ */
+ struct mdinfo sra;
+ int err;
+ if (raiddisks || level != UnSet) {
+ fprintf(stderr, Name ": cannot change array size in same operation "
+ "as changing raiddisks or level.\n"
+ " Change size first, then check that data is still intact.\n");
+ rv = 1;
+ break;
+ }
+ sysfs_init(&sra, mdfd, 0);
+ if (array_size == 0)
+ err = sysfs_set_str(&sra, NULL, "array_size", "default");
+ else
+ err = sysfs_set_num(&sra, NULL, "array_size", array_size / 2);
+ if (err < 0) {
+ if (errno == E2BIG)
+ fprintf(stderr, Name ": --array-size setting"
+ " is too large.\n");
+ else
+ fprintf(stderr, Name ": current kernel does"
+ " not support setting --array-size\n");
+ rv = 1;
+ break;
+ }
+ }
if (devs_found > 1) {
/* must be '-a'. */
- if (size >= 0 || raiddisks) {
- fprintf(stderr, Name ": --size, --raiddisks, and --add are exclusing in --grow mode\n");
+ if (size >= 0 || raiddisks || chunk || layout_str != NULL || bitmap_file) {
+ fprintf(stderr, Name ": --add cannot be used with other geometry changes in --grow mode\n");
rv = 1;
break;
}
@@ -1411,20 +1458,21 @@ int main(int argc, char *argv[])
if (rv)
break;
}
- } else if ((size >= 0) + (raiddisks != 0) + (layout != UnSet) + (bitmap_file != NULL)> 1) {
- fprintf(stderr, Name ": can change at most one of size, raiddisks, bitmap, and layout\n");
- rv = 1;
- break;
- } else if (layout != UnSet)
- rv = Manage_reconfig(devlist->devname, mdfd, layout);
- else if (size >= 0 || raiddisks)
- rv = Grow_reshape(devlist->devname, mdfd, quiet, backup_file,
- size, level, layout, chunk, raiddisks);
- else if (bitmap_file) {
- if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+ } else if (bitmap_file) {
+ if (size >= 0 || raiddisks || chunk || layout_str != NULL) {
+ fprintf(stderr, Name ": --bitmap changes cannot be used with other geometry changes in --grow mode\n");
+ rv = 1;
+ break;
+ }
+ if (delay == 0)
+ delay = DEFAULT_BITMAP_DELAY;
rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file,
bitmap_chunk, delay, write_behind, force);
- } else
+ } else if (size >= 0 || raiddisks != 0 || layout_str != NULL
+ || chunk != 0 || level != UnSet) {
+ rv = Grow_reshape(devlist->devname, mdfd, quiet, backup_file,
+ size, level, layout_str, chunk, raiddisks);
+ } else if (array_size < 0)
fprintf(stderr, Name ": no changes to --grow\n");
break;
case INCREMENTAL:
diff --git a/mdadm.h b/mdadm.h
index 2e2275c..261cdb7 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -370,8 +370,12 @@ extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long val);
extern int sysfs_uevent(struct mdinfo *sra, char *event);
+extern int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
+ char *name);
+extern int sysfs_fd_get_ll(int fd, unsigned long long *val);
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
+extern int sysfs_fd_get_str(int fd, char *val, int size);
extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
@@ -386,7 +390,8 @@ extern int load_sys(char *path, char *buf);
extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
- unsigned long long start, unsigned long long length);
+ unsigned long long start, unsigned long long length,
+ char *buf);
extern int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int source, unsigned long long read_offset,
@@ -708,7 +713,6 @@ extern int add_dev(const char *name, const struct stat *stb, int flag, struct FT
extern int Manage_ro(char *devname, int fd, int readonly);
extern int Manage_runstop(char *devname, int fd, int runstop, int quiet);
extern int Manage_resize(char *devname, int fd, long long size, int raid_disks);
-extern int Manage_reconfig(char *devname, int fd, int layout);
extern int Manage_subdevs(char *devname, int fd,
mddev_dev_t devlist, int verbose);
extern int autodetect(void);
@@ -716,10 +720,11 @@ extern int Grow_Add_device(char *devname, int fd, char *newdev);
extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
extern int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
- int level, int layout, int chunksize, int raid_disks);
+ int level, char *layout_str, int chunksize, int raid_disks);
extern int Grow_restart(struct supertype *st, struct mdinfo *info,
- int *fdlist, int cnt, char *backup_file);
-
+ int *fdlist, int cnt, char *backup_file, int verbose);
+extern int Grow_continue(int mdfd, struct supertype *st,
+ struct mdinfo *info, char *backup_file);
extern int Assemble(struct supertype *st, char *mddev,
mddev_ident_t ident,
@@ -775,7 +780,10 @@ extern unsigned long bitmap_sectors(struct bitmap_super_s *bsb);
extern int md_get_version(int fd);
extern int get_linux_version(void);
+extern long long parse_size(char *size);
extern int parse_uuid(char *str, int uuid[4]);
+extern int parse_layout_10(char *layout);
+extern int parse_layout_faulty(char *layout);
extern int check_ext2(int fd, char *name);
extern int check_reiser(int fd, char *name);
extern int check_raid(int fd, char *name);
diff --git a/mdadm.spec b/mdadm.spec
index 6bc36cd..633fe60 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,6 +1,6 @@
Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
-Version: 3.0.3
+Version: 3.1
Release: 1
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tgz
URL: http://neil.brown.name/blog/mdadm
diff --git a/mdassemble.8 b/mdassemble.8
index 5520e8e..ecb0970 100644
--- a/mdassemble.8
+++ b/mdassemble.8
@@ -1,5 +1,5 @@
.\" -*- nroff -*-
-.TH MDASSEMBLE 8 "" v3.0.3
+.TH MDASSEMBLE 8 "" v3.1
.SH NAME
mdassemble \- assemble MD devices
.I aka
diff --git a/mdassemble.c b/mdassemble.c
index cf83795..d0d0707 100644
--- a/mdassemble.c
+++ b/mdassemble.c
@@ -28,7 +28,7 @@
/* from readme.c */
mapping_t pers[] = {
- { "linear", -1},
+ { "linear", LEVEL_LINEAR},
{ "raid0", 0},
{ "0", 0},
{ "stripe", 0},
@@ -39,8 +39,8 @@ mapping_t pers[] = {
{ "4", 4},
{ "raid5", 5},
{ "5", 5},
- { "multipath", -4},
- { "mp", -4},
+ { "multipath", LEVEL_MULTIPATH},
+ { "mp", LEVEL_MULTIPATH},
{ "raid6", 6},
{ "6", 6},
{ "raid10", 10},
diff --git a/mdmon.8 b/mdmon.8
index 41af50d..022f8ac 100644
--- a/mdmon.8
+++ b/mdmon.8
@@ -1,5 +1,5 @@
.\" See file COPYING in distribution for details.
-.TH MDMON 8 "" v3.0.3
+.TH MDMON 8 "" v3.1
.SH NAME
mdmon \- monitor MD external metadata arrays
diff --git a/restripe.c b/restripe.c
index 29c7336..f673206 100644
--- a/restripe.c
+++ b/restripe.c
@@ -23,14 +23,18 @@
*/
#include "mdadm.h"
+#include <stdint.h>
/* To restripe, we read from old geometry to a buffer, and
* read from buffer to new geometry.
- * When reading we don't worry about parity. When writing we do.
+ * When reading, we might have missing devices and so could need
+ * to reconstruct.
+ * When writing, we need to create correct parity and Q.
*
*/
-static int geo_map(int block, unsigned long long stripe, int raid_disks, int level, int layout)
+static int geo_map(int block, unsigned long long stripe, int raid_disks,
+ int level, int layout)
{
/* On the given stripe, find which disk in the array will have
* block numbered 'block'.
@@ -42,6 +46,7 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
switch(level*100 + layout) {
case 000:
case 400:
+ case 500 + ALGORITHM_PARITY_N:
/* raid 4 isn't messed around by parity blocks */
if (block == -1)
return raid_disks-1; /* parity block */
@@ -70,6 +75,65 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
if (block == -1) return pd;
return (pd + 1 + block) % raid_disks;
+ case 500 + ALGORITHM_PARITY_0:
+ return block + 1;
+
+
+ case 600 + ALGORITHM_PARITY_N_6:
+ if (block == -2)
+ return raid_disks - 1;
+ if (block == -1)
+ return raid_disks - 2; /* parity block */
+ return block;
+ case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = (raid_disks-1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = (raid_disks - 1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_PARITY_0_6:
+ if (block == -2)
+ return raid_disks - 1;
+ return block + 1;
+
+
+ case 600 + ALGORITHM_PARITY_0:
+ if (block == -1)
+ return 0;
+ if (block == -2)
+ return 1;
+ return block + 2;
+
case 600 + ALGORITHM_LEFT_ASYMMETRIC:
pd = raid_disks - 1 - (stripe % raid_disks);
if (block == -1) return pd;
@@ -80,6 +144,8 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
return block+2;
return block;
+ case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
+ /* Different order for calculating Q, otherwise same as ... */
case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
pd = stripe % raid_disks;
if (block == -1) return pd;
@@ -101,9 +167,43 @@ static int geo_map(int block, unsigned long long stripe, int raid_disks, int lev
if (block == -1) return pd;
if (block == -2) return (pd+1) % raid_disks;
return (pd + 2 + block) % raid_disks;
+
+
+ case 600 + ALGORITHM_ROTATING_N_RESTART:
+ /* Same as left_asymmetric, but first stripe is
+ * D D D P Q rather than
+ * Q D D D P
+ */
+ pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_ROTATING_N_CONTINUE:
+ /* Same as left_symmetric but Q is before P */
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+raid_disks-1) % raid_disks;
+ return (pd + 1 + block) % raid_disks;
}
return -1;
}
+static int is_ddf(int layout)
+{
+ switch (layout)
+ {
+ default:
+ return 0;
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ case ALGORITHM_ROTATING_N_RESTART:
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ return 1;
+ }
+}
static void xor_blocks(char *target, char **sources, int disks, int size)
@@ -118,10 +218,10 @@ static void xor_blocks(char *target, char **sources, int disks, int size)
}
}
-static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
+static void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
{
int d, z;
- char wq0, wp0, wd0, w10, w20;
+ uint8_t wq0, wp0, wd0, w10, w20;
for ( d = 0; d < size; d++) {
wq0 = wp0 = sources[disks-1][d];
for ( z = disks-2 ; z >= 0 ; z-- ) {
@@ -138,50 +238,306 @@ static void qsyndrome(char *p, char *q, char **sources, int disks, int size)
}
}
+
+/*
+ * The following was taken from linux/drivers/md/mktables.c, and modified
+ * to create in-memory tables rather than C code
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+ uint8_t v = 0;
+
+ while (b) {
+ if (b & 1)
+ v ^= a;
+ a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
+ b >>= 1;
+ }
+
+ return v;
+}
+
+static uint8_t gfpow(uint8_t a, int b)
+{
+ uint8_t v = 1;
+
+ b %= 255;
+ if (b < 0)
+ b += 255;
+
+ while (b) {
+ if (b & 1)
+ v = gfmul(v, a);
+ a = gfmul(a, a);
+ b >>= 1;
+ }
+
+ return v;
+}
+
+int tables_ready = 0;
+uint8_t raid6_gfmul[256][256];
+uint8_t raid6_gfexp[256];
+uint8_t raid6_gfinv[256];
+uint8_t raid6_gfexi[256];
+void make_tables(void)
+{
+ int i, j;
+ uint8_t v;
+
+ /* Compute multiplication table */
+ for (i = 0; i < 256; i++)
+ for (j = 0; j < 256; j++)
+ raid6_gfmul[i][j] = gfmul(i, j);
+
+ /* Compute power-of-2 table (exponent) */
+ v = 1;
+ for (i = 0; i < 256; i++) {
+ raid6_gfexp[i] = v;
+ v = gfmul(v, 2);
+ if (v == 1)
+ v = 0; /* For entry 255, not a real entry */
+ }
+
+ /* Compute inverse table x^-1 == x^254 */
+ for (i = 0; i < 256; i++)
+ raid6_gfinv[i] = gfpow(i, 254);
+
+ /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
+ for (i = 0; i < 256; i ++)
+ raid6_gfexi[i] = raid6_gfinv[raid6_gfexp[i] ^ 1];
+
+ tables_ready = 1;
+}
+
+uint8_t *zero;
+/* Following was taken from linux/drivers/md/raid6recov.c */
+
+/* Recover two failed data blocks. */
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ uint8_t **ptrs)
+{
+ uint8_t *p, *q, *dp, *dq;
+ uint8_t px, qx, db;
+ const uint8_t *pbmul; /* P multiplier table for B data */
+ const uint8_t *qmul; /* Q multiplier table (for both) */
+
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = ptrs[faila];
+ ptrs[faila] = zero;
+ dq = ptrs[failb];
+ ptrs[failb] = zero;
+
+ qsyndrome(dp, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ px = *p ^ *dp;
+ qx = qmul[*q ^ *dq];
+ *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+ *dp++ = db ^ px; /* Reconstructed A */
+ p++; q++;
+ }
+}
+
+/* Recover failure of one data block plus the P block */
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
+{
+ uint8_t *p, *q, *dq;
+ const uint8_t *qmul; /* Q multiplier table */
+
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = ptrs[faila];
+ ptrs[faila] = zero;
+
+ qsyndrome(p, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ *p++ ^= *dq = qmul[*q ^ *dq];
+ q++; dq++;
+ }
+}
+
/* Save data:
* We are given:
- * A list of 'fds' of the active disks. For now we require all to be present.
+ * A list of 'fds' of the active disks. Some may be absent.
* A geometry: raid_disks, chunk_size, level, layout
* A list of 'fds' for mirrored targets. They are already seeked to
* right (Write) location
- * A start and length
+ * A start and length which must be stripe-aligned
+ * 'buf' is large enough to hold one stripe, and is aligned
*/
int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
- unsigned long long start, unsigned long long length)
+ unsigned long long start, unsigned long long length,
+ char *buf)
{
- char abuf[8192+512];
- char *buf = (char*)(((unsigned long)abuf+511)&~511UL);
- int cpos = start % chunk_size; /* where in chunk we are up to */
int len;
int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
int disk;
+ int i;
+ if (!tables_ready)
+ make_tables();
+
+ if (zero == NULL) {
+ zero = malloc(chunk_size);
+ memset(zero, 0, chunk_size);
+ }
+
+ len = data_disks * chunk_size;
while (length > 0) {
- unsigned long long offset;
- int i;
- len = chunk_size - cpos;
- if (len > 8192) len = 8192;
- if (len > length) len = length;
- /* len bytes to be moved from one device */
-
- offset = (start/chunk_size/data_disks)*chunk_size + cpos;
- disk = start/chunk_size % data_disks;
- disk = geo_map(disk, start/chunk_size/data_disks,
- raid_disks, level, layout);
- if (lseek64(source[disk], offsets[disk]+offset, 0) < 0)
- return -1;
- if (read(source[disk], buf, len) != len)
+ int failed = 0;
+ int fdisk[3], fblock[3];
+ for (disk = 0; disk < raid_disks ; disk++) {
+ unsigned long long offset;
+ int dnum;
+
+ offset = (start/chunk_size/data_disks)*chunk_size;
+ dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (dnum < 0) abort();
+ if (source[dnum] < 0 ||
+ lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
+ read(source[dnum], buf+disk * chunk_size, chunk_size)
+ != chunk_size)
+ if (failed <= 2) {
+ fdisk[failed] = dnum;
+ fblock[failed] = disk;
+ failed++;
+ }
+ }
+ if (failed == 0 || fblock[0] >= data_disks)
+ /* all data disks are good */
+ ;
+ else if (failed == 1 || fblock[1] >= data_disks+1) {
+ /* one failed data disk and good parity */
+ char *bufs[data_disks];
+ for (i=0; i < data_disks; i++)
+ if (fblock[0] == i)
+ bufs[i] = buf + data_disks*chunk_size;
+ else
+ bufs[i] = buf + i*chunk_size;
+
+ xor_blocks(buf + fblock[0]*chunk_size,
+ bufs, data_disks, chunk_size);
+ } else if (failed > 2 || level != 6)
+ /* too much failure */
return -1;
+ else {
+ /* RAID6 computations needed. */
+ uint8_t *bufs[data_disks+4];
+ int qdisk;
+ int syndrome_disks;
+ disk = geo_map(-1, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ qdisk = geo_map(-2, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (is_ddf(layout)) {
+ /* q over 'raid_disks' blocks, in device order.
+ * 'p' and 'q' get to be all zero
+ */
+ for (i = 0; i < raid_disks; i++)
+ bufs[i] = zero;
+ for (i = 0; i < data_disks; i++) {
+ int dnum = geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ int snum;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * and thus the syndrome number.
+ */
+ snum = dnum;
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+ }
+ syndrome_disks = raid_disks;
+ } else {
+ /* for md, q is over 'data_disks' blocks,
+ * starting immediately after 'q'
+ * Note that for the '_6' variety, the p block
+ * makes a hole that we need to be careful of.
+ */
+ int j;
+ int snum = 0;
+ for (j = 0; j < raid_disks; j++) {
+ int dnum = (qdisk + 1 + j) % raid_disks;
+ if (dnum == disk || dnum == qdisk)
+ continue;
+ for (i = 0; i < data_disks; i++)
+ if (geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout) == dnum)
+ break;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * snum is syndrome disk for which 0 is immediately after Q
+ */
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+
+ if (fblock[0] == i)
+ fdisk[0] = snum;
+ if (fblock[1] == i)
+ fdisk[1] = snum;
+ snum++;
+ }
+
+ syndrome_disks = data_disks;
+ }
+
+ /* Place P and Q blocks at end of bufs */
+ bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
+ bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
+
+ if (fblock[1] == data_disks)
+ /* One data failed, and parity failed */
+ raid6_datap_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], bufs);
+ else {
+ if (fdisk[0] > fdisk[1]) {
+ int t = fdisk[0];
+ fdisk[0] = fdisk[1];
+ fdisk[1] = t;
+ }
+ /* Two data blocks failed, P,Q OK */
+ raid6_2data_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], fdisk[1], bufs);
+ }
+ }
+
for (i=0; i<nwrites; i++)
if (write(dest[i], buf, len) != len)
return -1;
+
length -= len;
start += len;
- cpos += len;
- while (cpos >= chunk_size) cpos -= chunk_size;
}
return 0;
}
@@ -202,17 +558,25 @@ int restore_stripes(int *dest, unsigned long long *offsets,
int source, unsigned long long read_offset,
unsigned long long start, unsigned long long length)
{
- char *stripe_buf = malloc(raid_disks * chunk_size);
+ char *stripe_buf;
char **stripes = malloc(raid_disks * sizeof(char*));
char **blocks = malloc(raid_disks * sizeof(char*));
int i;
- int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
+ int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
- if (stripe_buf == NULL || stripes == NULL || blocks == NULL) {
+ posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size);
+ if (zero == NULL) {
+ zero = malloc(chunk_size);
+ if (zero)
+ memset(zero, 0, chunk_size);
+ }
+ if (stripe_buf == NULL || stripes == NULL || blocks == NULL
+ || zero == NULL) {
free(stripe_buf);
free(stripes);
free(blocks);
+ free(zero);
return -2;
}
for (i=0; i<raid_disks; i++)
@@ -221,12 +585,12 @@ int restore_stripes(int *dest, unsigned long long *offsets,
int len = data_disks * chunk_size;
unsigned long long offset;
int disk, qdisk;
+ int syndrome_disks;
if (length < len)
return -3;
for (i=0; i < data_disks; i++) {
int disk = geo_map(i, start/chunk_size/data_disks,
raid_disks, level, layout);
- blocks[i] = stripes[disk];
if (lseek64(source, read_offset, 0) != read_offset)
return -1;
if (read(source, stripes[disk], chunk_size) != chunk_size)
@@ -240,6 +604,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
case 5:
disk = geo_map(-1, start/chunk_size/data_disks,
raid_disks, level, layout);
+ for (i = 0; i < data_disks; i++)
+ blocks[i] = stripes[(disk+1+i) % raid_disks];
xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
break;
case 6:
@@ -247,9 +613,29 @@ int restore_stripes(int *dest, unsigned long long *offsets,
raid_disks, level, layout);
qdisk = geo_map(-2, start/chunk_size/data_disks,
raid_disks, level, layout);
-
- qsyndrome(stripes[disk], stripes[qdisk], blocks,
- data_disks, chunk_size);
+ if (is_ddf(layout)) {
+ /* q over 'raid_disks' blocks, in device order.
+ * 'p' and 'q' get to be all zero
+ */
+ for (i = 0; i < raid_disks; i++)
+ if (i == disk || i == qdisk)
+ blocks[i] = (char*)zero;
+ else
+ blocks[i] = stripes[i];
+ syndrome_disks = raid_disks;
+ } else {
+ /* for md, q is over 'data_disks' blocks,
+ * starting immediately after 'q'
+ */
+ for (i = 0; i < data_disks; i++)
+ blocks[i] = stripes[(qdisk+1+i) % raid_disks];
+
+ syndrome_disks = data_disks;
+ }
+ qsyndrome((uint8_t*)stripes[disk],
+ (uint8_t*)stripes[qdisk],
+ (uint8_t**)blocks,
+ syndrome_disks, chunk_size);
break;
}
for (i=0; i < raid_disks ; i++)
@@ -298,7 +684,7 @@ int test_stripes(int *source, unsigned long long *offsets,
}
switch(level) {
case 6:
- qsyndrome(p, q, blocks, data_disks, chunk_size);
+ qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
disk = geo_map(-1, start/chunk_size, raid_disks,
level, layout);
if (memcmp(p, stripes[disk], chunk_size) != 0) {
@@ -337,6 +723,7 @@ main(int argc, char *argv[])
int save;
int *fds;
char *file;
+ char *buf;
int storefd;
unsigned long long *offsets;
int raid_disks, chunk_size, level, layout;
@@ -395,11 +782,13 @@ main(int argc, char *argv[])
}
}
+ buf = malloc(raid_disks * chunk_size);
+
if (save == 1) {
int rv = save_stripes(fds, offsets,
raid_disks, chunk_size, level, layout,
1, &storefd,
- start, length);
+ start, length, buf);
if (rv != 0) {
fprintf(stderr,
"test_stripe: save_stripes returned %d\n", rv);
diff --git a/super-ddf.c b/super-ddf.c
index 06858e2..fe83642 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1366,7 +1366,7 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
info->disk.raid_disk = -1;
// info->disk.raid_disk = find refnum in the table and use index;
}
- info->disk.state = (1 << MD_DISK_SYNC);
+ info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
info->reshape_active = 0;
@@ -2555,8 +2555,12 @@ static int validate_geometry_ddf(struct supertype *st,
for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
if (ddf_level_num[i].num2 == level)
break;
- if (ddf_level_num[i].num1 == MAXINT)
+ if (ddf_level_num[i].num1 == MAXINT) {
+ if (verbose)
+ fprintf(stderr, Name ": DDF does not support level %d arrays\n",
+ level);
return 0;
+ }
/* Should check layout? etc */
if (st->sb && freesize) {
@@ -2604,7 +2608,7 @@ static int validate_geometry_ddf(struct supertype *st,
if (verbose)
fprintf(stderr,
Name ": ddf: Cannot create this array "
- "on device %s\n",
+ "on device %s - a container is required.\n",
dev);
return 0;
}
@@ -2696,8 +2700,11 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
struct extent *e;
int i;
/* ddf/bvd supports lots of things, but not containers */
- if (level == LEVEL_CONTAINER)
+ if (level == LEVEL_CONTAINER) {
+ if (verbose)
+ fprintf(stderr, Name ": DDF cannot create a container within an container\n");
return 0;
+ }
/* We must have the container info already read in. */
if (!ddf)
return 0;
diff --git a/super-intel.c b/super-intel.c
index 9a99d60..2e119f8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3704,8 +3704,11 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
case 1:
case 10:
case 5:
- break;
+ return 0;
default:
+ if (verbose)
+ fprintf(stderr, Name
+ ": IMSM only supports levels 0,1,5,10\n");
return 1;
}
diff --git a/super0.c b/super0.c
index 07f4792..15b3ca2 100644
--- a/super0.c
+++ b/super0.c
@@ -85,6 +85,7 @@ static void examine_super0(struct supertype *st, char *homehost)
mdp_super_t *sb = st->sb;
time_t atime;
int d;
+ int delta_extra = 0;
char *c;
printf(" Magic : %08x\n", sb->md_magic);
@@ -135,10 +136,9 @@ static void examine_super0(struct supertype *st, char *homehost)
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)sb->reshape_position/2, human_size((long long)sb->reshape_position<<9));
if (sb->delta_disks) {
printf(" Delta Devices : %d", sb->delta_disks);
- if (sb->delta_disks)
- printf(" (%d->%d)\n", sb->raid_disks-sb->delta_disks, sb->raid_disks);
- else
- printf(" (%d->%d)\n", sb->raid_disks, sb->raid_disks+sb->delta_disks);
+ printf(" (%d->%d)\n", sb->raid_disks-sb->delta_disks, sb->raid_disks);
+ if (((int)sb->delta_disks) < 0)
+ delta_extra = - sb->delta_disks;
}
if (sb->new_level != sb->level) {
c = map_num(pers, sb->new_level);
@@ -149,6 +149,10 @@ static void examine_super0(struct supertype *st, char *homehost)
c = map_num(r5layout, sb->new_layout);
printf(" New Layout : %s\n", c?c:"-unknown-");
}
+ if (sb->level == 6) {
+ c = map_num(r6layout, sb->new_layout);
+ printf(" New Layout : %s\n", c?c:"-unknown-");
+ }
if (sb->level == 10) {
printf(" New Layout : near=%d, %s=%d\n",
sb->new_layout&255,
@@ -182,6 +186,10 @@ static void examine_super0(struct supertype *st, char *homehost)
c = map_num(r5layout, sb->layout);
printf(" Layout : %s\n", c?c:"-unknown-");
}
+ if (sb->level == 6) {
+ c = map_num(r6layout, sb->layout);
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (sb->level == 10) {
printf(" Layout :");
print_r10_layout(sb->layout);
@@ -202,7 +210,7 @@ static void examine_super0(struct supertype *st, char *homehost)
}
printf("\n");
printf(" Number Major Minor RaidDevice State\n");
- for (d= -1; d<(signed int)(sb->raid_disks+sb->spare_disks); d++) {
+ for (d= -1; d<(signed int)(sb->raid_disks+delta_extra + sb->spare_disks); d++) {
mdp_disk_t *dp;
char *dv;
char nb[5];
@@ -371,6 +379,8 @@ static void getinfo_super0(struct supertype *st, struct mdinfo *info)
info->delta_disks = sb->delta_disks;
info->new_layout = sb->new_layout;
info->new_chunk = sb->new_chunk;
+ if (info->delta_disks < 0)
+ info->array.raid_disks -= info->delta_disks;
} else
info->reshape_active = 0;
@@ -468,7 +478,14 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
- if ((sb->disks[d].state & ~(1<<MD_DISK_WRITEMOSTLY))
+ int mask = (1<<MD_DISK_WRITEMOSTLY);
+ int add = 0;
+ if (sb->minor_version >= 91)
+ /* During reshape we don't insist on everything
+ * being marked 'sync'
+ */
+ add = (1<<MD_DISK_SYNC);
+ if (((sb->disks[d].state & ~mask) | add)
!= info->disk.state) {
sb->disks[d].state = info->disk.state | wonly;
rv = 1;
@@ -904,9 +921,7 @@ static struct supertype *match_metadata_desc0(char *arg)
while (arg[0] == '0' && arg[1] == '0')
arg++;
if (strcmp(arg, "0") == 0 ||
- strcmp(arg, "0.90") == 0 ||
- strcmp(arg, "default") == 0 ||
- strcmp(arg, "") == 0 /* no metadata */
+ strcmp(arg, "0.90") == 0
)
return st;
@@ -955,9 +970,14 @@ static int add_internal_bitmap0(struct supertype *st, int *chunkp,
min_chunk *= 2;
bits = (bits+1)/2;
}
- if (chunk == UnSet)
+ if (chunk == UnSet) {
+ /* A chunk size less than a few Megabytes gives poor
+ * performance without increasing resync noticeably
+ */
chunk = min_chunk;
- else if (chunk < min_chunk)
+ if (chunk < 64*1024*1024)
+ chunk = 64*1024*1024;
+ } else if (chunk < min_chunk)
return 0; /* chunk size too small */
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
@@ -1059,12 +1079,22 @@ static int validate_geometry0(struct supertype *st, int level,
unsigned long long ldsize;
int fd;
- if (level == LEVEL_CONTAINER)
+ if (level == LEVEL_CONTAINER) {
+ if (verbose)
+ fprintf(stderr, Name ": 0.90 metadata does not support containers\n");
return 0;
- if (raiddisks > MD_SB_DISKS)
+ }
+ if (raiddisks > MD_SB_DISKS) {
+ if (verbose)
+ fprintf(stderr, Name ": 0.90 metadata supports at most %d devices per array\n",
+ MD_SB_DISKS);
return 0;
- if (size > (0x7fffffffULL<<9))
+ }
+ if (size > (0x7fffffffULL<<9)) {
+ if (verbose)
+ fprintf(stderr, Name ": 0.90 metadata supports at most 2 terrabytes per device\n");
return 0;
+ }
if (!subdev)
return 1;
diff --git a/super1.c b/super1.c
index fee22a9..5a2df0e 100644
--- a/super1.c
+++ b/super1.c
@@ -76,8 +76,8 @@ struct mdp_superblock_1 {
__u64 utime; /* 40 bits second, 24 bits microseconds */
__u64 events; /* incremented when superblock updated */
__u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
- __u32 sb_csum; /* checksum upto devs[max_dev] */
- __u32 max_dev; /* size of devs[] array to consider */
+ __u32 sb_csum; /* checksum upto dev_roles[max_dev] */
+ __u32 max_dev; /* size of dev_roles[] array to consider */
__u8 pad3[64-32]; /* set to 0 when writing */
/* device state information. Indexed by dev_number.
@@ -201,6 +201,7 @@ static void examine_super1(struct supertype *st, char *homehost)
time_t atime;
int d;
int role;
+ int delta_extra = 0;
int i;
char *c;
int l = homehost ? strlen(homehost) : 0;
@@ -283,13 +284,11 @@ static void examine_super1(struct supertype *st, char *homehost)
human_size(__le64_to_cpu(sb->reshape_position)<<9));
if (__le32_to_cpu(sb->delta_disks)) {
printf(" Delta Devices : %d", __le32_to_cpu(sb->delta_disks));
- if (__le32_to_cpu(sb->delta_disks))
- printf(" (%d->%d)\n",
- __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
- __le32_to_cpu(sb->raid_disks));
- else
- printf(" (%d->%d)\n", __le32_to_cpu(sb->raid_disks),
- __le32_to_cpu(sb->raid_disks)+__le32_to_cpu(sb->delta_disks));
+ printf(" (%d->%d)\n",
+ __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
+ __le32_to_cpu(sb->raid_disks));
+ if ((int)__le32_to_cpu(sb->delta_disks) < 0)
+ delta_extra = -__le32_to_cpu(sb->delta_disks);
}
if (__le32_to_cpu(sb->new_level) != __le32_to_cpu(sb->level)) {
c = map_num(pers, __le32_to_cpu(sb->new_level));
@@ -300,6 +299,10 @@ static void examine_super1(struct supertype *st, char *homehost)
c = map_num(r5layout, __le32_to_cpu(sb->new_layout));
printf(" New Layout : %s\n", c?c:"-unknown-");
}
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->new_layout));
+ printf(" New Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 10) {
printf(" New Layout :");
print_r10_layout(__le32_to_cpu(sb->new_layout));
@@ -331,6 +334,10 @@ static void examine_super1(struct supertype *st, char *homehost)
c = map_num(r5layout, __le32_to_cpu(sb->layout));
printf(" Layout : %s\n", c?c:"-unknown-");
}
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 10) {
int lo = __le32_to_cpu(sb->layout);
printf(" Layout :");
@@ -368,7 +375,7 @@ static void examine_super1(struct supertype *st, char *homehost)
#endif
printf(" Device Role : ");
d = __le32_to_cpu(sb->dev_number);
- if (d < sb->raid_disks)
+ if (d < __le32_to_cpu(sb->max_dev))
role = __le16_to_cpu(sb->dev_roles[d]);
else
role = 0xFFFF;
@@ -378,7 +385,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("Active device %d\n", role);
printf(" Array State : ");
- for (d=0; d<__le32_to_cpu(sb->raid_disks); d++) {
+ for (d=0; d<__le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
int cnt = 0;
int me = 0;
int i;
@@ -587,7 +594,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info)
info->disk.raid_disk = -1;
switch(role) {
case 0xFFFF:
- info->disk.state = 2; /* spare: ACTIVE, not sync, not faulty */
+ info->disk.state = 0; /* spare: not active, not sync, not faulty */
break;
case 0xFFFE:
info->disk.state = 1; /* faulty */
@@ -612,6 +619,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info)
info->delta_disks = __le32_to_cpu(sb->delta_disks);
info->new_layout = __le32_to_cpu(sb->new_layout);
info->new_chunk = __le32_to_cpu(sb->new_chunk)<<9;
+ if (info->delta_disks < 0)
+ info->array.raid_disks -= info->delta_disks;
} else
info->reshape_active = 0;
@@ -1342,7 +1351,10 @@ static struct supertype *match_metadata_desc1(char *arg)
return st;
}
if (strcmp(arg, "1.1") == 0 ||
- strcmp(arg, "1.01") == 0) {
+ strcmp(arg, "1.01") == 0 ||
+ strcmp(arg, "default") == 0 ||
+ strcmp(arg, "") == 0 /* no metadata */
+ ) {
st->minor_version = 1;
return st;
}
@@ -1489,9 +1501,14 @@ add_internal_bitmap1(struct supertype *st,
min_chunk *= 2;
bits = (bits+1)/2;
}
- if (chunk == UnSet)
+ if (chunk == UnSet) {
+ /* For practical purpose, 64Meg is a good
+ * default chunk size for internal bitmaps.
+ */
chunk = min_chunk;
- else if (chunk < min_chunk)
+ if (chunk < 64*1024*1024)
+ chunk = 64*1024*1024;
+ } else if (chunk < min_chunk)
return 0; /* chunk size too small */
if (chunk == 0) /* rounding problem */
return 0;
@@ -1592,8 +1609,11 @@ static int validate_geometry1(struct supertype *st, int level,
unsigned long long ldsize;
int fd;
- if (level == LEVEL_CONTAINER)
+ if (level == LEVEL_CONTAINER) {
+ if (verbose)
+ fprintf(stderr, Name ": 1.x metadata does not support containers\n");
return 0;
+ }
if (!subdev)
return 1;
diff --git a/sysfs.c b/sysfs.c
index d327e3d..35dfbd4 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -442,21 +442,28 @@ int sysfs_uevent(struct mdinfo *sra, char *event)
return 0;
}
-int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
- char *name, unsigned long long *val)
+int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
+ char *name)
{
char fname[50];
- char buf[50];
- int n;
int fd;
- char *ep;
+
sprintf(fname, "/sys/block/%s/md/%s/%s",
sra->sys_name, dev?dev->sys_name:"", name);
- fd = open(fname, O_RDONLY);
+ fd = open(fname, O_RDWR);
if (fd < 0)
- return -1;
+ fd = open(fname, O_RDONLY);
+ return fd;
+}
+
+int sysfs_fd_get_ll(int fd, unsigned long long *val)
+{
+ char buf[50];
+ int n;
+ char *ep;
+
+ lseek(fd, 0, 0);
n = read(fd, buf, sizeof(buf));
- close(fd);
if (n <= 0)
return -1;
buf[n] = 0;
@@ -466,25 +473,46 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
return 0;
}
-int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
- char *name, char *val, int size)
+int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, unsigned long long *val)
{
- char fname[50];
int n;
int fd;
- sprintf(fname, "/sys/block/%s/md/%s/%s",
- sra->sys_name, dev?dev->sys_name:"", name);
- fd = open(fname, O_RDONLY);
+
+ fd = sysfs_get_fd(sra, dev, name);
if (fd < 0)
return -1;
- n = read(fd, val, size);
+ n = sysfs_fd_get_ll(fd, val);
close(fd);
+ return n;
+}
+
+int sysfs_fd_get_str(int fd, char *val, int size)
+{
+ int n;
+
+ lseek(fd, 0, 0);
+ n = read(fd, val, size);
if (n <= 0)
return -1;
val[n] = 0;
return n;
}
+int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, char *val, int size)
+{
+ int n;
+ int fd;
+
+ fd = sysfs_get_fd(sra, dev, name);
+ if (fd < 0)
+ return -1;
+ n = sysfs_fd_get_str(fd, val, size);
+ close(fd);
+ return n;
+}
+
int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
{
unsigned long sec;
diff --git a/test b/test
index 3acb6c1..d0456f1 100644
--- a/test
+++ b/test
@@ -34,6 +34,8 @@ mdsize0=19904
mdsize00=19840
# super1.0 round down to multiple of 2, subtract 8
mdsize1=19992
+# super1.1 for linear: round to multiple of 2, subtract 4
+mdsize1_l=19996
# subtract another 4 for bitmaps
mdsize1b=19988
mdsize11=19992
@@ -166,7 +168,9 @@ testdev() {
cnt=$2
dvsize=$3
chunk=$4
- mkfs -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
+ if [ -z "$5" ]; then
+ mkfs -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
+ fi
dsize=$[dvsize/chunk]
dsize=$[dsize*chunk]
rasize=$[dsize*2*cnt]
diff --git a/tests/00linear b/tests/00linear
index ec6a166..1aea0a2 100644
--- a/tests/00linear
+++ b/tests/00linear
@@ -3,13 +3,19 @@
mdadm -CR $md0 -l linear -n3 $dev0 $dev1 $dev2
check linear
-testdev $md0 3 $mdsize0 64
+testdev $md0 3 $mdsize1_l 1
mdadm -S $md0
-# now with version-1 superblock
-mdadm -CR $md0 -e1 --level=linear -n4 $dev0 $dev1 $dev2 $dev3
+# now with version-0.90 superblock
+mdadm -CR $md0 -e0.90 --level=linear -n4 $dev0 $dev1 $dev2 $dev3
check linear
-testdev $md0 4 $mdsize1 64
+testdev $md0 4 $mdsize0 1
+mdadm -S $md0
+
+# now with version-1.0 superblock
+mdadm -CR $md0 -e1.0 --level=linear -n4 $dev0 $dev1 $dev2 $dev3
+check linear
+testdev $md0 4 $mdsize1 1
mdadm -S $md0
# now with no superblock
diff --git a/tests/00multipath b/tests/00multipath
index efad690..cc52fb3 100644
--- a/tests/00multipath
+++ b/tests/00multipath
@@ -4,21 +4,21 @@
mdadm -CR $md1 -l multipath -n2 $path0 $path1
-testdev $md1 1 $mdsize0 1
+testdev $md1 1 $mdsize1 1
mdadm $md1 -f $path0
rotest $md1
-testdev $md1 1 $mdsize0 1
+testdev $md1 1 $mdsize1 1
mdadm $md1 -r $path0
mdadm $md1 -a $path0
rotest $md1
-testdev $md1 1 $mdsize0 1
+testdev $md1 1 $mdsize1 1
mdadm $md1 -f $path1
mdadm $md1 -r $path1
rotest $md1
-testdev $md1 1 $mdsize0 1
+testdev $md1 1 $mdsize1 1
mdadm -S $md1
diff --git a/tests/00raid0 b/tests/00raid0
index f5122ec..1614a63 100644
--- a/tests/00raid0
+++ b/tests/00raid0
@@ -3,32 +3,32 @@
mdadm -CR $md0 -l raid0 -n3 $dev0 $dev1 $dev2
check raid0
-testdev $md0 3 $mdsize0 64
+testdev $md0 3 $mdsize1_l 512
mdadm -S $md0
-# now with version-1 superblock
-mdadm -CR $md0 -e1 -l0 -n4 $dev0 $dev1 $dev2 $dev3
+# now with version-0.90 superblock
+mdadm -CR $md0 -e0.90 -l0 -n4 $dev0 $dev1 $dev2 $dev3
check raid0
-testdev $md0 4 $mdsize1 64
+testdev $md0 4 $mdsize0 512
mdadm -S $md0
# now with no superblock
mdadm -B $md0 -l0 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
check raid0
-testdev $md0 5 $size 64
+testdev $md0 5 $size 512
mdadm -S $md0
# now same again with different chunk size
for chunk in 4 32 256
do
- mdadm -CR $md0 -l raid0 --chunk $chunk -n3 $dev0 $dev1 $dev2
+ mdadm -CR $md0 -e0.90 -l raid0 --chunk $chunk -n3 $dev0 $dev1 $dev2
check raid0
testdev $md0 3 $mdsize0 $chunk
mdadm -S $md0
# now with version-1 superblock
- mdadm -CR $md0 -e1 -l0 -c $chunk -n4 $dev0 $dev1 $dev2 $dev3
+ mdadm -CR $md0 -e1.0 -l0 -c $chunk -n4 $dev0 $dev1 $dev2 $dev3
check raid0
testdev $md0 4 $mdsize1 $chunk
mdadm -S $md0
diff --git a/tests/00raid1 b/tests/00raid1
index 2ad82d0..45dc86a 100644
--- a/tests/00raid1
+++ b/tests/00raid1
@@ -6,14 +6,14 @@
mdadm -CR $md0 -l 1 -n2 $dev0 $dev1
check resync
check raid1
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
mdadm -S $md0
-# now with version-1 superblock, spare
-mdadm -CR $md0 -e1 --level=raid1 -n3 -x2 $dev0 missing missing $dev1 $dev2
+# now with version-0.90 superblock, spare
+mdadm -CR $md0 -e0.90 --level=raid1 -n3 -x2 $dev0 missing missing $dev1 $dev2
check recovery
check raid1
-testdev $md0 1 $mdsize1b 1
+testdev $md0 1 $mdsize0 1
mdadm -S $md0
# now with no superblock
diff --git a/tests/00raid10 b/tests/00raid10
index 6fffb74..796b970 100644
--- a/tests/00raid10
+++ b/tests/00raid10
@@ -13,6 +13,6 @@ do
esac
mdadm --create --run --level=raid10 --layout $lo --raid-disks 6 -x 1 $md0 $devs
check resync ; check raid10
- testdev $md0 $m $mdsize0 $[64*cm]
+ testdev $md0 $m $mdsize1 $[512*cm]
mdadm -S $md0
done
diff --git a/tests/00raid4 b/tests/00raid4
index ddb16b1..97d3e3f 100644
--- a/tests/00raid4
+++ b/tests/00raid4
@@ -3,13 +3,13 @@
mdadm -CfR $md0 -l 4 -n3 $dev0 $dev1 $dev2
check resync ; check raid[45]
-testdev $md0 2 $mdsize0 64
+testdev $md0 2 $mdsize1 512
mdadm -S $md0
# now with version-1 superblock
mdadm -CR $md0 -e1 --level=raid4 -n4 $dev0 $dev1 $dev2 $dev3
check recovery; check raid[45]
-testdev $md0 3 $mdsize1 64
+testdev $md0 3 $mdsize1 512
mdadm -S $md0
diff --git a/tests/00raid5 b/tests/00raid5
index 71f3654..a288c83 100644
--- a/tests/00raid5
+++ b/tests/00raid5
@@ -1,15 +1,15 @@
# create a simple raid5 set
-mdadm -CfR $md0 -l 5 -n3 $dev0 $dev1 $dev2
+mdadm -CfR $md0 -e 0.90 -l 5 -n3 $dev0 $dev1 $dev2
check resync
-testdev $md0 2 $mdsize0 64
+testdev $md0 2 $mdsize0 512
mdadm -S $md0
# now with version-1 superblock
mdadm -CR $md0 -e1 --level=raid5 -n4 $dev0 $dev1 $dev2 $dev3
check recovery
-testdev $md0 3 $mdsize1 64
+testdev $md0 3 $mdsize1 512
mdadm -S $md0
# now same again with explicit layout
@@ -19,13 +19,13 @@ do
mdadm -CfR $md0 -l 5 -p $lo -n3 $dev0 $dev1 $dev2
check resync ; check raid5
- testdev $md0 2 $mdsize0 64
+ testdev $md0 2 $mdsize1 512
mdadm -S $md0
# now with version-1 superblock
mdadm -CR $md0 -e1 --level=raid5 --layout $lo -n4 $dev0 $dev1 $dev2 $dev3
check recovery ; check raid5
- testdev $md0 3 $mdsize1 64
+ testdev $md0 3 $mdsize1 512
mdadm -S $md0
done
diff --git a/tests/00raid6 b/tests/00raid6
index 81834aa..63d60f5 100644
--- a/tests/00raid6
+++ b/tests/00raid6
@@ -1,15 +1,15 @@
# create a simple raid6 set
-mdadm -CfR $md0 -l 6 -n4 $dev0 $dev1 $dev2 $dev3
+mdadm -CfR $md0 -e0.90 -l 6 -n4 $dev0 $dev1 $dev2 $dev3
check resync ; check raid6
-testdev $md0 2 $mdsize0 64
+testdev $md0 2 $mdsize0 512
mdadm -S $md0
# now with version-1 superblock
mdadm -CR $md0 -e1 --level=raid6 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
check resync ; check raid6
-testdev $md0 3 $mdsize1 64
+testdev $md0 3 $mdsize1 512
mdadm -S $md0
diff --git a/tests/02lineargrow b/tests/02lineargrow
index 78477a3..b093355 100644
--- a/tests/02lineargrow
+++ b/tests/02lineargrow
@@ -6,17 +6,17 @@ do
case $e in
0.90 ) sz=$mdsize0 ;;
1 ) sz=$mdsize1 ;;
- 1.1 ) sz=$mdsize11 ;;
- 1.2 ) sz=$mdsize12 ;;
+ 1.1 ) sz=$mdsize1_l ;;
+ 1.2 ) sz=$mdsize11 ;;
esac
mdadm -CRf $md0 --level linear -e $e --raid-disks=1 $dev1
- testdev $md0 1 $sz 64
+ testdev $md0 1 $sz 1
mdadm --grow $md0 --add $dev2
- testdev $md0 2 $sz 64
+ testdev $md0 2 $sz 1
mdadm --grow $md0 --add $dev3
- testdev $md0 3 $sz 64
+ testdev $md0 3 $sz 1
mdadm -S $md0
done
diff --git a/tests/02r1grow b/tests/02r1grow
index 916a31f..43f037b 100644
--- a/tests/02r1grow
+++ b/tests/02r1grow
@@ -2,7 +2,7 @@
# create a small raid1 array, make it larger. Then make it smaller
-mdadm -CR $md0 --level raid1 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+mdadm -CR $md0 -e 0.90 --level raid1 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
check wait
check state UUU
testdev $md0 1 $[size/2] 1
diff --git a/tests/02r5grow b/tests/02r5grow
index 2247570..e526962 100644
--- a/tests/02r5grow
+++ b/tests/02r5grow
@@ -2,7 +2,7 @@
# create a small raid5 array, make it larger. Then make it smaller
-mdadm -CR $md0 --level raid5 --chunk=32 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+mdadm -CR $md0 -e0.90 --level raid5 --chunk=32 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
check wait
check state UUU
testdev $md0 2 $[size/2] 32
@@ -10,11 +10,11 @@ testdev $md0 2 $[size/2] 32
mdadm --grow $md0 --size max
check resync
check wait
-testdev $md0 2 $mdsize0 64
+testdev $md0 2 $mdsize0 32
mdadm --grow $md0 --size $[size/2]
check nosync
-testdev $md0 2 $[size/2] 64
+testdev $md0 2 $[size/2] 32
mdadm -S $md0
diff --git a/tests/02r6grow b/tests/02r6grow
index cdcc46a..3261760 100644
--- a/tests/02r6grow
+++ b/tests/02r6grow
@@ -2,7 +2,7 @@
# create a small raid6 array, make it larger. Then make it smaller
-mdadm -CR $md0 --level raid6 --chunk=32 --raid-disks 4 --size $[size/2] $dev1 $dev2 $dev3 $dev4
+mdadm -CR $md0 -e 0.90 --level raid6 --chunk=32 --raid-disks 4 --size $[size/2] $dev1 $dev2 $dev3 $dev4
check wait
check state UUUU
testdev $md0 2 $[size/2] 32
@@ -10,11 +10,11 @@ testdev $md0 2 $[size/2] 32
mdadm --grow $md0 --size max
check resync
check wait
-testdev $md0 2 $mdsize0 64
+testdev $md0 2 $mdsize0 32
mdadm --grow $md0 --size $[size/2]
check nosync
-testdev $md0 2 $[size/2] 64
+testdev $md0 2 $[size/2] 32
mdadm -S $md0
diff --git a/tests/03r0assem b/tests/03r0assem
index 55205a3..db6486e 100644
--- a/tests/03r0assem
+++ b/tests/03r0assem
@@ -6,7 +6,7 @@
mdadm -CR $md2 -l0 -n3 $dev0 $dev1 $dev2
check raid0
-tst="testdev $md2 3 $mdsize0 64"
+tst="testdev $md2 3 $mdsize1 512"
$tst
uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
mdadm -S $md2
@@ -19,7 +19,7 @@ mdadm -A $md2 -u $uuid $devlist
$tst
mdadm -S $md2
-mdadm --assemble $md2 --super-minor=2 $devlist
+mdadm --assemble $md2 --name=2 $devlist
$tst
mdadm -S $md2
@@ -35,7 +35,7 @@ mdadm -S $md2
{
echo DEVICE $devlist
- echo array $md2 super-minor=2
+ echo array $md2 name=2
} > $conf
mdadm -As -c $conf $md2
@@ -65,12 +65,12 @@ $tst
mdadm -S $md2
-### Now for version 1...
+### Now for version 0...
mdadm --zero-superblock $dev0 $dev1 $dev2
-mdadm -CR $md2 -l0 --metadata=1.0 -n3 $dev0 $dev1 $dev2
+mdadm -CR $md2 -l0 --metadata=0.90 -n3 $dev0 $dev1 $dev2
check raid0
-tst="testdev $md2 3 $mdsize1 64"
+tst="testdev $md2 3 $mdsize0 512"
$tst
uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
@@ -84,10 +84,9 @@ mdadm -A $md2 -u $uuid $devlist
$tst
mdadm -S $md2
-# version 1 has no super-minor
-# mdadm --assemble $md2 --super-minor=2 $devlist #
-# $tst
-# mdadm -S $md2
+mdadm --assemble $md2 --super-minor=2 $devlist #
+$tst
+mdadm -S $md2
conf=$targetdir/mdadm.conf
{
@@ -99,14 +98,14 @@ mdadm -As -c $conf $md2
$tst
mdadm -S $md2
-#{
-# echo DEVICE $devlist
-# echo array $md2 super-minor=2
-#} > $conf
-#
-#mdadm -As -c $conf $md2
-#$tst
-#mdadm -S $md2
+{
+ echo DEVICE $devlist
+ echo array $md2 super-minor=2
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+mdadm -S $md2
{
diff --git a/tests/03r5assem b/tests/03r5assem
index be26cf1..de0d56b 100644
--- a/tests/03r5assem
+++ b/tests/03r5assem
@@ -2,8 +2,8 @@
# create a raid5 array and assemble it in various ways,
# including with missing devices.
-mdadm -CR $md1 -l5 -n3 $dev0 $dev1 $dev2
-tst="check raid5 ;testdev $md1 2 $mdsize0 64 ; mdadm -S $md1"
+mdadm -CR -e 0.90 $md1 -l5 -n3 $dev0 $dev1 $dev2
+tst="check raid5 ;testdev $md1 2 $mdsize0 512 ; mdadm -S $md1"
uuid=`mdadm -Db $md1 | sed 's/.*UUID=//'`
check wait
eval $tst
diff --git a/tests/03r5assemV1 b/tests/03r5assemV1
index 0f2c83b..5238ede 100644
--- a/tests/03r5assemV1
+++ b/tests/03r5assemV1
@@ -2,7 +2,7 @@
# create a v-1 raid5 array and assemble in various ways
mdadm -CR -e1 --name one $md1 -l5 -n3 -x2 $dev0 $dev1 $dev2 $dev3 $dev4
-tst="check raid5 ;testdev $md1 2 $mdsize1 64 ; mdadm -S $md1"
+tst="check raid5 ;testdev $md1 2 $mdsize1 512 ; mdadm -S $md1"
uuid=`mdadm -Db $md1 | sed 's/.*UUID=//'`
check wait
diff --git a/tests/04r0update b/tests/04r0update
index 23c7d65..0cd815a 100644
--- a/tests/04r0update
+++ b/tests/04r0update
@@ -1,7 +1,7 @@
# create a raid0, re-assemble with a different super-minor
-mdadm -CR $md0 -l0 -n3 $dev0 $dev1 $dev2
-testdev $md0 3 $mdsize0 64
+mdadm -CR -e 0.90 $md0 -l0 -n3 $dev0 $dev1 $dev2
+testdev $md0 3 $mdsize0 512
minor1=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
mdadm -S /dev/md0
diff --git a/tests/05r1-bitmapfile b/tests/05r1-bitmapfile
index 59564cb..380b229 100644
--- a/tests/05r1-bitmapfile
+++ b/tests/05r1-bitmapfile
@@ -6,11 +6,11 @@ bmf=$targetdir/bitmap
rm -f $bmf
mdadm --create --run $md0 --level=1 -n2 --delay=1 --bitmap $bmf $dev1 $dev2
check wait
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
mdadm -S $md0
mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
@@ -20,7 +20,7 @@ then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
exit 1
fi
mdadm $md0 -f $dev1
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
sleep 4
dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
if [ $dirty3 -lt 400 ]
diff --git a/tests/05r1-grow-external b/tests/05r1-grow-external
index aa49109..c110093 100644
--- a/tests/05r1-grow-external
+++ b/tests/05r1-grow-external
@@ -4,7 +4,7 @@
#
mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2
check wait
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
bmf=$targetdir/bm
rm -f $bmf
@@ -14,7 +14,7 @@ dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
diff --git a/tests/05r1-grow-internal b/tests/05r1-grow-internal
index 1da28a9..f5c0e5b 100644
--- a/tests/05r1-grow-internal
+++ b/tests/05r1-grow-internal
@@ -4,15 +4,15 @@
#
mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2
check wait
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
#mdadm -E $dev1
-mdadm --grow $md0 --bitmap=internal --delay=1 || { mdadm -X $dev2 ; exit 1; }
+mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1 || { mdadm -X $dev2 ; exit 1; }
dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
diff --git a/tests/05r1-grow-internal-1 b/tests/05r1-grow-internal-1
index 18c5c8b..7539fad 100644
--- a/tests/05r1-grow-internal-1
+++ b/tests/05r1-grow-internal-1
@@ -7,7 +7,7 @@ check wait
testdev $md0 1 $mdsize1b 1
#mdadm -E $dev1
-mdadm --grow $md0 --bitmap=internal --delay=1
+mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1
dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
diff --git a/tests/05r1-internalbitmap b/tests/05r1-internalbitmap
index 30114b0..68d65e4 100644
--- a/tests/05r1-internalbitmap
+++ b/tests/05r1-internalbitmap
@@ -2,7 +2,7 @@
#
# create a raid1 with an internal bitmap
#
-mdadm --create --run $md0 --level=1 -n2 --delay=1 --bitmap internal $dev1 $dev2
+mdadm --create -e0.90 --run $md0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
check wait
testdev $md0 1 $mdsize0 1
mdadm -S $md0
diff --git a/tests/05r1-internalbitmap-v1a b/tests/05r1-internalbitmap-v1a
index 2ef5eba..f9b08e8 100644
--- a/tests/05r1-internalbitmap-v1a
+++ b/tests/05r1-internalbitmap-v1a
@@ -2,7 +2,7 @@
#
# create a raid1 with an internal bitmap
#
-mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --delay=1 --bitmap internal $dev1 $dev2
+mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
check wait
check bitmap
testdev $md0 1 $mdsize1b 1
diff --git a/tests/05r1-internalbitmap-v1b b/tests/05r1-internalbitmap-v1b
index 509fb28..268de57 100644
--- a/tests/05r1-internalbitmap-v1b
+++ b/tests/05r1-internalbitmap-v1b
@@ -2,7 +2,7 @@
#
# create a raid1 with an internal bitmap
#
-mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --delay=1 --bitmap internal $dev1 $dev2
+mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
check wait
check bitmap
testdev $md0 1 $mdsize11 1
diff --git a/tests/05r1-internalbitmap-v1c b/tests/05r1-internalbitmap-v1c
index 5fea930..9eb9a45 100644
--- a/tests/05r1-internalbitmap-v1c
+++ b/tests/05r1-internalbitmap-v1c
@@ -2,7 +2,7 @@
#
# create a raid1 with an internal bitmap
#
-mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --delay=1 --bitmap internal $dev1 $dev2
+mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk 4 $dev1 $dev2
check wait
check bitmap
testdev $md0 1 $mdsize12 1
diff --git a/tests/05r1-n3-bitmapfile b/tests/05r1-n3-bitmapfile
index 59664fb..4b4f0f1 100644
--- a/tests/05r1-n3-bitmapfile
+++ b/tests/05r1-n3-bitmapfile
@@ -6,7 +6,7 @@
#
bmf=$targetdir/bitmap
rm -f $bmf
-mdadm --create --run $md0 --level=1 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3
+mdadm --create -e0.90 --run $md0 --level=1 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3
check wait
testdev $md0 1 $mdsize0 1
mdadm -S $md0
diff --git a/tests/05r1-re-add b/tests/05r1-re-add
index b2dd82e..cfa1e6b 100644
--- a/tests/05r1-re-add
+++ b/tests/05r1-re-add
@@ -5,10 +5,10 @@
# Then do some IO first. Resync should still be very fast
#
-mdadm -CR $md0 -l1 -n2 -binternal -d1 $dev1 $dev2
+mdadm -CR $md0 -l1 -n2 -binternal --bitmap-chunk=4 -d1 $dev1 $dev2
check resync
check wait
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
sleep 4
mdadm $md0 -f $dev2
@@ -21,10 +21,10 @@ check nosync
mdadm $md0 -f $dev2
sleep 1
mdadm $md0 -r $dev2
-testdev $md0 1 $mdsize0 1
+testdev $md0 1 $mdsize1 1
mdadm $md0 -a $dev2
check wait
-cmp --bytes=$[$mdsize0*1024] $dev1 $dev2
+cmp --ignore-initial=$[16*512] --bytes=$[$mdsize0*1024] $dev1 $dev2
mdadm $md0 -f $dev2; sleep 1
mdadm $md0 -r $dev2
@@ -32,5 +32,5 @@ if dd if=/dev/zero of=$md0 ; then : ; fi
mdadm $md0 -a $dev2
check recovery
check wait
-cmp --bytes=$[$mdsize0*1024] $dev1 $dev2
+cmp --ignore-initial=$[16*512] --bytes=$[$mdsize0*1024] $dev1 $dev2
mdadm -S $md0
diff --git a/tests/05r5-bitmapfile b/tests/05r5-bitmapfile
index 25c1228..c753758 100644
--- a/tests/05r5-bitmapfile
+++ b/tests/05r5-bitmapfile
@@ -6,11 +6,11 @@ bmf=$targetdir/bitmap
rm -f $bmf
mdadm --create --run $md0 --level=5 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3
check wait
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
mdadm -S $md0
mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
@@ -20,7 +20,7 @@ then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
exit 1
fi
mdadm $md0 -f $dev1
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
sleep 4
dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
if [ $dirty3 -lt 400 ]
diff --git a/tests/05r5-internalbitmap b/tests/05r5-internalbitmap
index d79db90..591c9db 100644
--- a/tests/05r5-internalbitmap
+++ b/tests/05r5-internalbitmap
@@ -2,13 +2,13 @@
#
# create a raid1 with an internal bitmap
#
-mdadm --create --run $md0 --level=5 -n3 --delay=1 --bitmap internal $dev1 $dev2 $dev3
+mdadm --create --run $md0 --level=5 -n3 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2 $dev3
check wait
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
mdadm -S $md0
mdadm --assemble $md0 $dev1 $dev2 $dev3
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
@@ -18,7 +18,7 @@ then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
exit 1
fi
mdadm $md0 -f $dev1
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
sleep 4
dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
if [ $dirty3 -lt 400 ]
diff --git a/tests/05r6-bitmapfile b/tests/05r6-bitmapfile
index 865cedb..28c590e 100644
--- a/tests/05r6-bitmapfile
+++ b/tests/05r6-bitmapfile
@@ -6,11 +6,11 @@ bmf=$targetdir/bitmap
rm -f $bmf
mdadm --create --run $md0 --level=6 -n4 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3 $dev4
check wait
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
mdadm -S $md0
mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3 $dev4
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
sleep 4
dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
@@ -20,7 +20,7 @@ then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
exit 1
fi
mdadm $md0 -f $dev3
-testdev $md0 2 $mdsize0 1
+testdev $md0 2 $mdsize1 512
sleep 4
dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
if [ $dirty3 -lt 400 ]
diff --git a/tests/06r5swap b/tests/06r5swap
index 6a722e8..5373a60 100644
--- a/tests/06r5swap
+++ b/tests/06r5swap
@@ -1,7 +1,7 @@
# make a raid5 array, byte swap the superblocks, then assemble...
-mdadm -CR $md0 -l5 -n4 $dev0 $dev1 $dev2 $dev3
+mdadm -CR $md0 -e 0.90 -l5 -n4 $dev0 $dev1 $dev2 $dev3
sleep 4
mdadm -S $md0
diff --git a/tests/06update-uuid b/tests/06update-uuid
index c23afc9..a4409e7 100644
--- a/tests/06update-uuid
+++ b/tests/06update-uuid
@@ -57,7 +57,7 @@ fi
mdadm -S /dev/md0
# Internal bitmaps too.
-mdadm -CR --assume-clean -b internal $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -CR --assume-clean -b internal --bitmap-chunk 4 $md0 -l5 -n3 $dev0 $dev1 $dev2
mdadm -S /dev/md0
mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
no_errors
@@ -69,7 +69,7 @@ mdadm -X $dev0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
}
mdadm -S /dev/md0
-mdadm -CR --assume-clean -e1.2 -b internal $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -CR --assume-clean -e1.2 -b internal --bitmap-chunk=4 $md0 -l5 -n3 $dev0 $dev1 $dev2
mdadm -S /dev/md0
mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
no_errors
diff --git a/tests/06wrmostly b/tests/06wrmostly
index 34be60d..2a3fae6 100644
--- a/tests/06wrmostly
+++ b/tests/06wrmostly
@@ -2,13 +2,13 @@
# create a raid1 array with a wrmostly device
mdadm -CR $md0 -l1 -n3 $dev0 $dev1 --write-mostly $dev2
-testdev $md0 1 $mdsize0 64
+testdev $md0 1 $mdsize1 1
# unfortunately, we cannot measure if any read requests are going to $dev2
mdadm -S $md0
-mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal $dev0 $dev1 --write-mostly $dev2
-testdev $md0 1 $mdsize0 64
+mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal --bitmap-chunk=4 $dev0 $dev1 --write-mostly $dev2
+testdev $md0 1 $mdsize1 1
mdadm -S $md0
diff --git a/tests/07autoassemble b/tests/07autoassemble
index bdbc5d3..49b9985 100644
--- a/tests/07autoassemble
+++ b/tests/07autoassemble
@@ -8,9 +8,9 @@ mdadm -CR $md0 -l0 -n2 $md1 $md2 --homehost=testing
mdadm -Ss
mdadm -As -c /dev/null --homehost=testing -vvv
-testdev $md1 1 $mdsize0 64
-testdev $md2 1 $mdsize0 64
-testdev $md0 2 $mdsize00 64
+testdev $md1 1 $mdsize1 1
+testdev $md2 1 $mdsize1 1
+testdev $md0 2 $mdsize11 512
mdadm -Ss
mdadm --zero-superblock $dev0 $dev1 $dev2 $dev3
@@ -19,6 +19,6 @@ mdadm -CR $md1 -l1 -n2 $dev0 $dev1 --homehost=testing
mdadm -CR $md0 -l0 -n2 $md1 $dev2 --homehost=testing
mdadm -Ss
mdadm -As -c /dev/null --homehost=testing -vvv
-testdev $md1 1 $mdsize0 64
-testdev $md0 1 $[mdsize0+mdsize00] 64
+testdev $md1 1 $mdsize1 1
+testdev $md0 1 $[mdsize1+mdsize11] 512
mdadm -Ss
diff --git a/tests/07autodetect b/tests/07autodetect
index 5460c5b..156cced 100644
--- a/tests/07autodetect
+++ b/tests/07autodetect
@@ -12,15 +12,15 @@ then
fi
-mdadm -CR $mdp0 -l0 -f -n1 $dev0
-mdadm -CR $mdp1 -l0 -f -n1 $dev1
+mdadm -CR -e 0 $mdp0 -l0 -f -n1 $dev0
+mdadm -CR -e 0 $mdp1 -l0 -f -n1 $dev1
sfdisk $mdp0 >&2 << END
,,FD
END
sfdisk $mdp1 >&2 << END
,,FD
END
-mdadm -CR $md0 -l1 -n2 ${mdp0}p1 ${mdp1}p1
+mdadm -CR -e 0 $md0 -l1 -n2 ${mdp0}p1 ${mdp1}p1
check resync
check raid1
check wait
diff --git a/tests/07changelevelintr b/tests/07changelevelintr
new file mode 100644
index 0000000..d3faf2e
--- /dev/null
+++ b/tests/07changelevelintr
@@ -0,0 +1,60 @@
+
+#
+# test that we can stop and restart a level change.
+# just test a few in-place changes, and a few
+# size-reducing changes.
+
+
+checkgeo() {
+ # check the geometry of an array
+ # level raid_disks chunk_size layout
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then
+ val=$1
+ shift
+ if [ " `cat /sys/block/$dev/md/$attr`" != " $val" ]
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+restart() {
+ sleep 0.5
+ check reshape
+ mdadm -S $md0
+ mdadm -A $md0 $devs --backup-file=$bu
+ sleep 0.5
+ check reshape
+}
+
+bu=/tmp/md-backup
+rm -f $bu
+devs="$dev0 $dev1 $dev2 $dev3 $dev4"
+mdadm -CR $md0 -l5 -n5 -c 256 $devs
+checkgeo md0 raid5 5 $[256*1024] 2
+
+mdadm -G $md0 -c 128 --backup-file=$bu
+restart
+checkgeo md0 raid5 5 $[128*1024] 2
+
+mdadm -G $md0 --layout rs --backup-file=$bu
+restart
+checkgeo md0 raid5 5 $[128*1024] 3
+
+mdadm -G $md0 --array-size 59136
+mdadm -G $md0 --raid-disks 4 -c 64 --backup-file=$bu
+restart
+checkgeo md0 raid5 4 $[64*1024] 3
+
+devs="$dev0 $dev1 $dev2 $dev3"
+mdadm -G $md0 --array-size 19712
+mdadm -G $md0 -n 2 -c 256 --backup-file=$bu
+restart
+checkgeo md0 raid5 2 $[256*1024] 3
diff --git a/tests/07changelevels b/tests/07changelevels
new file mode 100644
index 0000000..57790a1
--- /dev/null
+++ b/tests/07changelevels
@@ -0,0 +1,107 @@
+
+# Test changing of level, chunksize etc.
+# Create a RAID1, convert to RAID5, add a disk, add another disk
+# convert to RAID6, back to RAID5 and ultimately to RAID1
+
+testK=$[64*3*6]
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$testK
+export MDADM_GROW_VERIFY=1
+
+dotest() { # check the array ($1 is forwarded to testdev), compare its data with RandFile, then rewrite it
+ sleep 0.5
+ check wait
+ testdev $md0 $1 $mdsize1 64 nd
+ blockdev --flushbufs $md0
+ cmp -s -n $[testK*1024] $md0 /tmp/RandFile || { echo cmp failed; exit 2; }
+ # write something new - rotate the printable ASCII characters by 4 places
+ tr ' -~' '$-~ -#' < /tmp/RandFile > /tmp/RandFile2
+ mv /tmp/RandFile2 /tmp/RandFile
+ dd if=/tmp/RandFile of=$md0
+}
+
+checkgeo() {
+ # check the geometry of an array
+ # level raid_disks chunk_size layout
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then
+ val=$1
+ shift
+ if [ " `cat /sys/block/$dev/md/$attr`" != " $val" ]
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+
+bu=/tmp/md-test-backup
+rm -f $bu
+mdadm -CR $md0 -l1 -n2 -x1 $dev0 $dev1 $dev2 -z 19968
+testdev $md0 1 $mdsize1 64
+dd if=/tmp/RandFile of=$md0
+dotest 1
+
+mdadm --grow $md0 -l5 -n3 --chunk 64
+dotest 2
+
+mdadm $md0 --add $dev3 $dev4
+mdadm --grow $md0 -n4 --chunk 32
+dotest 3
+
+mdadm -G $md0 -l6 --backup-file $bu
+dotest 3
+
+mdadm -G /dev/md0 --array-size 39936
+mdadm -G $md0 -n4 --backup-file $bu
+dotest 2
+
+mdadm -G $md0 -l5 --backup-file $bu
+dotest 2
+
+mdadm -G /dev/md0 --array-size 19968
+mdadm -G $md0 -n2 --backup-file $bu
+dotest 1
+
+mdadm -G --level=1 $md0
+dotest 1
+
+# now repeat that last few steps only with a degraded array.
+mdadm -S $md0
+mdadm -CR $md0 -l6 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+dd if=/tmp/RandFile of=$md0
+dotest 3
+
+mdadm $md0 --fail $dev0
+
+mdadm -G /dev/md0 --array-size 39936
+mdadm -G $md0 -n4 --backup-file $bu
+dotest 2
+mdadm $md0 --fail $dev4
+
+mdadm $md0 --fail $dev3
+# now double-degraded.
+# switch layout to a DDF layout and back to make sure that works.
+
+mdadm -G /dev/md0 --layout=ddf-N-continue --backup-file $bu
+checkgeo md0 raid6 4 $[512*1024] 10
+dotest 2
+mdadm -G /dev/md0 --layout=ra --backup-file $bu
+checkgeo md0 raid6 4 $[512*1024] 1
+dotest 2
+
+mdadm -G $md0 -l5 --backup-file $bu
+dotest 2
+
+mdadm -G /dev/md0 --array-size 19968
+mdadm -G $md0 -n2 --backup-file $bu
+dotest 1
+mdadm $md0 --fail $dev2
+
+mdadm -G --level=1 $md0
+dotest 1
diff --git a/tests/07layouts b/tests/07layouts
new file mode 100644
index 0000000..e72bb35
--- /dev/null
+++ b/tests/07layouts
@@ -0,0 +1,91 @@
+
+# check that kernel and restripe interpret all the different layouts
+# the same
+# This involves changing the layout to each different possibility
+# while MDADM_GROW_VERIFY is set.
+
+testK=$[64*3*6]
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$testK
+export MDADM_GROW_VERIFY=1
+
+
+dotest() { # check the array ($1 is forwarded to testdev), compare its data with RandFile, then rewrite it
+ sleep 0.5
+ check wait
+ testdev $md0 $1 $mdsize1 512 nd
+ blockdev --flushbufs $md0
+ cmp -s -n $[testK*1024] $md0 /tmp/RandFile || { echo cmp failed; exit 2; }
+ # write something new - rotate the printable ASCII characters by 4 places
+ tr ' -~' '$-~ -#' < /tmp/RandFile > /tmp/RandFile2
+ mv /tmp/RandFile2 /tmp/RandFile
+ dd if=/tmp/RandFile of=$md0
+}
+
+checkgeo() {
+ # check the geometry of an array
+ # level raid_disks chunk_size layout
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then
+ val=$1
+ shift
+ if [ " `cat /sys/block/$dev/md/$attr`" != " $val" ]
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+
+bu=/tmp/md-test-backup
+rm -f $bu
+
+# first a degraded 5 device raid5
+mdadm -CR $md0 -l5 -n5 $dev0 $dev1 missing $dev2 $dev3
+dd if=/tmp/RandFile of=$md0
+dotest 4
+
+l5[0]=la
+l5[1]=ra
+l5[2]=ls
+l5[3]=rs
+l5[4]=parity-first
+l5[5]=parity-last
+for layout in 0 1 2 3 4 5 0
+do
+ mdadm -G $md0 --layout=${l5[$layout]} --backup-file $bu
+ checkgeo md0 raid5 5 $[512*1024] $layout
+ dotest 4
+done
+
+mdadm -S $md0
+# now a doubly degraded raid6
+mdadm -CR $md0 -l6 -n5 $dev0 missing $dev2 missing $dev4
+dd if=/tmp/RandFile of=$md0
+dotest 3
+
+l6[0]=la
+l6[1]=ra
+l6[2]=ls
+l6[3]=rs
+l6[4]=parity-first
+l6[5]=parity-last
+l6[8]=ddf-zero-restart
+l6[9]=ddf-N-restart
+l6[10]=ddf-N-continue
+l6[16]=left-asymmetric-6
+l6[17]=right-asymmetric-6
+l6[18]=left-symmetric-6
+l6[19]=right-symmetric-6
+l6[20]=parity-first-6
+for layout in 0 1 2 3 4 5 8 9 10 16 17 18 19 20 0
+do
+ mdadm -G $md0 --layout=${l6[$layout]} --backup-file $bu
+ checkgeo md0 raid6 5 $[512*1024] $layout
+ dotest 3
+done
diff --git a/tests/07reshape5intr b/tests/07reshape5intr
index 96c8e02..3a5f115 100644
--- a/tests/07reshape5intr
+++ b/tests/07reshape5intr
@@ -1,6 +1,6 @@
#
-# test interrupting and restartign raid5 reshape.
+# test interrupting and restarting raid5 reshape.
set -x
devs="$dev1"
st=UU
@@ -12,7 +12,7 @@ do
do dd if=/dev/urandom of=$d bs=1024 || true
done
- mdadm -CR $md0 -amd -l5 -n$disks --assume-clean $devs
+ mdadm -CR $md0 -amd -l5 -c 256 -n$disks --assume-clean $devs
mdadm $md0 --add $dev6
echo 20 > /proc/sys/dev/raid/speed_limit_max
mdadm --grow $md0 -n $[disks+1]
diff --git a/tests/07testreshape5 b/tests/07testreshape5
index 4a9c0df..724621a 100644
--- a/tests/07testreshape5
+++ b/tests/07testreshape5
@@ -5,22 +5,27 @@
# shaped md arrays.
set -x
layouts=(la ra ls rs)
+for level in 5 6
+do
for chunk in 4 8 16 32 64 128
do
devs="$dev1"
for disks in 2 3 4 5 6
do
eval devs=\"$devs \$dev$disks\"
+ if [ " $level $disks" = " 6 3" -o " $level $disks" = " 6 2" ]
+ then continue
+ fi
for nlayout in 0 1 2 3
do
layout=${layouts[$nlayout]}
- size=$[chunk*(disks-1)*disks]
+ size=$[chunk*(disks-(level-4))*disks]
# test restore: make a raid5 from a file, then do a compare
dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$size
- $dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs
- mdadm -CR $md0 -amd -l5 -n$disks --assume-clean -c $chunk -p $layout $devs
+ $dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] $level $nlayout 0 $[size*1024] $devs
+ mdadm -CR -e 1.0 $md0 -amd -l$level -n$disks --assume-clean -c $chunk -p $layout $devs
cmp -s -n $[size*1024] $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
# FIXME check parity
@@ -29,12 +34,13 @@ do
dd if=/dev/urandom of=$md0 bs=1024 count=$size
blockdev --flushbufs $md0 $devs; sync
> /tmp/NewRand
- $dir/test_stripe save /tmp/NewRand $disks $[chunk*1024] 5 $nlayout 0 $[size*1024] $devs
+ $dir/test_stripe save /tmp/NewRand $disks $[chunk*1024] $level $nlayout 0 $[size*1024] $devs
cmp -s -n $[size*1024] $md0 /tmp/NewRand || { echo cmp failed ; exit 2; }
mdadm -S $md0
udevadm settle
done
done
done
+done
exit 0
diff --git a/tests/10ddf-create b/tests/10ddf-create
index a32dc0f..12c3820 100644
--- a/tests/10ddf-create
+++ b/tests/10ddf-create
@@ -12,10 +12,10 @@ mdadm -CR /dev/md/ddf0 -e ddf -n 5 $dev8 $dev9 $dev10 $dev11 $dev12
mdadm -CR r0 -l0 -n5 /dev/md/ddf0 -z 5000
mdadm -CR r1 -l1 -n2 /dev/md/ddf0
mdadm -CR r5 -l5 -n3 /dev/md/ddf0
-testdev /dev/md/r0 5 5000 64
-# r0 will use 4992 due to chunk size, so that leave 27776 for the rest
-testdev /dev/md/r1 1 27776 1
-testdev /dev/md/r5 2 27776 64
+testdev /dev/md/r0 5 5000 512
+# r0 will use 4608 due to chunk size, so that leaves 28160 for the rest
+testdev /dev/md/r1 1 28160 1
+testdev /dev/md/r5 2 28160 512
dd if=/dev/sda of=/dev/md/r0 || true
dd if=/dev/sda of=/dev/md/r1 || true
dd if=/dev/sda of=/dev/md/r5 || true
diff --git a/util.c b/util.c
index 98aedd0..a0e4bcf 100644
--- a/util.c
+++ b/util.c
@@ -149,6 +149,73 @@ int get_linux_version()
return (a*1000000)+(b*1000)+c;
}
+#ifndef MDASSEMBLE
+long long parse_size(char *size)
+{
+ /* parse 'size' which should be a number optionally
+ * followed by 'K', 'M', or 'G' (no suffix means K).
+ * Number returned is in sectors (half-K).
+ * Any other trailing character makes the result 0.
+ */
+ char *c;
+ long long s = strtoll(size, &c, 10);
+ if (s > 0) {
+ switch (*c) {
+ case 'K':
+ c++; /* fall through: K is also the default unit */
+ default:
+ s *= 2;
+ break;
+ case 'M':
+ c++;
+ s *= 1024 * 2;
+ break;
+ case 'G':
+ c++;
+ s *= 1024 * 1024 * 2;
+ break;
+ }
+ }
+ if (*c) /* unparsed characters remain after the number/suffix */
+ s = 0;
+ return s;
+}
+
+int parse_layout_10(char *layout)
+{
+ int copies, rv;
+ char *cp;
+ /* Parse the layout string for raid10; returns the encoded layout or -1 if malformed */
+ /* 'f', 'o' or 'n' followed by a number in 1..200 (raid_disks is not known or checked here) */
+ if ((layout[0] != 'n' && layout[0] != 'f' && layout[0] != 'o') ||
+ (copies = strtoul(layout+1, &cp, 10)) < 1 ||
+ copies > 200 ||
+ *cp)
+ return -1;
+ if (layout[0] == 'n')
+ rv = 256 + copies;
+ else if (layout[0] == 'o')
+ rv = 0x10000 + (copies<<8) + 1;
+ else
+ rv = 1 + (copies<<8);
+ return rv;
+}
+
+int parse_layout_faulty(char *layout)
+{
+ /* Parse the layout string for 'faulty': a mode name then an optional number; -1 if the name is unknown */
+ int ln = strcspn(layout, "0123456789");
+ char *m = strdup(layout);
+ int mode;
+ m[ln] = 0;
+ mode = map_name(faultylayout, m);
+ free(m); /* the truncated copy is only needed for the name lookup */
+ if (mode == UnSet)
+ return -1;
+ return mode | (atoi(layout+ln)<< ModeShift);
+}
+#endif
+
void remove_partitions(int fd)
{
/* remove partitions from this block devices.
@@ -194,9 +261,9 @@ int enough(int level, int raid_disks, int layout, int clean,
} while (first != 0);
return 1;
- case -4:
+ case LEVEL_MULTIPATH:
return avail_disks>= 1;
- case -1:
+ case LEVEL_LINEAR:
case 0:
return avail_disks == raid_disks;
case 1: