summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2015-05-25 16:33:45 +1000
committer: NeilBrown <neilb@suse.de> 2015-05-25 16:33:45 +1000
commit: e0cc1c8d8b29e28f0977fc16db54a38a44274765 (patch)
tree: c7f57c75f21d61ed406dd5a5b4346a237cc58cab
parent: 4a984120ea3d4ebd1b84bdf020c5ba3c14320213 (diff)
download: mdadm-e0cc1c8d8b29e28f0977fc16db54a38a44274765.tar.gz
Grow: another attempt to fix stop-during-reshape race.
When the array is stopped during a critical section, we sometimes erase the backup, which is bad. This happens when 'completed' is zero. This can happen easily when 'stop' freezes reshape. So try to be more careful and check 'reshape_position'.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- Grow.c 34
1 files changed, 18 insertions, 16 deletions
diff --git a/Grow.c b/Grow.c
index f2cf46a..a20ff3e 100644
--- a/Grow.c
+++ b/Grow.c
@@ -3858,27 +3858,30 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
}
/* Some kernels reset 'sync_completed' to zero,
* we need to have real point we are in md.
- * But only if array is actually still reshaping,
- * not stopped.
+ * So in that case, read 'reshape_position' from sysfs.
*/
if (completed == 0) {
+ unsigned long long reshapep;
char action[20];
if (sysfs_get_str(info, NULL, "sync_action",
action, 20) > 0 &&
- strncmp(action, "idle", 4) == 0)
- completed = max_progress;
- }
-
- /* some kernels can give an incorrectly high 'completed' number */
- completed /= (info->new_chunk/512);
- completed *= (info->new_chunk/512);
- /* Convert 'completed' back in to a 'progress' number */
- completed *= reshape->after.data_disks;
- if (!advancing) {
- completed = info->component_size * reshape->after.data_disks
- - completed;
+ strncmp(action, "idle", 4) == 0 &&
+ sysfs_get_ll(info, NULL,
+ "reshape_position", &reshapep) == 0)
+ *reshape_completed = reshapep;
+ } else {
+ /* some kernels can give an incorrectly high
+ * 'completed' number, so round down */
+ completed /= (info->new_chunk/512);
+ completed *= (info->new_chunk/512);
+ /* Convert 'completed' back in to a 'progress' number */
+ completed *= reshape->after.data_disks;
+ if (!advancing)
+ completed = (info->component_size
+ * reshape->after.data_disks
+ - completed);
+ *reshape_completed = completed;
}
- *reshape_completed = completed;
close(fd);
@@ -3898,7 +3901,6 @@ check_progress:
* it was just a device failure that leaves us degraded but
* functioning.
*/
- strcpy(buf, "hi");
if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
|| strncmp(buf, "none", 4) != 0) {
/* The abort might only be temporary. Wait up to 10