summaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorMariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>2022-03-22 16:23:38 +0100
committerSong Liu <song@kernel.org>2022-04-25 14:00:34 -0700
commit9631abdbf406c764f2a5d8305eac063bc3396a0a (patch)
tree562765b208e282d83742f8c90b84d6bc5fcf934e /drivers/md/md.c
parent5ea7c1339e3ed094dd4df48d598f9018a2587283 (diff)
downloadlinux-9631abdbf406c764f2a5d8305eac063bc3396a0a.tar.gz
md: Set MD_BROKEN for RAID1 and RAID10
There is no direct mechanism to determine raid failure outside personality. It is done by checking rdev->flags after executing md_error(). If "faulty" flag is not set then -EBUSY is returned to userspace. -EBUSY means that array will be failed after drive removal. Mdadm has special routine to handle the array failure and it is executed if -EBUSY is returned by md. There are at least two known reasons to not consider this mechanism as correct: 1. drive can be removed even if array will be failed[1]. 2. -EBUSY seems to be wrong status. Array is not busy, but removal process cannot proceed safe. -EBUSY expectation cannot be removed without breaking compatibility with userspace. In this patch first issue is resolved by adding support for MD_BROKEN flag for RAID1 and RAID10. Support for RAID456 is added in next commit. The idea is to set the MD_BROKEN if we are sure that raid is in failed state now. This is done in each error_handler(). In md_error() MD_BROKEN flag is checked. If is set, then -EBUSY is returned to userspace. As in previous commit, it causes that #mdadm --set-faulty is able to fail array. Previously proposed workaround is valid if optional functionality[1] is disabled. [1] commit 9a567843f7ce("md: allow last device to be forcibly removed from RAID1/RAID10.") Reviewd-by: Xiao Ni <xni@redhat.com> Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com> Signed-off-by: Song Liu <song@kernel.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c27
1 files changed, 15 insertions, 12 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2587f872c088..9294f13e0c9d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2984,10 +2984,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- if (test_bit(Faulty, &rdev->flags))
- err = 0;
- else
+
+ if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
+ else
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -4353,10 +4354,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* like active, but no writes have been seen for a while (100msec).
*
* broken
- * RAID0/LINEAR-only: same as clean, but array is missing a member.
- * It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- * when a member is gone, so this state will at least alert the
- * user that something is wrong.
+* Array is failed. It's useful because mounted-arrays aren't stopped
+* when array is failed, so this state will at least alert the user that
+* something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, broken, bad_word};
@@ -7443,7 +7443,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags))
+ if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
}
rcu_read_unlock();
@@ -7984,13 +7984,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
if (!mddev->pers || !mddev->pers->error_handler)
return;
- mddev->pers->error_handler(mddev,rdev);
- if (mddev->degraded)
+ mddev->pers->error_handler(mddev, rdev);
+
+ if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ if (!test_bit(MD_BROKEN, &mddev->flags)) {
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();