md: don't fail action_store() if sync_thread is not registered
MD_RECOVERY_RUNNING will always be set when trying to register a new sync_thread, however, if md_start_sync() turns out to do nothing, MD_RECOVERY_RUNNING will be cleared in this case. And during the race window, action_store() will return -EBUSY, which will cause some mdadm tests to fail. For example: The test 07reshape5intr will add a new disk to array, then start reshape: mdadm /dev/md0 --add /dev/xxx mdadm --grow /dev/md0 -n 3 And add_bound_rdev() from mdadm --add will set MD_RECOVERY_NEEDED, then during the race windown, mdadm --grow will fail. Fix the problem by waiting in action_store() during the race window, fail only if sync_thread is registered. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240611132251.1967786-8-yukuai1@huaweicloud.com
This commit is contained in:
+33
-52
@@ -753,7 +753,6 @@ int mddev_init(struct mddev *mddev)
|
||||
|
||||
mutex_init(&mddev->open_mutex);
|
||||
mutex_init(&mddev->reconfig_mutex);
|
||||
mutex_init(&mddev->sync_mutex);
|
||||
mutex_init(&mddev->suspend_mutex);
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&mddev->disks);
|
||||
@@ -5021,34 +5020,6 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
|
||||
|
||||
static void idle_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (mddev_lock(mddev)) {
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false);
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static void frozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (mddev_lock(mddev)) {
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false);
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static int mddev_start_reshape(struct mddev *mddev)
|
||||
{
|
||||
int ret;
|
||||
@@ -5056,24 +5027,13 @@ static int mddev_start_reshape(struct mddev *mddev)
|
||||
if (mddev->pers->start_reshape == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
ret = mddev_lock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
mddev_unlock(mddev);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (mddev->reshape_position == MaxSector ||
|
||||
mddev->pers->check_reshape == NULL ||
|
||||
mddev->pers->check_reshape(mddev)) {
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
ret = mddev->pers->start_reshape(mddev);
|
||||
if (ret) {
|
||||
mddev_unlock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* If reshape is still in progress, and md_check_recovery() can
|
||||
@@ -5083,7 +5043,6 @@ static int mddev_start_reshape(struct mddev *mddev)
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
}
|
||||
|
||||
mddev_unlock(mddev);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
|
||||
return 0;
|
||||
}
|
||||
@@ -5097,36 +5056,53 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
if (!mddev->pers || !mddev->pers->sync_request)
|
||||
return -EINVAL;
|
||||
|
||||
retry:
|
||||
if (work_busy(&mddev->sync_work))
|
||||
flush_work(&mddev->sync_work);
|
||||
|
||||
ret = mddev_lock(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (work_busy(&mddev->sync_work)) {
|
||||
mddev_unlock(mddev);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
action = md_sync_action_by_name(page);
|
||||
|
||||
/* TODO: mdadm rely on "idle" to start sync_thread. */
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
switch (action) {
|
||||
case ACTION_FROZEN:
|
||||
frozen_sync_thread(mddev);
|
||||
return len;
|
||||
md_frozen_sync_thread(mddev);
|
||||
ret = len;
|
||||
goto out;
|
||||
case ACTION_IDLE:
|
||||
idle_sync_thread(mddev);
|
||||
md_idle_sync_thread(mddev);
|
||||
break;
|
||||
case ACTION_RESHAPE:
|
||||
case ACTION_RECOVER:
|
||||
case ACTION_CHECK:
|
||||
case ACTION_REPAIR:
|
||||
case ACTION_RESYNC:
|
||||
return -EBUSY;
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
default:
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
switch (action) {
|
||||
case ACTION_FROZEN:
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
return len;
|
||||
ret = len;
|
||||
goto out;
|
||||
case ACTION_RESHAPE:
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
ret = mddev_start_reshape(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
break;
|
||||
case ACTION_RECOVER:
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
@@ -5144,7 +5120,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5152,14 +5129,18 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
/* A write to sync_action is enough to justify
|
||||
* canceling read-auto mode
|
||||
*/
|
||||
flush_work(&mddev->sync_work);
|
||||
mddev->ro = MD_RDWR;
|
||||
md_wakeup_thread(mddev->sync_thread);
|
||||
}
|
||||
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
return len;
|
||||
ret = len;
|
||||
|
||||
out:
|
||||
mddev_unlock(mddev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry md_scan_mode =
|
||||
|
||||
@@ -595,8 +595,6 @@ struct mddev {
|
||||
*/
|
||||
struct list_head deleting;
|
||||
|
||||
/* Used to synchronize idle and frozen for action_store() */
|
||||
struct mutex sync_mutex;
|
||||
/* The sequence number for sync thread */
|
||||
atomic_t sync_seq;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user