Merge tag 'md-next-20231012' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.7/block
Pull MD changes from Song:

"1. Rewrite mddev_suspend(), by Yu Kuai;
 2. Simplify md_seq_ops, by Yu Kuai;
 3. Reduce unnecessary locking array_state_store(), by Mariusz Tkaczyk."

* tag 'md-next-20231012' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (23 commits)
  md: rename __mddev_suspend/resume() back to mddev_suspend/resume()
  md: remove old apis to suspend the array
  md: suspend array in md_start_sync() if array need reconfiguration
  md/raid5: replace suspend with quiesce() callback
  md/md-linear: cleanup linear_add()
  md: cleanup mddev_create/destroy_serial_pool()
  md: use new apis to suspend array before mddev_create/destroy_serial_pool
  md: use new apis to suspend array for ioctls involed array reconfiguration
  md: use new apis to suspend array for adding/removing rdev from state_store()
  md: use new apis to suspend array for sysfs apis
  md/raid5: use new apis to suspend array
  md/raid5-cache: use new apis to suspend array
  md/md-bitmap: use new apis to suspend array for location_store()
  md/dm-raid: use new apis to suspend array
  md: add new helpers to suspend/resume and lock/unlock array
  md: add new helpers to suspend/resume array
  md: replace is_md_suspended() with 'mddev->suspended' in md_check_recovery()
  md/raid5-cache: use READ_ONCE/WRITE_ONCE for 'conf->log'
  md: use READ_ONCE/WRITE_ONCE for 'suspend_lo' and 'suspend_hi'
  md/raid1: don't split discard io for write behind
  ...
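Nearly every hunk below is the same transformation: callers that previously took reconfig_mutex and then suspended the array now suspend first and lock second, through combined helpers added in md.h. A minimal sketch of the two calling conventions (illustrative only; reconfigure() is a placeholder, not an md function):

	static int store_something_old(struct mddev *mddev)
	{
		int err = mddev_lock(mddev);		/* old order: lock ... */

		if (err)
			return err;
		mddev_suspend(mddev);			/* ... then suspend */
		reconfigure(mddev);
		mddev_resume(mddev);
		mddev_unlock(mddev);
		return 0;
	}

	static int store_something_new(struct mddev *mddev)
	{
		int err = mddev_suspend_and_lock(mddev);	/* suspend, then lock */

		if (err)
			return err;
		reconfigure(mddev);
		mddev_unlock_and_resume(mddev);		/* unlock, then resume */
		return 0;
	}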
@@ -3244,7 +3244,7 @@ size_check:
 	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
 
 	/* Has to be held on running the array */
-	mddev_lock_nointr(&rs->md);
+	mddev_suspend_and_lock_nointr(&rs->md);
 	r = md_run(&rs->md);
 	rs->md.in_sync = 0; /* Assume already marked dirty */
 	if (r) {
@@ -3268,7 +3268,6 @@ size_check:
 		}
 	}
 
-	mddev_suspend(&rs->md);
 	set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
 
 	/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
@@ -3798,9 +3797,7 @@ static void raid_postsuspend(struct dm_target *ti)
 		if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
 			md_stop_writes(&rs->md);
 
-		mddev_lock_nointr(&rs->md);
-		mddev_suspend(&rs->md);
-		mddev_unlock(&rs->md);
+		mddev_suspend(&rs->md, false);
 	}
 }
 
@@ -4059,8 +4056,7 @@ static void raid_resume(struct dm_target *ti)
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		mddev->ro = 0;
 		mddev->in_sync = 0;
-		mddev_resume(mddev);
-		mddev_unlock(mddev);
+		mddev_unlock_and_resume(mddev);
 	}
 }
@@ -175,7 +175,7 @@ static void __init md_setup_drive(struct md_setup_args *args)
 		return;
 	}
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err) {
 		pr_err("md: failed to lock array %s\n", name);
 		goto out_mddev_put;
@@ -221,7 +221,7 @@ static void __init md_setup_drive(struct md_setup_args *args)
 	if (err)
 		pr_warn("md: starting %s failed\n", name);
 out_unlock:
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 out_mddev_put:
 	mddev_put(mddev);
 }
+8 -10
@@ -1861,7 +1861,7 @@ void md_bitmap_destroy(struct mddev *mddev)
 
 	md_bitmap_wait_behind_writes(mddev);
 	if (!mddev->serialize_policy)
-		mddev_destroy_serial_pool(mddev, NULL, true);
+		mddev_destroy_serial_pool(mddev, NULL);
 
 	mutex_lock(&mddev->bitmap_info.mutex);
 	spin_lock(&mddev->lock);
@@ -1977,7 +1977,7 @@ int md_bitmap_load(struct mddev *mddev)
 		goto out;
 
 	rdev_for_each(rdev, mddev)
-		mddev_create_serial_pool(mddev, rdev, true);
+		mddev_create_serial_pool(mddev, rdev);
 
 	if (mddev_is_clustered(mddev))
 		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
@@ -2348,11 +2348,10 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	int rv;
 
-	rv = mddev_lock(mddev);
+	rv = mddev_suspend_and_lock(mddev);
 	if (rv)
 		return rv;
 
-	mddev_suspend(mddev);
 	if (mddev->pers) {
 		if (mddev->recovery || mddev->sync_thread) {
 			rv = -EBUSY;
@@ -2429,8 +2428,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 	}
 	rv = 0;
 out:
-	mddev_resume(mddev);
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	if (rv)
 		return rv;
 	return len;
@@ -2539,7 +2537,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
 	if (backlog > COUNTER_MAX)
 		return -EINVAL;
 
-	rv = mddev_lock(mddev);
+	rv = mddev_suspend_and_lock(mddev);
 	if (rv)
 		return rv;
 
@@ -2564,16 +2562,16 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
 	if (!backlog && mddev->serial_info_pool) {
 		/* serial_info_pool is not needed if backlog is zero */
 		if (!mddev->serialize_policy)
-			mddev_destroy_serial_pool(mddev, NULL, false);
+			mddev_destroy_serial_pool(mddev, NULL);
 	} else if (backlog && !mddev->serial_info_pool) {
 		/* serial_info_pool is needed since backlog is not zero */
 		rdev_for_each(rdev, mddev)
-			mddev_create_serial_pool(mddev, rdev, false);
+			mddev_create_serial_pool(mddev, rdev);
 	}
 	if (old_mwb != backlog)
 		md_bitmap_update_sb(mddev->bitmap);
 
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return len;
 }
@@ -183,7 +183,6 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
 	 * in linear_congested(), therefore kfree_rcu() is used to free
 	 * oldconf until no one uses it anymore.
 	 */
-	mddev_suspend(mddev);
 	oldconf = rcu_dereference_protected(mddev->private,
 			lockdep_is_held(&mddev->reconfig_mutex));
 	mddev->raid_disks++;
@@ -192,7 +191,6 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
 	rcu_assign_pointer(mddev->private, newconf);
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
-	mddev_resume(mddev);
 	kfree_rcu(oldconf, rcu);
 	return 0;
 }
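The suspend/resume pair around the RCU pointer swap becomes redundant with this series: hot-adding a disk reaches linear_add() through md_ioctl(), and the ioctl path now suspends the array before taking reconfig_mutex (see md_ioctl_need_suspend() in the md.c hunks below), so the personality no longer needs to suspend itself.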
+187 -212
@@ -206,8 +206,7 @@ static int rdev_need_serial(struct md_rdev *rdev)
  * 1. rdev is the first device which return true from rdev_enable_serial.
  * 2. rdev is NULL, means we want to enable serialization for all rdevs.
  */
-void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
-			      bool is_suspend)
+void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 {
 	int ret = 0;
 
@@ -215,15 +214,12 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 	    !test_bit(CollisionCheck, &rdev->flags))
 		return;
 
-	if (!is_suspend)
-		mddev_suspend(mddev);
-
 	if (!rdev)
 		ret = rdevs_init_serial(mddev);
 	else
 		ret = rdev_init_serial(rdev);
 	if (ret)
-		goto abort;
+		return;
 
 	if (mddev->serial_info_pool == NULL) {
 		/*
@@ -238,10 +234,6 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 			pr_err("can't alloc memory pool for serialization\n");
 		}
 	}
-
-abort:
-	if (!is_suspend)
-		mddev_resume(mddev);
 }
 
 /*
@@ -250,8 +242,7 @@ abort:
  * 2. when bitmap is destroyed while policy is not enabled.
  * 3. for disable policy, the pool is destroyed only when no rdev needs it.
  */
-void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
-			       bool is_suspend)
+void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 {
 	if (rdev && !test_bit(CollisionCheck, &rdev->flags))
 		return;
@@ -260,8 +251,6 @@ void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 		struct md_rdev *temp;
 		int num = 0; /* used to track if other rdevs need the pool */
 
-		if (!is_suspend)
-			mddev_suspend(mddev);
 		rdev_for_each(temp, mddev) {
 			if (!rdev) {
 				if (!mddev->serialize_policy ||
@@ -283,8 +272,6 @@ void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
 			mempool_destroy(mddev->serial_info_pool);
 			mddev->serial_info_pool = NULL;
 		}
-		if (!is_suspend)
-			mddev_resume(mddev);
 	}
 }
 
@@ -359,11 +346,11 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
 		return true;
 	if (bio_data_dir(bio) != WRITE)
 		return false;
-	if (mddev->suspend_lo >= mddev->suspend_hi)
+	if (READ_ONCE(mddev->suspend_lo) >= READ_ONCE(mddev->suspend_hi))
 		return false;
-	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+	if (bio->bi_iter.bi_sector >= READ_ONCE(mddev->suspend_hi))
 		return false;
-	if (bio_end_sector(bio) < mddev->suspend_lo)
+	if (bio_end_sector(bio) < READ_ONCE(mddev->suspend_lo))
 		return false;
 	return true;
 }
@@ -431,42 +418,73 @@ static void md_submit_bio(struct bio *bio)
 	md_handle_request(mddev, bio);
 }
 
-/* mddev_suspend makes sure no new requests are submitted
- * to the device, and that any requests that have been submitted
- * are completely handled.
- * Once mddev_detach() is called and completes, the module will be
- * completely unused.
+/*
+ * Make sure no new requests are submitted to the device, and any requests that
+ * have been submitted are completely handled.
  */
-void mddev_suspend(struct mddev *mddev)
+int mddev_suspend(struct mddev *mddev, bool interruptible)
 {
-	struct md_thread *thread = rcu_dereference_protected(mddev->thread,
-			lockdep_is_held(&mddev->reconfig_mutex));
+	int err = 0;
+
+	/*
+	 * hold reconfig_mutex to wait for normal io will deadlock, because
+	 * other context can't update super_block, and normal io can rely on
+	 * updating super_block.
+	 */
+	lockdep_assert_not_held(&mddev->reconfig_mutex);
+
+	if (interruptible)
+		err = mutex_lock_interruptible(&mddev->suspend_mutex);
+	else
+		mutex_lock(&mddev->suspend_mutex);
+	if (err)
+		return err;
+
+	if (mddev->suspended) {
+		WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
+		mutex_unlock(&mddev->suspend_mutex);
+		return 0;
+	}
 
-	WARN_ON_ONCE(thread && current == thread->tsk);
-	if (mddev->suspended++)
-		return;
-	wake_up(&mddev->sb_wait);
-	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
 	percpu_ref_kill(&mddev->active_io);
+	if (interruptible)
+		err = wait_event_interruptible(mddev->sb_wait,
+				percpu_ref_is_zero(&mddev->active_io));
+	else
+		wait_event(mddev->sb_wait,
+				percpu_ref_is_zero(&mddev->active_io));
+	if (err) {
+		percpu_ref_resurrect(&mddev->active_io);
+		mutex_unlock(&mddev->suspend_mutex);
+		return err;
+	}
 
-	if (mddev->pers && mddev->pers->prepare_suspend)
-		mddev->pers->prepare_suspend(mddev);
-
-	wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
-	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
-	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
+	/*
+	 * For raid456, io might be waiting for reshape to make progress,
+	 * allow new reshape to start while waiting for io to be done to
+	 * prevent deadlock.
+	 */
+	WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
 
 	del_timer_sync(&mddev->safemode_timer);
 	/* restrict memory reclaim I/O during raid array is suspend */
 	mddev->noio_flag = memalloc_noio_save();
+
+	mutex_unlock(&mddev->suspend_mutex);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
-	lockdep_assert_held(&mddev->reconfig_mutex);
-	if (--mddev->suspended)
+	lockdep_assert_not_held(&mddev->reconfig_mutex);
+
+	mutex_lock(&mddev->suspend_mutex);
+	WRITE_ONCE(mddev->suspended, mddev->suspended - 1);
+	if (mddev->suspended) {
+		mutex_unlock(&mddev->suspend_mutex);
 		return;
+	}
 
 	/* entred the memalloc scope from mddev_suspend() */
 	memalloc_noio_restore(mddev->noio_flag);
@@ -477,6 +495,8 @@ void mddev_resume(struct mddev *mddev)
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
+
+	mutex_unlock(&mddev->suspend_mutex);
 }
 EXPORT_SYMBOL_GPL(mddev_resume);
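The rewritten pair above replaces reconfig_mutex nesting with a dedicated suspend_mutex plus a depth counter that hot paths read locklessly. A simplified model of the counting scheme follows (a sketch under assumptions, not the md code: names are invented, and the percpu_ref draining, interruptible waits and memalloc_noio handling are elided):

	#include <linux/mutex.h>

	struct dev_state {
		struct mutex	suspend_mutex;	/* serializes suspend/resume */
		int		suspended;	/* depth; hot paths use READ_ONCE() */
	};

	static void dev_quiesce(struct dev_state *s)   { /* drain in-flight I/O */ }
	static void dev_unquiesce(struct dev_state *s) { /* restart I/O */ }

	static void dev_suspend(struct dev_state *s)
	{
		mutex_lock(&s->suspend_mutex);
		/* only the first suspender quiesces; others just add a level */
		if (!s->suspended)
			dev_quiesce(s);
		WRITE_ONCE(s->suspended, s->suspended + 1);
		mutex_unlock(&s->suspend_mutex);
	}

	static void dev_resume(struct dev_state *s)
	{
		mutex_lock(&s->suspend_mutex);
		WRITE_ONCE(s->suspended, s->suspended - 1);
		/* only the last resume restarts I/O */
		if (!s->suspended)
			dev_unquiesce(s);
		mutex_unlock(&s->suspend_mutex);
	}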
@@ -616,23 +636,28 @@ static inline struct mddev *mddev_get(struct mddev *mddev)
 
 static void mddev_delayed_delete(struct work_struct *ws);
 
+static void __mddev_put(struct mddev *mddev)
+{
+	if (mddev->raid_disks || !list_empty(&mddev->disks) ||
+	    mddev->ctime || mddev->hold_active)
+		return;
+
+	/* Array is not configured at all, and not held active, so destroy it */
+	set_bit(MD_DELETED, &mddev->flags);
+
+	/*
+	 * Call queue_work inside the spinlock so that flush_workqueue() after
+	 * mddev_find will succeed in waiting for the work to be done.
+	 */
+	queue_work(md_misc_wq, &mddev->del_work);
+}
+
 void mddev_put(struct mddev *mddev)
 {
 	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
-	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
-	    mddev->ctime == 0 && !mddev->hold_active) {
-		/* Array is not configured at all, and not held active,
-		 * so destroy it */
-		set_bit(MD_DELETED, &mddev->flags);
 
-		/*
-		 * Call queue_work inside the spinlock so that
-		 * flush_workqueue() after mddev_find will succeed in waiting
-		 * for the work to be done.
-		 */
-		queue_work(md_misc_wq, &mddev->del_work);
-	}
+	__mddev_put(mddev);
 	spin_unlock(&all_mddevs_lock);
 }
 
@@ -667,6 +692,7 @@ int mddev_init(struct mddev *mddev)
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
 	mutex_init(&mddev->sync_mutex);
+	mutex_init(&mddev->suspend_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
 	INIT_LIST_HEAD(&mddev->disks);
 	INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -2454,7 +2480,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 	pr_debug("md: bind<%s>\n", b);
 
 	if (mddev->raid_disks)
-		mddev_create_serial_pool(mddev, rdev, false);
+		mddev_create_serial_pool(mddev, rdev);
 
 	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
 		goto fail;
@@ -2507,7 +2533,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
 	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
 	list_del_rcu(&rdev->same_set);
 	pr_debug("md: unbind<%pg>\n", rdev->bdev);
-	mddev_destroy_serial_pool(rdev->mddev, rdev, false);
+	mddev_destroy_serial_pool(rdev->mddev, rdev);
 	rdev->mddev = NULL;
 	sysfs_remove_link(&rdev->kobj, "block");
 	sysfs_put(rdev->sysfs_state);
@@ -2837,11 +2863,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
 	 */
 	super_types[mddev->major_version].
 		validate_super(mddev, rdev);
-	if (add_journal)
-		mddev_suspend(mddev);
 	err = mddev->pers->hot_add_disk(mddev, rdev);
-	if (add_journal)
-		mddev_resume(mddev);
 	if (err) {
 		md_kick_rdev_from_array(rdev);
 		return err;
@@ -2978,11 +3000,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 		}
 	} else if (cmd_match(buf, "writemostly")) {
 		set_bit(WriteMostly, &rdev->flags);
-		mddev_create_serial_pool(rdev->mddev, rdev, false);
+		mddev_create_serial_pool(rdev->mddev, rdev);
 		need_update_sb = true;
 		err = 0;
 	} else if (cmd_match(buf, "-writemostly")) {
-		mddev_destroy_serial_pool(rdev->mddev, rdev, false);
+		mddev_destroy_serial_pool(rdev->mddev, rdev);
 		clear_bit(WriteMostly, &rdev->flags);
 		need_update_sb = true;
 		err = 0;
@@ -3594,6 +3616,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
 	struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
 	struct kernfs_node *kn = NULL;
+	bool suspend = false;
 	ssize_t rv;
 	struct mddev *mddev = rdev->mddev;
 
@@ -3601,17 +3624,25 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
 		return -EIO;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
+	if (!mddev)
+		return -ENODEV;
 
-	if (entry->store == state_store && cmd_match(page, "remove"))
-		kn = sysfs_break_active_protection(kobj, attr);
+	if (entry->store == state_store) {
+		if (cmd_match(page, "remove"))
+			kn = sysfs_break_active_protection(kobj, attr);
+		if (cmd_match(page, "remove") || cmd_match(page, "re-add") ||
+		    cmd_match(page, "writemostly") ||
+		    cmd_match(page, "-writemostly"))
+			suspend = true;
+	}
 
-	rv = mddev ? mddev_lock(mddev) : -ENODEV;
+	rv = suspend ? mddev_suspend_and_lock(mddev) : mddev_lock(mddev);
 	if (!rv) {
 		if (rdev->mddev == NULL)
 			rv = -ENODEV;
 		else
 			rv = entry->store(rdev, page, length);
-		mddev_unlock(mddev);
+		suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
 	}
 
 	if (kn)
@@ -3916,7 +3947,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	if (slen == 0 || slen >= sizeof(clevel))
 		return -EINVAL;
 
-	rv = mddev_lock(mddev);
+	rv = mddev_suspend_and_lock(mddev);
 	if (rv)
 		return rv;
 
@@ -4009,7 +4040,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	}
 
 	/* Looks like we have a winner */
-	mddev_suspend(mddev);
 	mddev_detach(mddev);
 
 	spin_lock(&mddev->lock);
@@ -4095,14 +4125,13 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	blk_set_stacking_limits(&mddev->queue->limits);
 	pers->run(mddev);
 	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
-	mddev_resume(mddev);
 	if (!mddev->thread)
 		md_update_sb(mddev, 1);
 	sysfs_notify_dirent_safe(mddev->sysfs_level);
 	md_new_event();
 	rv = len;
 out_unlock:
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return rv;
 }
@@ -4410,6 +4439,18 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	int err = 0;
 	enum array_state st = match_word(buf, array_states);
 
+	/* No lock dependent actions */
+	switch (st) {
+	case suspended:		/* not supported yet */
+	case write_pending:	/* cannot be set */
+	case active_idle:	/* cannot be set */
+	case broken:		/* cannot be set */
+	case bad_word:
+		return -EINVAL;
+	default:
+		break;
+	}
+
 	if (mddev->pers && (st == active || st == clean) &&
 	    mddev->ro != MD_RDONLY) {
 		/* don't take reconfig_mutex when toggling between
@@ -4434,23 +4475,16 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	err = mddev_lock(mddev);
 	if (err)
 		return err;
-	err = -EINVAL;
-	switch(st) {
-	case bad_word:
-		break;
-	case clear:
-		/* stopping an active array */
-		err = do_md_stop(mddev, 0, NULL);
-		break;
+
+	switch (st) {
 	case inactive:
-		/* stopping an active array */
+		/* stop an active array, return 0 otherwise */
 		if (mddev->pers)
 			err = do_md_stop(mddev, 2, NULL);
-		else
-			err = 0; /* already inactive */
 		break;
-	case suspended:
-		break; /* not supported yet */
+	case clear:
+		err = do_md_stop(mddev, 0, NULL);
+		break;
 	case readonly:
 		if (mddev->pers)
 			err = md_set_readonly(mddev, NULL);
@@ -4501,10 +4535,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 			err = do_md_run(mddev);
 		}
 		break;
-	case write_pending:
-	case active_idle:
-	case broken:
-		/* these cannot be set */
+	default:
+		err = -EINVAL;
 		break;
 	}
 
@@ -4577,7 +4609,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 	    minor != MINOR(dev))
 		return -EOVERFLOW;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 	if (mddev->persistent) {
@@ -4598,14 +4630,14 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 		rdev = md_import_device(dev, -1, -1);
 
 	if (IS_ERR(rdev)) {
-		mddev_unlock(mddev);
+		mddev_unlock_and_resume(mddev);
 		return PTR_ERR(rdev);
 	}
 	err = bind_rdev_to_array(rdev, mddev);
 out:
 	if (err)
 		export_rdev(rdev, mddev);
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	if (!err)
 		md_new_event();
 	return err ? err : len;
@@ -5171,7 +5203,8 @@ __ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
 static ssize_t
 suspend_lo_show(struct mddev *mddev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)READ_ONCE(mddev->suspend_lo));
 }
 
 static ssize_t
@@ -5186,15 +5219,13 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 	if (new != (sector_t)new)
 		return -EINVAL;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend(mddev, true);
 	if (err)
 		return err;
 
-	mddev_suspend(mddev);
-	mddev->suspend_lo = new;
+	WRITE_ONCE(mddev->suspend_lo, new);
 	mddev_resume(mddev);
 
-	mddev_unlock(mddev);
 	return len;
 }
 static struct md_sysfs_entry md_suspend_lo =
@@ -5203,7 +5234,8 @@ __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
 static ssize_t
 suspend_hi_show(struct mddev *mddev, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)READ_ONCE(mddev->suspend_hi));
 }
 
 static ssize_t
@@ -5218,15 +5250,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 	if (new != (sector_t)new)
 		return -EINVAL;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend(mddev, true);
 	if (err)
 		return err;
 
-	mddev_suspend(mddev);
-	mddev->suspend_hi = new;
+	WRITE_ONCE(mddev->suspend_hi, new);
 	mddev_resume(mddev);
 
-	mddev_unlock(mddev);
 	return len;
 }
 static struct md_sysfs_entry md_suspend_hi =
@@ -5474,7 +5504,7 @@ serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
 	if (value == mddev->serialize_policy)
 		return len;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 	if (mddev->pers == NULL || (mddev->pers->level != 1)) {
@@ -5483,15 +5513,13 @@ serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
 		goto unlock;
 	}
 
-	mddev_suspend(mddev);
 	if (value)
-		mddev_create_serial_pool(mddev, NULL, true);
+		mddev_create_serial_pool(mddev, NULL);
 	else
-		mddev_destroy_serial_pool(mddev, NULL, true);
+		mddev_destroy_serial_pool(mddev, NULL);
 	mddev->serialize_policy = value;
-	mddev_resume(mddev);
 unlock:
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return err ?: len;
 }
@@ -6254,7 +6282,7 @@ static void __md_stop_writes(struct mddev *mddev)
 	}
 	/* disable policy to guarantee rdevs free resources for serialization */
 	mddev->serialize_policy = 0;
-	mddev_destroy_serial_pool(mddev, NULL, true);
+	mddev_destroy_serial_pool(mddev, NULL);
 }
 
 void md_stop_writes(struct mddev *mddev)
@@ -6546,13 +6574,13 @@ static void autorun_devices(int part)
 		if (IS_ERR(mddev))
 			break;
 
-		if (mddev_lock(mddev))
+		if (mddev_suspend_and_lock(mddev))
 			pr_warn("md: %s locked, cannot run\n", mdname(mddev));
 		else if (mddev->raid_disks || mddev->major_version
 			 || !list_empty(&mddev->disks)) {
 			pr_warn("md: %s already running, cannot run %pg\n",
 				mdname(mddev), rdev0->bdev);
-			mddev_unlock(mddev);
+			mddev_unlock_and_resume(mddev);
 		} else {
 			pr_debug("md: created %s\n", mdname(mddev));
 			mddev->persistent = 1;
@@ -6562,7 +6590,7 @@ static void autorun_devices(int part)
 					export_rdev(rdev, mddev);
 			}
 			autorun_array(mddev);
-			mddev_unlock(mddev);
+			mddev_unlock_and_resume(mddev);
 		}
 		/* on success, candidates will be empty, on error
 		 * it won't...
@@ -7112,7 +7140,6 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 			struct bitmap *bitmap;
 
 			bitmap = md_bitmap_create(mddev, -1);
-			mddev_suspend(mddev);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				err = md_bitmap_load(mddev);
@@ -7122,11 +7149,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 				md_bitmap_destroy(mddev);
 				fd = -1;
 			}
-			mddev_resume(mddev);
 		} else if (fd < 0) {
-			mddev_suspend(mddev);
 			md_bitmap_destroy(mddev);
-			mddev_resume(mddev);
 		}
 	}
 	if (fd < 0) {
@@ -7415,7 +7439,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 			mddev->bitmap_info.space =
 				mddev->bitmap_info.default_space;
 			bitmap = md_bitmap_create(mddev, -1);
-			mddev_suspend(mddev);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				rv = md_bitmap_load(mddev);
@@ -7423,7 +7446,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				rv = PTR_ERR(bitmap);
 			if (rv)
 				md_bitmap_destroy(mddev);
-			mddev_resume(mddev);
 		} else {
 			/* remove the bitmap */
 			if (!mddev->bitmap) {
@@ -7448,9 +7470,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				module_put(md_cluster_mod);
 				mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
 			}
-			mddev_suspend(mddev);
 			md_bitmap_destroy(mddev);
-			mddev_resume(mddev);
 			mddev->bitmap_info.offset = 0;
 		}
 	}
@@ -7521,6 +7541,20 @@ static inline bool md_ioctl_valid(unsigned int cmd)
 	}
 }
 
+static bool md_ioctl_need_suspend(unsigned int cmd)
+{
+	switch (cmd) {
+	case ADD_NEW_DISK:
+	case HOT_ADD_DISK:
+	case HOT_REMOVE_DISK:
+	case SET_BITMAP_FILE:
+	case SET_ARRAY_INFO:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int __md_set_array_info(struct mddev *mddev, void __user *argp)
 {
 	mdu_array_info_t info;
@@ -7653,7 +7687,8 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
 	if (!md_is_rdwr(mddev))
 		flush_work(&mddev->sync_work);
 
-	err = mddev_lock(mddev);
+	err = md_ioctl_need_suspend(cmd) ? mddev_suspend_and_lock(mddev) :
+					   mddev_lock(mddev);
 	if (err) {
 		pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
 			 err, cmd);
@@ -7781,7 +7816,10 @@ unlock:
 	if (mddev->hold_active == UNTIL_IOCTL &&
 	    err != -EINVAL)
 		mddev->hold_active = 0;
-	mddev_unlock(mddev);
+
+	md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
+				     mddev_unlock(mddev);
+
 out:
 	if(did_set_md_closing)
 		clear_bit(MD_CLOSING, &mddev->flags);
@@ -8208,105 +8246,46 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 }
 
 static void *md_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(&all_mddevs_lock)
 {
-	struct list_head *tmp;
-	loff_t l = *pos;
-	struct mddev *mddev;
+	struct md_personality *pers;
 
-	if (l == 0x10000) {
-		++*pos;
-		return (void *)2;
-	}
-	if (l > 0x10000)
-		return NULL;
-	if (!l--)
-		/* header */
-		return (void*)1;
+	seq_puts(seq, "Personalities : ");
+	spin_lock(&pers_lock);
+	list_for_each_entry(pers, &pers_list, list)
+		seq_printf(seq, "[%s] ", pers->name);
+
+	spin_unlock(&pers_lock);
+	seq_puts(seq, "\n");
+	seq->poll_event = atomic_read(&md_event_count);
 
 	spin_lock(&all_mddevs_lock);
-	list_for_each(tmp,&all_mddevs)
-		if (!l--) {
-			mddev = list_entry(tmp, struct mddev, all_mddevs);
-			if (!mddev_get(mddev))
-				continue;
-			spin_unlock(&all_mddevs_lock);
-			return mddev;
-		}
-	spin_unlock(&all_mddevs_lock);
-	if (!l--)
-		return (void*)2;/* tail */
-	return NULL;
+
+	return seq_list_start(&all_mddevs, *pos);
 }
 
 static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct list_head *tmp;
-	struct mddev *next_mddev, *mddev = v;
-	struct mddev *to_put = NULL;
-
-	++*pos;
-	if (v == (void*)2)
-		return NULL;
-
-	spin_lock(&all_mddevs_lock);
-	if (v == (void*)1) {
-		tmp = all_mddevs.next;
-	} else {
-		to_put = mddev;
-		tmp = mddev->all_mddevs.next;
-	}
-
-	for (;;) {
-		if (tmp == &all_mddevs) {
-			next_mddev = (void*)2;
-			*pos = 0x10000;
-			break;
-		}
-		next_mddev = list_entry(tmp, struct mddev, all_mddevs);
-		if (mddev_get(next_mddev))
-			break;
-		mddev = next_mddev;
-		tmp = mddev->all_mddevs.next;
-	}
-	spin_unlock(&all_mddevs_lock);
-
-	if (to_put)
-		mddev_put(to_put);
-	return next_mddev;
-
+	return seq_list_next(v, &all_mddevs, pos);
 }
 
 static void md_seq_stop(struct seq_file *seq, void *v)
+	__releases(&all_mddevs_lock)
 {
-	struct mddev *mddev = v;
-
-	if (mddev && v != (void*)1 && v != (void*)2)
-		mddev_put(mddev);
+	status_unused(seq);
+	spin_unlock(&all_mddevs_lock);
 }
 
 static int md_seq_show(struct seq_file *seq, void *v)
 {
-	struct mddev *mddev = v;
+	struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
 	sector_t sectors;
 	struct md_rdev *rdev;
 
-	if (v == (void*)1) {
-		struct md_personality *pers;
-		seq_printf(seq, "Personalities : ");
-		spin_lock(&pers_lock);
-		list_for_each_entry(pers, &pers_list, list)
-			seq_printf(seq, "[%s] ", pers->name);
-
-		spin_unlock(&pers_lock);
-		seq_printf(seq, "\n");
-		seq->poll_event = atomic_read(&md_event_count);
+	if (!mddev_get(mddev))
 		return 0;
-	}
-	if (v == (void*)2) {
-		status_unused(seq);
-		return 0;
-	}
 
+	spin_unlock(&all_mddevs_lock);
 	spin_lock(&mddev->lock);
 	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
 		seq_printf(seq, "%s : %sactive", mdname(mddev),
@@ -8377,6 +8356,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "\n");
 	}
 	spin_unlock(&mddev->lock);
+	spin_lock(&all_mddevs_lock);
+	if (atomic_dec_and_test(&mddev->active))
+		__mddev_put(mddev);
 
 	return 0;
 }
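md_seq_ops now rides on the generic seq_list helpers: the personalities header moves into md_seq_start(), the unused-devices trailer into md_seq_stop(), and the magic (void*)1/(void*)2 cursors disappear. Each ->show() call receives a list node while all_mddevs_lock is held, takes its own reference, and drops the lock while printing, which is why md_seq_show() ends by retaking the lock and calling __mddev_put(). The bare iterator pattern, as a self-contained sketch (the demo_* names are invented):

	#include <linux/seq_file.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>

	static LIST_HEAD(demo_list);		/* hypothetical list of entries */
	static DEFINE_SPINLOCK(demo_lock);	/* protects demo_list */

	static void *demo_seq_start(struct seq_file *seq, loff_t *pos)
		__acquires(&demo_lock)
	{
		spin_lock(&demo_lock);
		return seq_list_start(&demo_list, *pos);	/* NULL ends iteration */
	}

	static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	{
		return seq_list_next(v, &demo_list, pos);
	}

	static void demo_seq_stop(struct seq_file *seq, void *v)
		__releases(&demo_lock)
	{
		spin_unlock(&demo_lock);
	}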
@@ -9371,8 +9353,13 @@ static void md_start_sync(struct work_struct *ws)
 {
 	struct mddev *mddev = container_of(ws, struct mddev, sync_work);
 	int spares = 0;
+	bool suspend = false;
 
-	mddev_lock_nointr(mddev);
+	if (md_spares_need_change(mddev))
+		suspend = true;
+
+	suspend ? mddev_suspend_and_lock_nointr(mddev) :
+		  mddev_lock_nointr(mddev);
 
 	if (!md_is_rdwr(mddev)) {
 		/*
@@ -9408,7 +9395,7 @@ static void md_start_sync(struct work_struct *ws)
 		goto not_running;
 	}
 
-	mddev_unlock(mddev);
+	suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
 	md_wakeup_thread(mddev->sync_thread);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
 	md_new_event();
@@ -9420,7 +9407,7 @@ not_running:
 	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-	mddev_unlock(mddev);
+	suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
 
 	wake_up(&resync_wait);
 	if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
@@ -9452,19 +9439,7 @@ not_running:
  */
 void md_check_recovery(struct mddev *mddev)
 {
-	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
-		/* Write superblock - thread that called mddev_suspend()
-		 * holds reconfig_mutex for us.
-		 */
-		set_bit(MD_UPDATING_SB, &mddev->flags);
-		smp_mb__after_atomic();
-		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
-			md_update_sb(mddev, 0);
-		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
-		wake_up(&mddev->sb_wait);
-	}
-
-	if (is_md_suspended(mddev))
+	if (READ_ONCE(mddev->suspended))
 		return;
 
 	if (mddev->bitmap)
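Deleting the MD_ALLOW_SB_UPDATE/MD_UPDATING_SB handshake falls out of the rewrite: a suspended array no longer implies that some thread is parked holding reconfig_mutex, so md_check_recovery() does not have to write the superblock on a suspender's behalf. It can simply bail out while 'suspended' is non-zero and do its normal locked update otherwise; the two flags are removed from md.h below.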
+32 -11
@@ -248,10 +248,6 @@ struct md_cluster_info;
  *			become failed.
  * @MD_HAS_PPL:  The raid array has PPL feature set.
  * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
- * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
- *			without taking reconfig_mutex.
- * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
- *			explicitly holding reconfig_mutex.
  * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
  *		array is ready yet.
  * @MD_BROKEN: This is used to stop writes and mark array as failed.
@@ -268,8 +264,6 @@ enum mddev_flags {
 	MD_FAILFAST_SUPPORTED,
 	MD_HAS_PPL,
 	MD_HAS_MULTIPLE_PPLS,
-	MD_ALLOW_SB_UPDATE,
-	MD_UPDATING_SB,
 	MD_NOT_READY,
 	MD_BROKEN,
 	MD_DELETED,
@@ -316,6 +310,7 @@ struct mddev {
 	unsigned long			sb_flags;
 
 	int				suspended;
+	struct mutex			suspend_mutex;
 	struct percpu_ref		active_io;
 	int				ro;
 	int				sysfs_active; /* set when sysfs deletes
@@ -809,15 +804,14 @@ extern int md_rdev_init(struct md_rdev *rdev);
 extern void md_rdev_clear(struct md_rdev *rdev);
 
 extern void md_handle_request(struct mddev *mddev, struct bio *bio);
-extern void mddev_suspend(struct mddev *mddev);
+extern int mddev_suspend(struct mddev *mddev, bool interruptible);
 extern void mddev_resume(struct mddev *mddev);
 
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
-extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
-		bool is_suspend);
-extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
-		bool is_suspend);
+extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev);
+extern void mddev_destroy_serial_pool(struct mddev *mddev,
+				      struct md_rdev *rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

@@ -855,6 +849,33 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
 		mddev->queue->limits.max_write_zeroes_sectors = 0;
 }
 
+static inline int mddev_suspend_and_lock(struct mddev *mddev)
+{
+	int ret;
+
+	ret = mddev_suspend(mddev, true);
+	if (ret)
+		return ret;
+
+	ret = mddev_lock(mddev);
+	if (ret)
+		mddev_resume(mddev);
+
+	return ret;
+}
+
+static inline void mddev_suspend_and_lock_nointr(struct mddev *mddev)
+{
+	mddev_suspend(mddev, false);
+	mutex_lock(&mddev->reconfig_mutex);
+}
+
+static inline void mddev_unlock_and_resume(struct mddev *mddev)
+{
+	mddev_unlock(mddev);
+	mddev_resume(mddev);
+}
+
 struct mdu_array_info_s;
 struct mdu_disk_info_s;
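The helpers pin down a single lock order: suspend the array first, then take reconfig_mutex; release in the reverse order. For callers that only sometimes need suspension, the series pairs the interruptible and _nointr variants with plain mddev_lock()/mddev_unlock(), as md_start_sync() and md_ioctl() do above. A hedged sketch of that conditional shape (the function name is invented):

	/* Sketch: conditional suspension around a reconfiguration,
	 * modeled on md_start_sync() in the md.c hunks above. */
	static void reconfigure_array(struct mddev *mddev, bool need_suspend)
	{
		need_suspend ? mddev_suspend_and_lock_nointr(mddev) :
			       mddev_lock_nointr(mddev);

		/* ... reconfigure under reconfig_mutex ... */

		need_suspend ? mddev_unlock_and_resume(mddev) :
			       mddev_unlock(mddev);
	}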
+2 -1
@@ -1345,6 +1345,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	int first_clone;
 	int max_sectors;
 	bool write_behind = false;
+	bool is_discard = (bio_op(bio) == REQ_OP_DISCARD);
 
 	if (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1405,7 +1406,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		 * write-mostly, which means we could allocate write behind
 		 * bio later.
 		 */
-		if (rdev && test_bit(WriteMostly, &rdev->flags))
+		if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
 			write_behind = true;
 
 		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
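The raid1 change is independent of the suspend work: the write-behind path copies the payload into a separately allocated behind bio, so raid1_write_request() caps such writes at BIO_MAX_VECS pages and splits anything larger. A discard carries no payload and nothing is written behind, so that split was pure overhead; skipping the write_behind flag for discards avoids it.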
+32 -32
@@ -327,8 +327,9 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
 void r5c_check_stripe_cache_usage(struct r5conf *conf)
 {
 	int total_cached;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
-	if (!r5c_is_writeback(conf->log))
+	if (!r5c_is_writeback(log))
 		return;
 
 	total_cached = atomic_read(&conf->r5c_cached_partial_stripes) +
@@ -344,7 +345,7 @@ void r5c_check_stripe_cache_usage(struct r5conf *conf)
 	 */
 	if (total_cached > conf->min_nr_stripes * 1 / 2 ||
 	    atomic_read(&conf->empty_inactive_list_nr) > 0)
-		r5l_wake_reclaim(conf->log, 0);
+		r5l_wake_reclaim(log, 0);
 }
 
 /*
@@ -353,7 +354,9 @@ void r5c_check_stripe_cache_usage(struct r5conf *conf)
  */
 void r5c_check_cached_full_stripe(struct r5conf *conf)
 {
-	if (!r5c_is_writeback(conf->log))
+	struct r5l_log *log = READ_ONCE(conf->log);
+
+	if (!r5c_is_writeback(log))
 		return;
 
 	/*
@@ -363,7 +366,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
 	if (atomic_read(&conf->r5c_cached_full_stripes) >=
 	    min(R5C_FULL_STRIPE_FLUSH_BATCH(conf),
 		conf->chunk_sectors >> RAID5_STRIPE_SHIFT(conf)))
-		r5l_wake_reclaim(conf->log, 0);
+		r5l_wake_reclaim(log, 0);
 }
 
 /*
@@ -396,7 +399,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
  */
static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
 	if (!r5c_is_writeback(log))
 		return 0;
@@ -449,7 +452,7 @@ static inline void r5c_update_log_state(struct r5l_log *log)
 void r5c_make_stripe_write_out(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
 	BUG_ON(!r5c_is_writeback(log));
 
@@ -491,7 +494,7 @@ static void r5c_handle_parity_cached(struct stripe_head *sh)
 */
 static void r5c_finish_cache_stripe(struct stripe_head *sh)
 {
-	struct r5l_log *log = sh->raid_conf->log;
+	struct r5l_log *log = READ_ONCE(sh->raid_conf->log);
 
 	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
 		BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
@@ -683,7 +686,6 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 					   disable_writeback_work);
 	struct mddev *mddev = log->rdev->mddev;
 	struct r5conf *conf = mddev->private;
-	int locked = 0;
 
 	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
 		return;
@@ -692,14 +694,14 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 
 	/* wait superblock change before suspend */
 	wait_event(mddev->sb_wait,
-		   conf->log == NULL ||
-		   (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
-		    (locked = mddev_trylock(mddev))));
-	if (locked) {
-		mddev_suspend(mddev);
+		   !READ_ONCE(conf->log) ||
+		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
+	log = READ_ONCE(conf->log);
+	if (log) {
+		mddev_suspend(mddev, false);
 		log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
 		mddev_resume(mddev);
-		mddev_unlock(mddev);
 	}
 }
 
@@ -1151,7 +1153,7 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
 static sector_t r5c_calculate_new_cp(struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 	sector_t new_cp;
 	unsigned long flags;
 
@@ -1159,12 +1161,12 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf)
 		return log->next_checkpoint;
 
 	spin_lock_irqsave(&log->stripe_in_journal_lock, flags);
-	if (list_empty(&conf->log->stripe_in_journal_list)) {
+	if (list_empty(&log->stripe_in_journal_list)) {
 		/* all stripes flushed */
 		spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags);
 		return log->next_checkpoint;
 	}
-	sh = list_first_entry(&conf->log->stripe_in_journal_list,
+	sh = list_first_entry(&log->stripe_in_journal_list,
 			      struct stripe_head, r5c);
 	new_cp = sh->log_start;
 	spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags);
@@ -1399,7 +1401,7 @@ void r5c_flush_cache(struct r5conf *conf, int num)
 	struct stripe_head *sh, *next;
 
 	lockdep_assert_held(&conf->device_lock);
-	if (!conf->log)
+	if (!READ_ONCE(conf->log))
 		return;
 
 	count = 0;
@@ -1420,7 +1422,7 @@ void r5c_flush_cache(struct r5conf *conf, int num)
 
 static void r5c_do_reclaim(struct r5conf *conf)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 	struct stripe_head *sh;
 	int count = 0;
 	unsigned long flags;
@@ -1549,7 +1551,7 @@ static void r5l_reclaim_thread(struct md_thread *thread)
 {
 	struct mddev *mddev = thread->mddev;
 	struct r5conf *conf = mddev->private;
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
 	if (!log)
 		return;
@@ -1591,7 +1593,7 @@ void r5l_quiesce(struct r5l_log *log, int quiesce)
 
 bool r5l_log_disk_error(struct r5conf *conf)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
 	/* don't allow write if journal disk is missing */
 	if (!log)
@@ -2583,9 +2585,7 @@ int r5c_journal_mode_set(struct mddev *mddev, int mode)
 	    mode == R5C_JOURNAL_MODE_WRITE_BACK)
 		return -EINVAL;
 
-	mddev_suspend(mddev);
 	conf->log->r5c_journal_mode = mode;
-	mddev_resume(mddev);
 
 	pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n",
 		 mdname(mddev), mode, r5c_journal_mode_str[mode]);
@@ -2610,11 +2610,11 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 		if (strlen(r5c_journal_mode_str[mode]) == len &&
 		    !strncmp(page, r5c_journal_mode_str[mode], len))
 			break;
-	ret = mddev_lock(mddev);
+	ret = mddev_suspend_and_lock(mddev);
 	if (ret)
 		return ret;
 	ret = r5c_journal_mode_set(mddev, mode);
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return ret ?: length;
 }
 
@@ -2635,7 +2635,7 @@ int r5c_try_caching_write(struct r5conf *conf,
 			  struct stripe_head_state *s,
 			  int disks)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 	int i;
 	struct r5dev *dev;
 	int to_cache = 0;
@@ -2802,7 +2802,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
 				 struct stripe_head *sh,
 				 struct stripe_head_state *s)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 	int i;
 	int do_wakeup = 0;
 	sector_t tree_index;
@@ -2941,7 +2941,7 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
 /* check whether this big stripe is in write back cache. */
 bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
 {
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 	sector_t tree_index;
 	void *slot;
 
@@ -3049,14 +3049,14 @@ int r5l_start(struct r5l_log *log)
 void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct r5conf *conf = mddev->private;
-	struct r5l_log *log = conf->log;
+	struct r5l_log *log = READ_ONCE(conf->log);
 
 	if (!log)
 		return;
 
 	if ((raid5_calc_degraded(conf) > 0 ||
 	     test_bit(Journal, &rdev->flags)) &&
-	    conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+	    log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
 		schedule_work(&log->disable_writeback_work);
 }
 
@@ -3145,7 +3145,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->stripe_in_journal_lock);
 	atomic_set(&log->stripe_in_journal_count, 0);
 
-	conf->log = log;
+	WRITE_ONCE(conf->log, log);
 
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
@@ -3173,7 +3173,7 @@ void r5l_exit_log(struct r5conf *conf)
 	 * 'reconfig_mutex' is held by caller, set 'confg->log' to NULL to
 	 * ensure disable_writeback_work wakes up and exits.
 	 */
-	conf->log = NULL;
+	WRITE_ONCE(conf->log, NULL);
 	wake_up(&conf->mddev->sb_wait);
 	flush_work(&log->disable_writeback_work);
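All of the raid5-cache changes are one idiom: 'conf->log' can be cleared by r5l_exit_log() while lockless readers run, so each reader snapshots the pointer once with READ_ONCE() and then uses only the snapshot, never dereferencing 'conf->log' again (hence list_empty(&log->...) instead of list_empty(&conf->log->...)), while the two writers publish with WRITE_ONCE(). The idiom in a self-contained sketch (types and names invented):

	#include <linux/compiler.h>	/* READ_ONCE/WRITE_ONCE */
	#include <linux/errno.h>

	struct demo_log { int mode; };

	struct demo_conf {
		struct demo_log *log;	/* published at init, cleared at exit */
	};

	static int demo_reader(struct demo_conf *conf)
	{
		struct demo_log *log = READ_ONCE(conf->log);	/* snapshot once */

		if (!log)
			return -ENODEV;	/* journal already torn down */
		/* use only 'log' from here on; never re-read conf->log */
		return log->mode;
	}

	static void demo_publish(struct demo_conf *conf, struct demo_log *log)
	{
		WRITE_ONCE(conf->log, log);	/* pairs with READ_ONCE in readers */
	}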
+21 -35
@@ -70,6 +70,8 @@ MODULE_PARM_DESC(devices_handle_discard_safely,
 		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
 static struct workqueue_struct *raid5_wq;
 
+static void raid5_quiesce(struct mddev *mddev, int quiesce);
+
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
 	int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK;
@@ -2492,15 +2494,12 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	unsigned long cpu;
 	int err = 0;
 
-	/*
-	 * Never shrink. And mddev_suspend() could deadlock if this is called
-	 * from raid5d. In that case, scribble_disks and scribble_sectors
-	 * should equal to new_disks and new_sectors
-	 */
+	/* Never shrink. */
 	if (conf->scribble_disks >= new_disks &&
 	    conf->scribble_sectors >= new_sectors)
 		return 0;
-	mddev_suspend(conf->mddev);
+
+	raid5_quiesce(conf->mddev, true);
 	cpus_read_lock();
 
 	for_each_present_cpu(cpu) {
@@ -2514,7 +2513,8 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	}
 
 	cpus_read_unlock();
-	mddev_resume(conf->mddev);
+	raid5_quiesce(conf->mddev, false);
+
 	if (!err) {
 		conf->scribble_disks = new_disks;
 		conf->scribble_sectors = new_sectors;
@@ -7025,7 +7025,7 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
 	    new != roundup_pow_of_two(new))
 		return -EINVAL;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 
@@ -7049,7 +7049,6 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
 		goto out_unlock;
 	}
 
-	mddev_suspend(mddev);
 	mutex_lock(&conf->cache_size_mutex);
 	size = conf->max_nr_stripes;
 
@@ -7064,10 +7063,9 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len)
 		err = -ENOMEM;
 	}
 	mutex_unlock(&conf->cache_size_mutex);
-	mddev_resume(mddev);
 
 out_unlock:
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return err ?: len;
 }
 
@@ -7153,7 +7151,7 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
 		return -EINVAL;
 	new = !!new;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 	conf = mddev->private;
@@ -7162,15 +7160,13 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
 	else if (new != conf->skip_copy) {
 		struct request_queue *q = mddev->queue;
 
-		mddev_suspend(mddev);
 		conf->skip_copy = new;
 		if (new)
 			blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
 		else
 			blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
-		mddev_resume(mddev);
 	}
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 	return err ?: len;
 }
 
@@ -7225,15 +7221,13 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 	if (new > 8192)
 		return -EINVAL;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 	conf = mddev->private;
 	if (!conf)
 		err = -ENODEV;
 	else if (new != conf->worker_cnt_per_group) {
-		mddev_suspend(mddev);
-
 		old_groups = conf->worker_groups;
 		if (old_groups)
 			flush_workqueue(raid5_wq);
@@ -7250,9 +7244,8 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 			kfree(old_groups[0].workers);
 			kfree(old_groups);
 		}
-		mddev_resume(mddev);
 	}
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 
 	return err ?: len;
 }
@@ -8558,8 +8551,8 @@ static int raid5_start_reshape(struct mddev *mddev)
 	 * the reshape wasn't running - like Discard or Read - have
 	 * completed.
 	 */
-	mddev_suspend(mddev);
-	mddev_resume(mddev);
+	raid5_quiesce(mddev, true);
+	raid5_quiesce(mddev, false);
 
 	/* Add some new drives, as many as will fit.
 	 * We know there are enough to make the newly sized array work.
@@ -8974,12 +8967,12 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 	struct r5conf *conf;
 	int err;
 
-	err = mddev_lock(mddev);
+	err = mddev_suspend_and_lock(mddev);
 	if (err)
 		return err;
 	conf = mddev->private;
 	if (!conf) {
-		mddev_unlock(mddev);
+		mddev_unlock_and_resume(mddev);
 		return -ENODEV;
 	}
 
@@ -8989,19 +8982,14 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 			err = log_init(conf, NULL, true);
 			if (!err) {
 				err = resize_stripes(conf, conf->pool_size);
-				if (err) {
-					mddev_suspend(mddev);
+				if (err)
 					log_exit(conf);
-					mddev_resume(mddev);
-				}
 			}
 		} else
 			err = -EINVAL;
 	} else if (strncmp(buf, "resync", 6) == 0) {
 		if (raid5_has_ppl(conf)) {
-			mddev_suspend(mddev);
 			log_exit(conf);
-			mddev_resume(mddev);
 			err = resize_stripes(conf, conf->pool_size);
 		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
 			   r5l_log_disk_error(conf)) {
@@ -9014,11 +9002,9 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 				break;
 			}
 
-			if (!journal_dev_exists) {
-				mddev_suspend(mddev);
+			if (!journal_dev_exists)
 				clear_bit(MD_HAS_JOURNAL, &mddev->flags);
-				mddev_resume(mddev);
-			} else /* need remove journal device first */
+			else /* need remove journal device first */
 				err = -EBUSY;
 		} else
 			err = -EINVAL;
@@ -9029,7 +9015,7 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 	if (!err)
 		md_update_sb(mddev, 1);
 
-	mddev_unlock(mddev);
+	mddev_unlock_and_resume(mddev);
 
 	return err;
 }