bcachefs: Start copygc, rebalance threads earlier

Previously, copygc and rebalance weren't started until the very end of
mounting, after all recvoery passes have finished.

But copygc really should be started earlier, since it may be needed for
allocations to make forward progress. Additionally, we've been seeing
occasional bug reports where starting the kthread fails due to a pending
signal - i.e. we're getting timed out by systemd (during a version
upgrade), but we're not seeing the signal until mount is about to
complete.

Additionally, we now have copygc/rebalance explicitly wait for
check_snapshots to complete (if being run); they require that for
snapshot_is_ancestor() in the data move path.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet
2025-04-13 06:44:23 -04:00
parent d64e8e842b
commit 387df33129
5 changed files with 39 additions and 36 deletions
+7
View File
@@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),
+7
View File
@@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg)
set_freezable();
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->rebalance_write_point),
true);
+4
View File
@@ -18,6 +18,7 @@
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
#include "movinggc.h"
#include "namei.h"
#include "quota.h"
#include "rebalance.h"
@@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c)
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
if (ret)
+7
View File
@@ -266,6 +266,7 @@ int bch2_run_recovery_passes(struct bch_fs *c)
spin_lock_irq(&c->recovery_pass_lock);
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
unsigned prev_done = c->recovery_pass_done;
unsigned pass = c->curr_recovery_pass;
c->next_recovery_pass = pass + 1;
@@ -299,6 +300,12 @@ int bch2_run_recovery_passes(struct bch_fs *c)
}
c->curr_recovery_pass = c->next_recovery_pass;
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
}
}
spin_unlock_irq(&c->recovery_pass_lock);
+14 -36
View File
@@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
return ret;
}
static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*
* Ideally we'd start copygc/rebalance earlier instead of waiting for
* all of recovery/fsck to complete:
*/
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
return ret;
}
return 0;
}
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
int ret;
@@ -503,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
atomic_long_inc(&c->writes[i]);
}
#endif
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
goto err;
ret = bch2_copygc_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting copygc thread");
goto err;
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting rebalance thread");
goto err;
}
bch2_do_discards(c);
@@ -1082,13 +1063,10 @@ int bch2_fs_start(struct bch_fs *c)
wake_up(&c->ro_ref_wait);
down_write(&c->state_lock);
if (c->opts.read_only) {
if (c->opts.read_only)
bch2_fs_read_only(c);
} else {
ret = !test_bit(BCH_FS_rw, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c);
}
else if (!test_bit(BCH_FS_rw, &c->flags))
ret = bch2_fs_read_write(c);
up_write(&c->state_lock);
err: