net/mlx5e: Fix potential deadlock by deferring RX timeout recovery
mlx5e_reporter_rx_timeout() is currently invoked synchronously
in the driver's open error flow. This causes the thread holding
priv->state_lock to attempt acquiring the devlink lock, which
can result in a circular dependency with other devlink operations.
For example:
- Devlink health diagnose flow:
- __devlink_nl_pre_doit() acquires the devlink lock.
- devlink_nl_health_reporter_diagnose_doit() invokes the
driver's diagnose callback.
- mlx5e_rx_reporter_diagnose() then attempts to acquire
priv->state_lock.
- Driver open flow:
- mlx5e_open() acquires priv->state_lock.
- If an error occurs, devlink_health_reporter may be called,
attempting to acquire the devlink lock.
To prevent this circular locking scenario, defer the RX timeout
recovery by scheduling it via a workqueue. This ensures that the
recovery work acquires locks in a consistent order: first the
devlink lock, then priv->state_lock.
Additionally, make the recovery work acquire the netdev instance
lock to safely synchronize with the open/close channel flows,
similar to mlx5e_tx_timeout_work. Repeatedly attempt to acquire
the netdev instance lock until it is taken or the target RQ is no
longer active, as indicated by the MLX5E_STATE_CHANNELS_ACTIVE bit.
Fixes: 32c57fb268 ("net/mlx5e: Report and recover from rx timeout")
Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1753256672-337784-4-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in: (see repository branch/tag listing)
Committed by: Jakub Kicinski
Parent commit: 6d19c44b5c
Commit: e80d655615
@@ -728,6 +728,7 @@ struct mlx5e_rq {
|
||||
struct xsk_buff_pool *xsk_pool;
|
||||
|
||||
struct work_struct recover_work;
|
||||
struct work_struct rx_timeout_work;
|
||||
|
||||
/* control */
|
||||
struct mlx5_wq_ctrl wq_ctrl;
|
||||
|
||||
@@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
|
||||
/* Devlink health recovery callback for an RX completion timeout.
 *
 * ctx is the timed-out &struct mlx5e_rq (see rq = ctx below). Per the
 * commit description, this runs from the devlink health reporter with the
 * devlink lock already held, so taking priv->state_lock here preserves
 * the consistent "devlink lock, then priv->state_lock" ordering and
 * avoids the circular dependency with the diagnose flow.
 *
 * Returns 0 on successful EQ recovery, or a negative errno from
 * mlx5e_health_channel_eq_recover().
 */
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
{
	struct mlx5_eq_comp *eq;
	struct mlx5e_priv *priv;
	struct mlx5e_rq *rq;
	int err;

	rq = ctx;
	priv = rq->priv;

	/* Serialize against open/close channel flows (see diagnose flow,
	 * which also takes priv->state_lock).
	 */
	mutex_lock(&priv->state_lock);

	eq = rq->cq.mcq.eq;

	/* Attempt to recover the completion EQ of the stalled RQ's CQ. */
	err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
	/* On failure, disable the associated ICOSQ (if any) so no further
	 * work is posted on the broken channel.
	 */
	if (err && rq->icosq)
		clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);

	mutex_unlock(&priv->state_lock);

	return err;
}
|
||||
|
||||
|
||||
@@ -707,6 +707,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
|
||||
mlx5e_reporter_rq_cqe_err(rq);
|
||||
}
|
||||
|
||||
/* Deferred RX timeout recovery, queued from mlx5e_wait_for_min_rx_wqes()
 * via rq->rx_timeout_work instead of calling the reporter synchronously.
 * Deferring lets the recovery take locks in the consistent order
 * (devlink lock first, then priv->state_lock), preventing the deadlock
 * described in the commit message. Canceled with cancel_work_sync() in
 * mlx5e_close_rq().
 */
static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
{
	struct mlx5e_rq *rq = container_of(timeout_work,
					   struct mlx5e_rq,
					   rx_timeout_work);

	/* Acquire netdev instance lock to synchronize with channel close and
	 * reopen flows. Either successfully obtain the lock, or detect that
	 * channels are closing for another reason, making this work no longer
	 * necessary.
	 */
	while (!netdev_trylock(rq->netdev)) {
		/* Channels going down: the target RQ is no longer active,
		 * so recovery is moot — bail out without reporting.
		 */
		if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
			return;
		/* Back off briefly before retrying the trylock. */
		msleep(20);
	}

	mlx5e_reporter_rx_timeout(rq);
	netdev_unlock(rq->netdev);
}
|
||||
|
||||
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
|
||||
{
|
||||
rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
|
||||
@@ -830,6 +851,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
||||
|
||||
rqp->wq.db_numa_node = node;
|
||||
INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
|
||||
INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work);
|
||||
|
||||
if (params->xdp_prog)
|
||||
bpf_prog_inc(params->xdp_prog);
|
||||
@@ -1204,7 +1226,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
|
||||
netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
|
||||
rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
|
||||
|
||||
mlx5e_reporter_rx_timeout(rq);
|
||||
queue_work(rq->priv->wq, &rq->rx_timeout_work);
|
||||
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
@@ -1375,6 +1398,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq)
|
||||
if (rq->dim)
|
||||
cancel_work_sync(&rq->dim->work);
|
||||
cancel_work_sync(&rq->recover_work);
|
||||
cancel_work_sync(&rq->rx_timeout_work);
|
||||
mlx5e_destroy_rq(rq);
|
||||
mlx5e_free_rx_descs(rq);
|
||||
mlx5e_free_rq(rq);
|
||||
|
||||
Reference in New Issue
Block a user