Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Several mlx5 bugs, crashers, and reports:

   - Limit stack usage

   - Fix misuse of __xa_store()/__xa_erase() without holding the lock by
     switching to the locked variants

   - Rate limit prints in the gid cache error cases

   - Fully initialize the event object before making it globally visible
     in an xarray

   - Fix deadlock inside the ODP code if the MMU notifier was called
     from a reclaim context

   - Include missed counters for some switchdev configurations and
     multi-port MPV mode

   - Fix loopback packet support when in multi-port MPV mode"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/mlx5: Fix vport loopback for MPV device
  RDMA/mlx5: Fix CC counters query for MPV
  RDMA/mlx5: Fix HW counters query for non-representor devices
  IB/core: Annotate umem_mutex acquisition under fs_reclaim for lockdep
  IB/mlx5: Fix potential deadlock in MR deregistration
  RDMA/mlx5: Initialize obj_event->obj_sub_list before xa_insert
  RDMA/core: Rate limit GID cache warning messages
  RDMA/mlx5: Fix unsafe xarray access in implicit ODP handling
  RDMA/mlx5: reduce stack usage in mlx5_ib_ufile_hw_cleanup
This commit is contained in:
Linus Torvalds
2025-07-02 09:17:40 -07:00
7 changed files with 107 additions and 24 deletions
+2 -2
View File
@@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
out_unlock:
mutex_unlock(&table->lock);
if (ret)
pr_warn("%s: unable to add gid %pI6 error=%d\n",
__func__, gid->raw, ret);
pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
__func__, gid->raw, ret);
return ret;
}
+11
View File
@@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
end = ALIGN(end, page_size);
if (unlikely(end < page_size))
return -EOVERFLOW;
/*
* The mmu notifier can be called within reclaim contexts and takes the
* umem_mutex. This is rare to trigger in testing, teach lockdep about
* it.
*/
if (IS_ENABLED(CONFIG_LOCKDEP)) {
fs_reclaim_acquire(GFP_KERNEL);
mutex_lock(&umem_odp->umem_mutex);
mutex_unlock(&umem_odp->umem_mutex);
fs_reclaim_release(GFP_KERNEL);
}
nr_entries = (end - start) >> PAGE_SHIFT;
if (!(nr_entries * PAGE_SIZE / page_size))
+2 -2
View File
@@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
+8 -2
View File
@@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
@@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
struct mlx5_async_cmd *async_cmd;
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
int head = 0;
int tail = 0;
async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
if (!async_cmd)
return;
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
@@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
kfree(async_cmd);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+33
View File
@@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave)
{
int err;
err = mlx5_nic_vport_update_local_lb(master, true);
if (err)
return err;
err = mlx5_nic_vport_update_local_lb(slave, true);
if (err)
goto out;
return 0;
out:
mlx5_nic_vport_update_local_lb(master, false);
return err;
}
static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave)
{
mlx5_nic_vport_update_local_lb(slave, false);
mlx5_nic_vport_update_local_lb(master, false);
}
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
if (err)
goto unbind;
return true;
unbind:
+47 -14
View File
@@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
!to_ib_umem_dmabuf(mr->umem)->pinned;
bool from_cache = !!ent;
int ret = 0;
!to_ib_umem_dmabuf(mr->umem)->pinned;
bool is_odp = is_odp_mr(mr);
int ret;
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
NULL);
if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
ret = mlx5r_umr_revoke_mr(mr);
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
}
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
dma_resv_unlock(
to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
return ret;
}
static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
{
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
!to_ib_umem_dmabuf(mr->umem)->pinned;
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
bool is_odp = is_odp_mr(mr);
bool from_cache = !!ent;
int ret;
if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
!cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
@@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);
goto out;
return 0;
}
if (ent) {
@@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
NULL);
ret = destroy_mkey(dev, mr);
out:
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2075,9 +2108,9 @@ out:
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
dma_resv_unlock(
to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
return ret;
}
@@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
rc = mlx5_revoke_mr(mr);
rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
+4 -4
View File
@@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
}
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
__xa_erase(&mr_to_mdev(mr)->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key));
xa_erase(&mr_to_mdev(mr)->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL);
ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
&mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
__xa_erase(&imr->implicit_children, idx);