Merge branch 'mlx5-hws-fixes-2025-08-17'

Mark Bloch says:

====================
mlx5 HWS fixes 2025-08-17

This patch set contains mlx5 hardware steering (HWS) fixes
found by the team.
====================

Link: https://patch.msgid.link/20250817202323.308604-1-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-08-19 19:35:16 -07:00
11 changed files with 112 additions and 38 deletions
@@ -173,6 +173,8 @@ static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs,
memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions));
rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter);
rule_actions[0].counter.offset =
attr->counter->id - attr->counter->bulk->base_id;
/* Modify header is special, it may require extra arguments outside the action itself. */
if (mh_action->mh_data) {
rule_actions[1].modify_header.offset = mh_action->mh_data->offset;
@@ -74,9 +74,9 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher,
static int
hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
{
bool move_error = false, poll_error = false, drain_error = false;
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
struct mlx5hws_matcher *matcher = bwc_matcher->matcher;
int drain_error = 0, move_error = 0, poll_error = 0;
u16 bwc_queues = mlx5hws_bwc_queues(ctx);
struct mlx5hws_rule_attr rule_attr;
struct mlx5hws_bwc_rule *bwc_rule;
@@ -84,6 +84,7 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
struct list_head *rules_list;
u32 pending_rules;
int i, ret = 0;
bool drain;
mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr);
@@ -99,23 +100,37 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
ret = mlx5hws_matcher_resize_rule_move(matcher,
bwc_rule->rule,
&rule_attr);
if (unlikely(ret && !move_error)) {
mlx5hws_err(ctx,
"Moving BWC rule: move failed (%d), attempting to move rest of the rules\n",
ret);
move_error = true;
if (unlikely(ret)) {
if (!move_error) {
mlx5hws_err(ctx,
"Moving BWC rule: move failed (%d), attempting to move rest of the rules\n",
ret);
move_error = ret;
}
/* Rule wasn't queued, no need to poll */
continue;
}
pending_rules++;
drain = pending_rules >=
hws_bwc_get_burst_th(ctx, rule_attr.queue_id);
ret = mlx5hws_bwc_queue_poll(ctx,
rule_attr.queue_id,
&pending_rules,
false);
if (unlikely(ret && !poll_error)) {
mlx5hws_err(ctx,
"Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n",
ret);
poll_error = true;
drain);
if (unlikely(ret)) {
if (ret == -ETIMEDOUT) {
mlx5hws_err(ctx,
"Moving BWC rule: timeout polling for completions (%d), aborting rehash\n",
ret);
return ret;
}
if (!poll_error) {
mlx5hws_err(ctx,
"Moving BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n",
ret);
poll_error = ret;
}
}
}
@@ -126,17 +141,30 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
rule_attr.queue_id,
&pending_rules,
true);
if (unlikely(ret && !drain_error)) {
mlx5hws_err(ctx,
"Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n",
ret);
drain_error = true;
if (unlikely(ret)) {
if (ret == -ETIMEDOUT) {
mlx5hws_err(ctx,
"Moving bwc rule: timeout draining completions (%d), aborting rehash\n",
ret);
return ret;
}
if (!drain_error) {
mlx5hws_err(ctx,
"Moving bwc rule: drain failed (%d), attempting to move rest of the rules\n",
ret);
drain_error = ret;
}
}
}
}
if (move_error || poll_error || drain_error)
ret = -EINVAL;
/* Return the first error that happened */
if (unlikely(move_error))
return move_error;
if (unlikely(poll_error))
return poll_error;
if (unlikely(drain_error))
return drain_error;
return ret;
}
@@ -1035,6 +1063,21 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
return 0; /* rule inserted successfully */
}
/* Rule insertion could fail due to queue being full, timeout, or
* matcher in resize. In such cases, no point in trying to rehash.
*/
if (ret == -EBUSY || ret == -ETIMEDOUT || ret == -EAGAIN) {
mutex_unlock(queue_lock);
mlx5hws_err(ctx,
"BWC rule insertion failed - %s (%d)\n",
ret == -EBUSY ? "queue is full" :
ret == -ETIMEDOUT ? "timeout" :
ret == -EAGAIN ? "matcher in resize" : "N/A",
ret);
hws_bwc_rule_cnt_dec(bwc_rule);
return ret;
}
/* At this point the rule wasn't added.
* It could be because there was collision, or some other problem.
* Try rehash by size and insert rule again - last chance.
@@ -1328,11 +1328,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
{
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
struct mlx5hws_matcher *matcher = bwc_matcher->matcher;
bool move_error = false, poll_error = false;
u16 bwc_queues = mlx5hws_bwc_queues(ctx);
struct mlx5hws_bwc_rule *tmp_bwc_rule;
struct mlx5hws_rule_attr rule_attr;
struct mlx5hws_table *isolated_tbl;
int move_error = 0, poll_error = 0;
struct mlx5hws_rule *tmp_rule;
struct list_head *rules_list;
u32 expected_completions = 1;
@@ -1391,11 +1391,15 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
ret = mlx5hws_matcher_resize_rule_move(matcher,
tmp_rule,
&rule_attr);
if (unlikely(ret && !move_error)) {
mlx5hws_err(ctx,
"Moving complex BWC rule failed (%d), attempting to move rest of the rules\n",
ret);
move_error = true;
if (unlikely(ret)) {
if (!move_error) {
mlx5hws_err(ctx,
"Moving complex BWC rule: move failed (%d), attempting to move rest of the rules\n",
ret);
move_error = ret;
}
/* Rule wasn't queued, no need to poll */
continue;
}
expected_completions = 1;
@@ -1403,11 +1407,19 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
rule_attr.queue_id,
&expected_completions,
true);
if (unlikely(ret && !poll_error)) {
mlx5hws_err(ctx,
"Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n",
ret);
poll_error = true;
if (unlikely(ret)) {
if (ret == -ETIMEDOUT) {
mlx5hws_err(ctx,
"Moving complex BWC rule: timeout polling for completions (%d), aborting rehash\n",
ret);
return ret;
}
if (!poll_error) {
mlx5hws_err(ctx,
"Moving complex BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n",
ret);
poll_error = ret;
}
}
/* Done moving the rule to the new matcher,
@@ -1422,8 +1434,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
}
}
if (move_error || poll_error)
ret = -EINVAL;
/* Return the first error that happened */
if (unlikely(move_error))
return move_error;
if (unlikely(poll_error))
return poll_error;
return ret;
}
@@ -55,6 +55,7 @@ int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev,
MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
MLX5_SET(create_flow_table_in, in, table_type, ft_attr->type);
MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid);
ft_ctx = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
MLX5_SET(flow_table_context, ft_ctx, level, ft_attr->level);
@@ -36,6 +36,7 @@ struct mlx5hws_cmd_set_fte_attr {
struct mlx5hws_cmd_ft_create_attr {
u8 type;
u8 level;
u16 uid;
bool rtc_valid;
bool decap_en;
bool reformat_en;
@@ -267,6 +267,7 @@ static int mlx5_cmd_hws_create_flow_table(struct mlx5_flow_root_namespace *ns,
tbl_attr.type = MLX5HWS_TABLE_TYPE_FDB;
tbl_attr.level = ft_attr->level;
tbl_attr.uid = ft_attr->uid;
tbl = mlx5hws_table_create(ctx, &tbl_attr);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating hws flow_table\n");
@@ -85,6 +85,7 @@ static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher)
ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev,
tbl,
0,
&matcher->end_ft_id);
if (ret) {
mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n");
@@ -112,7 +113,9 @@ static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher)
if (mlx5hws_matcher_is_isolated(matcher))
ret = hws_matcher_create_end_ft_isolated(matcher);
else
ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl,
ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev,
tbl,
0,
&matcher->end_ft_id);
if (ret) {
@@ -75,6 +75,7 @@ struct mlx5hws_context_attr {
/* Attributes supplied by the caller of mlx5hws_table_create(); that function
 * copies type, level and uid into the newly created table.
 */
struct mlx5hws_table_attr {
enum mlx5hws_table_type type;
u32 level;
/* Stored on the table and passed on when its default flow table is created. */
u16 uid;
};
enum mlx5hws_matcher_flow_src {
@@ -964,7 +964,6 @@ static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev,
return -ENOMEM;
MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries);
MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries));
err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq);
@@ -9,6 +9,7 @@ u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl)
}
static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl,
u16 uid,
struct mlx5hws_cmd_ft_create_attr *ft_attr)
{
ft_attr->type = tbl->fw_ft_type;
@@ -16,7 +17,9 @@ static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl,
ft_attr->level = tbl->ctx->caps->fdb_ft.max_level - 1;
else
ft_attr->level = tbl->ctx->caps->nic_ft.max_level - 1;
ft_attr->rtc_valid = true;
ft_attr->uid = uid;
}
static void hws_table_set_cap_attr(struct mlx5hws_table *tbl,
@@ -119,12 +122,12 @@ static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32
int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev,
struct mlx5hws_table *tbl,
u32 *ft_id)
u16 uid, u32 *ft_id)
{
struct mlx5hws_cmd_ft_create_attr ft_attr = {0};
int ret;
hws_table_init_next_ft_attr(tbl, &ft_attr);
hws_table_init_next_ft_attr(tbl, uid, &ft_attr);
hws_table_set_cap_attr(tbl, &ft_attr);
ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, ft_id);
@@ -189,7 +192,10 @@ static int hws_table_init(struct mlx5hws_table *tbl)
}
mutex_lock(&ctx->ctrl_lock);
ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &tbl->ft_id);
ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev,
tbl,
tbl->uid,
&tbl->ft_id);
if (ret) {
mlx5hws_err(tbl->ctx, "Failed to create flow table object\n");
mutex_unlock(&ctx->ctrl_lock);
@@ -239,6 +245,7 @@ struct mlx5hws_table *mlx5hws_table_create(struct mlx5hws_context *ctx,
tbl->ctx = ctx;
tbl->type = attr->type;
tbl->level = attr->level;
tbl->uid = attr->uid;
ret = hws_table_init(tbl);
if (ret) {
@@ -18,6 +18,7 @@ struct mlx5hws_table {
enum mlx5hws_table_type type;
u32 fw_ft_type;
u32 level;
u16 uid;
struct list_head matchers_list;
struct list_head tbl_list_node;
struct mlx5hws_default_miss default_miss;
@@ -47,7 +48,7 @@ u32 mlx5hws_table_get_res_fw_ft_type(enum mlx5hws_table_type tbl_type,
int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev,
struct mlx5hws_table *tbl,
u32 *ft_id);
u16 uid, u32 *ft_id);
void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl,
u32 ft_id);