Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2025-05-30 (ice, idpf)

For ice:
Michal resolves XDP issues related to Tx scheduler configuration with
a large number of Tx queues.

Additional information:
https://lore.kernel.org/intel-wired-lan/20250513105529.241745-1-michal.kubiak@intel.com/

For idpf:
Brian Vazquez updates the netif_subqueue_maybe_stop() condition check
to prevent possible races.

Emil shuts down the virtchannel mailbox during reset to reduce timeout
delays, as it is unavailable during that time.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  idpf: avoid mailbox timeout delays during reset
  idpf: fix a race in txq wakeup
  ice: fix rebuilding the Tx scheduler tree for large queue counts
  ice: create new Tx scheduler nodes for new queues only
  ice: fix Tx scheduler error handling in XDP callback
====================

Link: https://patch.msgid.link/20250530211221.2170484-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Committed by Jakub Kicinski, 2025-06-02 18:44:37 -07:00
8 changed files with 218 additions and 93 deletions
+33 -14
@@ -2740,6 +2740,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi)
     }
 }
 
+/**
+ * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors
+ * @vsi: the VSI with XDP rings being unmapped
+ */
+static void ice_unmap_xdp_rings(struct ice_vsi *vsi)
+{
+    int v_idx;
+
+    ice_for_each_q_vector(vsi, v_idx) {
+        struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+        struct ice_tx_ring *ring;
+
+        ice_for_each_tx_ring(ring, q_vector->tx)
+            if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+                break;
+
+        /* restore the value of last node prior to XDP setup */
+        q_vector->tx.tx_ring = ring;
+    }
+}
+
 /**
  * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
  * @vsi: VSI to bring up Tx rings used by XDP
@@ -2803,7 +2824,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
     if (status) {
         dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
             status);
-        goto clear_xdp_rings;
+        goto unmap_xdp_rings;
     }
 
     /* assign the prog only when it's not already present on VSI;
@@ -2819,6 +2840,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
     ice_vsi_assign_bpf_prog(vsi, prog);
 
     return 0;
+unmap_xdp_rings:
+    ice_unmap_xdp_rings(vsi);
 clear_xdp_rings:
     ice_for_each_xdp_txq(vsi, i)
         if (vsi->xdp_rings[i]) {
@@ -2835,6 +2858,8 @@ err_map_xdp:
     mutex_unlock(&pf->avail_q_mutex);
 
     devm_kfree(dev, vsi->xdp_rings);
+    vsi->xdp_rings = NULL;
+
     return -ENOMEM;
 }
@@ -2850,7 +2875,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
 {
     u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
     struct ice_pf *pf = vsi->back;
-    int i, v_idx;
+    int i;
 
     /* q_vectors are freed in reset path so there's no point in detaching
      * rings
@@ -2858,17 +2883,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
     if (cfg_type == ICE_XDP_CFG_PART)
         goto free_qmap;
 
-    ice_for_each_q_vector(vsi, v_idx) {
-        struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
-        struct ice_tx_ring *ring;
-
-        ice_for_each_tx_ring(ring, q_vector->tx)
-            if (!ring->tx_buf || !ice_ring_is_xdp(ring))
-                break;
-
-        /* restore the value of last node prior to XDP setup */
-        q_vector->tx.tx_ring = ring;
-    }
+    ice_unmap_xdp_rings(vsi);
 
 free_qmap:
     mutex_lock(&pf->avail_q_mutex);
@@ -3013,11 +3028,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
         xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
         if (xdp_ring_err) {
             NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+            goto resume_if;
         } else {
             xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
                                                  ICE_XDP_CFG_FULL);
-            if (xdp_ring_err)
+            if (xdp_ring_err) {
                 NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+                goto resume_if;
+            }
         }
         xdp_features_set_redirect_target(vsi->netdev, true);
         /* reallocate Rx queues that are used for zero-copy */
@@ -3035,6 +3053,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
}
resume_if:
if (if_running)
ret = ice_up(vsi);
+148 -33
@@ -84,6 +84,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
     return NULL;
 }
 
+/**
+ * ice_sched_find_next_vsi_node - find the next node for a given VSI
+ * @vsi_node: VSI support node to start search with
+ *
+ * Return: Next VSI support node, or NULL.
+ *
+ * The function returns a pointer to the next node from the VSI layer
+ * assigned to the given VSI, or NULL if there is no such a node.
+ */
+static struct ice_sched_node *
+ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node)
+{
+    unsigned int vsi_handle = vsi_node->vsi_handle;
+
+    while ((vsi_node = vsi_node->sibling) != NULL)
+        if (vsi_node->vsi_handle == vsi_handle)
+            break;
+
+    return vsi_node;
+}
+
 /**
  * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
  * @hw: pointer to the HW struct
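
For orientation (the diagram is illustrative, not diff content): ice_sched_find_next_vsi_node() walks the sibling list at the VSI support layer and returns the next node carrying the same vsi_handle, since one VSI may own several support nodes under a TC. With made-up handles:

    TC node
     +- VSI support node A (vsi_handle 5) -- qgroups -- queues
     +- VSI support node B (vsi_handle 7)
     +- VSI support node C (vsi_handle 5)  <-- ice_sched_find_next_vsi_node(A)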
@@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
         if (parent->num_children < max_child_nodes) {
             new_num_nodes = max_child_nodes - parent->num_children;
         } else {
-            /* This parent is full, try the next sibling */
-            parent = parent->sibling;
+            /* This parent is full,
+             * try the next available sibling.
+             */
+            parent = ice_sched_find_next_vsi_node(parent);
             /* Don't modify the first node TEID memory if the
              * first node was added already in the above call.
              * Instead send some temp memory for all other
@@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
     /* get the first queue group node from VSI sub-tree */
     qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
     while (qgrp_node) {
+        struct ice_sched_node *next_vsi_node;
+
         /* make sure the qgroup node is part of the VSI subtree */
         if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
             if (qgrp_node->num_children < max_children &&
                 qgrp_node->owner == owner)
                 break;
         qgrp_node = qgrp_node->sibling;
+        if (qgrp_node)
+            continue;
+
+        next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+        if (!next_vsi_node)
+            break;
+
+        vsi_node = next_vsi_node;
+        qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
     }
 
     /* Select the best queue group */
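
In effect, once the qgroup sibling walk inside one VSI support node is exhausted, the lookup above no longer gives up: it hops to the next support node of the same VSI via ice_sched_find_next_vsi_node() and restarts from that node's first qgroup child, so a free queue parent can be found on any of the VSI's support nodes.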
@@ -1604,16 +1638,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
 /**
  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
  * @hw: pointer to the HW struct
- * @num_qs: number of queues
+ * @num_new_qs: number of new queues that will be added to the tree
  * @num_nodes: num nodes array
  *
  * This function calculates the number of VSI child nodes based on the
  * number of queues.
  */
 static void
-ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes)
 {
-    u16 num = num_qs;
+    u16 num = num_new_qs;
     u8 i, qgl, vsil;
 
     qgl = ice_sched_get_qgrp_layer(hw);
@@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
         if (!parent)
             return -EIO;
 
-        if (i == vsil)
+        /* Do not modify the VSI handle for already existing VSI nodes,
+         * (if no new VSI node was added to the tree).
+         * Assign the VSI handle only to newly added VSI nodes.
+         */
+        if (i == vsil && num_added)
             parent->vsi_handle = vsi_handle;
     }
@@ -1812,6 +1850,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
                           num_nodes);
 }
 
+/**
+ * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count
+ * @hw: pointer to the HW struct
+ * @vsi_node: pointer to the leftmost VSI node that needs to be extended
+ * @new_numqs: new number of queues that has to be handled by the VSI
+ * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry
+ *
+ * This function recalculates the number of supported nodes that need to
+ * be added after adding more Tx queues for a given VSI.
+ * The number of new VSI support nodes that shall be added will be saved
+ * to the @new_num_nodes table for the VSI layer.
+ */
+static void
+ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw,
+                                   struct ice_sched_node *vsi_node,
+                                   unsigned int new_numqs, u16 *new_num_nodes)
+{
+    u32 vsi_nodes_cnt = 1;
+    u32 max_queue_cnt = 1;
+    u32 qgl, vsil;
+
+    qgl = ice_sched_get_qgrp_layer(hw);
+    vsil = ice_sched_get_vsi_layer(hw);
+
+    for (u32 i = vsil; i <= qgl; i++)
+        max_queue_cnt *= hw->max_children[i];
+
+    while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL)
+        vsi_nodes_cnt++;
+
+    if (new_numqs > (max_queue_cnt * vsi_nodes_cnt))
+        new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) -
+                              vsi_nodes_cnt;
+}
+
 /**
  * ice_sched_update_vsi_child_nodes - update VSI child nodes
  * @pi: port information structure
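
A worked example of the arithmetic in ice_sched_recalc_vsi_support_nodes() above; the layer span and max_children values are invented for illustration, not real hardware limits, and DIV_ROUND_UP is redefined locally to mirror the kernel macro:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    /* Pretend the VSI layer and the qgroup layer are adjacent and
     * each node may have 8 children on both layers (made-up values).
     */
    unsigned int max_children[] = { 8, 8 };   /* layers vsil..qgl */
    unsigned int max_queue_cnt = 1;
    unsigned int vsi_nodes_cnt = 1;           /* one existing support node */
    unsigned int new_numqs = 200;             /* requested queue count */

    for (unsigned int i = 0; i < 2; i++)
        max_queue_cnt *= max_children[i];     /* 64 queues per support node */

    if (new_numqs > max_queue_cnt * vsi_nodes_cnt)
        printf("new VSI support nodes needed: %u\n",
               DIV_ROUND_UP(new_numqs, max_queue_cnt) - vsi_nodes_cnt);
    /* prints 3: ceil(200 / 64) = 4 nodes total, 1 already exists */
    return 0;
}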
@@ -1863,15 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
         return status;
     }
 
     if (new_numqs)
-        ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
-    /* Keep the max number of queue configuration all the time. Update the
-     * tree only if number of queues > previous number of queues. This may
+        ice_sched_recalc_vsi_support_nodes(hw, vsi_node,
+                                           new_numqs, new_num_nodes);
+    ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs,
+                                   new_num_nodes);
+
+    /* Never decrease the number of queues in the tree. Update the tree
+     * only if number of queues > previous number of queues. This may
      * leave some extra nodes in the tree if number of queues < previous
      * number but that wouldn't harm anything. Removing those extra nodes
      * may complicate the code if those nodes are part of SRL or
      * individually rate limited.
+     * Also, add the required VSI support nodes if the existing ones cannot
+     * handle the requested new number of queues.
      */
+    status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+                                             new_num_nodes);
+    if (status)
+        return status;
+
     status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
                                            new_num_nodes, owner);
     if (status)
@@ -2012,6 +2095,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
     return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
 }
 
+/**
+ * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI
+ * @pi: port information structure
+ * @vsi_node: pointer to the leftmost node of the VSI to be removed
+ * @owner: LAN or RDMA
+ * @tc: TC number
+ *
+ * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC.
+ *
+ * This function removes all the VSI support nodes associated with a given VSI
+ * and its LAN or RDMA children nodes from the scheduler tree.
+ */
+static int
+ice_sched_rm_vsi_subtree(struct ice_port_info *pi,
+                         struct ice_sched_node *vsi_node, u8 owner, u8 tc)
+{
+    u16 vsi_handle = vsi_node->vsi_handle;
+    bool all_vsi_nodes_removed = true;
+    int j = 0;
+
+    while (vsi_node) {
+        struct ice_sched_node *next_vsi_node;
+
+        if (ice_sched_is_leaf_node_present(vsi_node)) {
+            ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc);
+            return -EBUSY;
+        }
+
+        while (j < vsi_node->num_children) {
+            if (vsi_node->children[j]->owner == owner)
+                ice_free_sched_node(pi, vsi_node->children[j]);
+            else
+                j++;
+        }
+
+        next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
+
+        /* remove the VSI if it has no children */
+        if (!vsi_node->num_children)
+            ice_free_sched_node(pi, vsi_node);
+        else
+            all_vsi_nodes_removed = false;
+
+        vsi_node = next_vsi_node;
+    }
+
+    /* clean up aggregator related VSI info if any */
+    if (all_vsi_nodes_removed)
+        ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+
+    return 0;
+}
+
 /**
  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
  * @pi: port information structure
@@ -2038,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
     ice_for_each_traffic_class(i) {
         struct ice_sched_node *vsi_node, *tc_node;
-        u8 j = 0;
 
         tc_node = ice_sched_get_tc_node(pi, i);
         if (!tc_node)
@@ -2048,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
         if (!vsi_node)
             continue;
 
-        if (ice_sched_is_leaf_node_present(vsi_node)) {
-            ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
-            status = -EBUSY;
+        status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i);
+        if (status)
             goto exit_sched_rm_vsi_cfg;
-        }
-
-        while (j < vsi_node->num_children) {
-            if (vsi_node->children[j]->owner == owner) {
-                ice_free_sched_node(pi, vsi_node->children[j]);
-
-                /* reset the counter again since the num
-                 * children will be updated after node removal
-                 */
-                j = 0;
-            } else {
-                j++;
-            }
-        }
-
-        /* remove the VSI if it has no children */
-        if (!vsi_node->num_children) {
-            ice_free_sched_node(pi, vsi_node);
-            vsi_ctx->sched.vsi_node[i] = NULL;
+
+        vsi_ctx->sched.vsi_node[i] = NULL;
 
-            /* clean up aggregator related VSI info if any */
-            ice_sched_rm_agg_vsi_info(pi, vsi_handle);
-        }
         if (owner == ICE_SCHED_NODE_OWNER_LAN)
             vsi_ctx->sched.max_lanq[i] = 0;
         else
+13 -5
@@ -1801,11 +1801,19 @@ void idpf_vc_event_task(struct work_struct *work)
     if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
         return;
 
-    if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
-        test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
-        set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
-        idpf_init_hard_reset(adapter);
-    }
+    if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags))
+        goto func_reset;
+
+    if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags))
+        goto drv_load;
+
+    return;
+
+func_reset:
+    idpf_vc_xn_shutdown(adapter->vcxn_mngr);
+drv_load:
+    set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+    idpf_init_hard_reset(adapter);
 }
 
 /**
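
In effect, a function-triggered reset now shuts down the virtchnl transaction manager before entering the hard-reset path, so requests issued while the mailbox is dead fail immediately rather than waiting out their timeouts; the driver-load path jumps past the shutdown because no mailbox transactions exist yet.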
@@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
 {
     struct idpf_tx_offload_params offload = { };
     struct idpf_tx_buf *first;
+    int csum, tso, needed;
     unsigned int count;
     __be16 protocol;
-    int csum, tso;
 
     count = idpf_tx_desc_count_required(tx_q, skb);
     if (unlikely(!count))
         return idpf_tx_drop_skb(tx_q, skb);
 
-    if (idpf_tx_maybe_stop_common(tx_q,
-                                  count + IDPF_TX_DESCS_PER_CACHE_LINE +
-                                  IDPF_TX_DESCS_FOR_CTX)) {
+    needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
+    if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+                                   IDPF_DESC_UNUSED(tx_q),
+                                   needed, needed)) {
         idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
 
         u64_stats_update_begin(&tx_q->stats_sync);
+17 -28
@@ -2184,6 +2184,19 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
     desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
 }
 
+/* Global conditions to tell whether the txq (and related resources)
+ * has room to allow the use of "size" descriptors.
+ */
+static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size)
+{
+    if (IDPF_DESC_UNUSED(tx_q) < size ||
+        IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+            IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) ||
+        IDPF_TX_BUF_RSV_LOW(tx_q))
+        return 0;
+
+    return 1;
+}
+
 /**
  * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
  * @tx_q: the queue to be checked
@@ -2194,29 +2207,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
 static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
                                      unsigned int descs_needed)
 {
-    if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
-        goto out;
-
-    /* If there are too many outstanding completions expected on the
-     * completion queue, stop the TX queue to give the device some time to
-     * catch up
-     */
-    if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
-                 IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
-        goto splitq_stop;
-
-    /* Also check for available book keeping buffers; if we are low, stop
-     * the queue to wait for more completions
-     */
-    if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
-        goto splitq_stop;
-
-    return 0;
-
-splitq_stop:
-    netif_stop_subqueue(tx_q->netdev, tx_q->idx);
-
-out:
+    if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+                                  idpf_txq_has_room(tx_q, descs_needed),
+                                  1, 1))
+        return 0;
+
     u64_stats_update_begin(&tx_q->stats_sync);
     u64_stats_inc(&tx_q->q_stats.q_busy);
     u64_stats_update_end(&tx_q->stats_sync);
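
For context on the race being closed: the old splitq path stopped the queue with netif_stop_subqueue() after separate threshold checks, with no re-check afterwards, so a completion racing in between could leave the queue stopped with nobody left to wake it. Below is a paraphrased sketch of the re-check pattern behind netif_subqueue_maybe_stop() (the real macros live in include/net/netdev_queues.h; names and details here are simplified, not the kernel text):

/* Return values: 1 = room left, queue untouched; 0 = queue stopped;
 * -1 = queue stopped, then restarted because the completion path freed
 * space between the first check and the stop. get_desc is a macro
 * argument, so it is re-evaluated after the stop -- that second look
 * is what closes the race.
 */
#define maybe_stop_sketch(txq, get_desc, stop_thrs, start_thrs)	\
({								\
	int _res = 1;						\
								\
	if (unlikely((get_desc) < (stop_thrs))) {		\
		netif_tx_stop_queue(txq);			\
		smp_mb__after_atomic();				\
		_res = 0;					\
		if (unlikely((get_desc) >= (start_thrs))) {	\
			netif_tx_start_queue(txq);		\
			_res = -1;				\
		}						\
	}							\
	_res;							\
})

In the splitq hunk above the driver passes idpf_txq_has_room(tx_q, descs_needed) as get_desc with both thresholds set to 1, so the helper stops the queue when there is no room and then re-evaluates idpf_txq_has_room() once more after stopping; the singleq hunk instead passes the raw IDPF_DESC_UNUSED(tx_q) count against needed.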
@@ -2242,12 +2237,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
     nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
     tx_q->next_to_use = val;
 
-    if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) {
-        u64_stats_update_begin(&tx_q->stats_sync);
-        u64_stats_inc(&tx_q->q_stats.q_busy);
-        u64_stats_update_end(&tx_q->stats_sync);
-    }
-
     /* Force memory writes to complete before letting h/w
      * know there are new descriptors to fetch. (Only
      * applicable for weak-ordered memory model archs,
@@ -1049,12 +1049,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
                                   u16 cleaned_count);
 int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
 
-static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
-                                             u32 needed)
-{
-    return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
-                                      IDPF_DESC_UNUSED(tx_q),
-                                      needed, needed);
-}
-
 #endif /* !_IDPF_TXRX_H_ */
@@ -347,7 +347,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr)
  * All waiting threads will be woken-up and their transaction aborted. Further
  * operations on that object will fail.
  */
-static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr)
 {
     int i;
 
@@ -150,5 +150,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport);
 int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
 int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
 int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr);
 
 #endif /* _IDPF_VIRTCHNL_H_ */