Merge branch 'symmetric-or-xor-rss-hash'

Gal Pressman says:

====================
Symmetric OR-XOR RSS hash

Add support for a new type of input_xfrm: Symmetric OR-XOR.
Symmetric OR-XOR transforms the hash input fields as follows:
(SRC_IP | DST_IP, SRC_IP ^ DST_IP, SRC_PORT | DST_PORT, SRC_PORT ^ DST_PORT)

Configuration is done through ethtool -x/X command.
For mlx5, the default is already a symmetric hash; this series exposes
that to userspace and allows the feature to be enabled or disabled.

v5: https://lore.kernel.org/20250220113435.417487-1-gal@nvidia.com
v4: https://lore.kernel.org/20250216182453.226325-1-gal@nvidia.com
v3: https://lore.kernel.org/20250205135341.542720-1-gal@nvidia.com
v2: https://lore.kernel.org/20250203150039.519301-1-gal@nvidia.com
====================

Link: https://patch.msgid.link/20250224174416.499070-1-gal@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-02-25 18:31:07 -08:00
17 changed files with 155 additions and 41 deletions
+1 -1
View File
@@ -1934,7 +1934,7 @@ ETHTOOL_A_RSS_INDIR attribute returns RSS indirection table where each byte
indicates queue number.
ETHTOOL_A_RSS_INPUT_XFRM attribute is a bitmap indicating the type of
transformation applied to the input protocol fields before given to the RSS
hfunc. Current supported option is symmetric-xor.
hfunc. Current supported options are symmetric-xor and symmetric-or-xor.
PLCA_GET_CFG
============
+11 -4
View File
@@ -49,14 +49,21 @@ destination address) and TCP/UDP (source port, destination port) tuples
are swapped, the computed hash is the same. This is beneficial in some
applications that monitor TCP/IP flows (IDS, firewalls, ...etc) and need
both directions of the flow to land on the same Rx queue (and CPU). The
"Symmetric-XOR" is a type of RSS algorithms that achieves this hash
symmetry by XORing the input source and destination fields of the IP
and/or L4 protocols. This, however, results in reduced input entropy and
could potentially be exploited. Specifically, the algorithm XORs the input
"Symmetric-XOR" and "Symmetric-OR-XOR" are types of RSS algorithms that
achieve this hash symmetry by XOR/ORing the input source and destination
fields of the IP and/or L4 protocols. This, however, results in reduced
input entropy and could potentially be exploited.
Specifically, the "Symmetric-XOR" algorithm XORs the input
as follows::
# (SRC_IP ^ DST_IP, SRC_IP ^ DST_IP, SRC_PORT ^ DST_PORT, SRC_PORT ^ DST_PORT)
The "Symmetric-OR-XOR" algorithm, on the other hand, transforms the input as
follows::
# (SRC_IP | DST_IP, SRC_IP ^ DST_IP, SRC_PORT | DST_PORT, SRC_PORT ^ DST_PORT)
The result is then fed to the underlying RSS algorithm.
Some advanced NICs allow steering packets to queues based on
@@ -1808,7 +1808,7 @@ static int iavf_set_rxfh(struct net_device *netdev,
static const struct ethtool_ops iavf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE,
.cap_rss_sym_xor_supported = true,
.supported_input_xfrm = RXH_XFRM_SYM_XOR,
.get_drvinfo = iavf_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = iavf_get_ringparam,
+1 -1
View File
@@ -4770,7 +4770,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE |
ETHTOOL_COALESCE_RX_USECS_HIGH,
.cap_rss_sym_xor_supported = true,
.supported_input_xfrm = RXH_XFRM_SYM_XOR,
.rxfh_per_ctx_key = true,
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
@@ -161,6 +161,7 @@ static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
{
enum mlx5_traffic_types tt;
rss->hash.symmetric = true;
rss->hash.hfunc = ETH_RSS_HASH_TOP;
netdev_rss_key_fill(rss->hash.toeplitz_hash_key,
sizeof(rss->hash.toeplitz_hash_key));
@@ -566,7 +567,7 @@ inner_tir:
return final_err;
}
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric)
{
if (indir)
memcpy(indir, rss->indir.table,
@@ -579,11 +580,14 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
if (hfunc)
*hfunc = rss->hash.hfunc;
if (symmetric)
*symmetric = rss->hash.symmetric;
return 0;
}
int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
const u8 *key, const u8 *hfunc,
const u8 *key, const u8 *hfunc, const bool *symmetric,
u32 *rqns, u32 *vhca_ids, unsigned int num_rqns)
{
bool changed_indir = false;
@@ -623,6 +627,11 @@ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
rss->indir.actual_table_size * sizeof(*rss->indir.table));
}
if (symmetric) {
rss->hash.symmetric = *symmetric;
changed_hash = true;
}
if (changed_indir && rss->enabled) {
err = mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns);
if (err) {
@@ -47,9 +47,9 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss);
int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
struct mlx5e_packet_merge_param *pkt_merge_param);
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, bool *symmetric);
int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
const u8 *key, const u8 *hfunc,
const u8 *key, const u8 *hfunc, const bool *symmetric,
u32 *rqns, u32 *vhca_ids, unsigned int num_rqns);
struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
@@ -194,7 +194,7 @@ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int n
}
int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
u32 *indir, u8 *key, u8 *hfunc)
u32 *indir, u8 *key, u8 *hfunc, bool *symmetric)
{
struct mlx5e_rss *rss;
@@ -205,11 +205,12 @@ int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
if (!rss)
return -ENOENT;
return mlx5e_rss_get_rxfh(rss, indir, key, hfunc);
return mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric);
}
int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
const u32 *indir, const u8 *key, const u8 *hfunc)
const u32 *indir, const u8 *key, const u8 *hfunc,
const bool *symmetric)
{
u32 *vhca_ids = get_vhca_ids(res, 0);
struct mlx5e_rss *rss;
@@ -221,8 +222,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
if (!rss)
return -ENOENT;
return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, vhca_ids,
res->rss_nch);
return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, symmetric,
res->rss_rqns, vhca_ids, res->rss_nch);
}
int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx,
@@ -49,9 +49,10 @@ void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *ch
/* Configuration API */
void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
u32 *indir, u8 *key, u8 *hfunc);
u32 *indir, u8 *key, u8 *hfunc, bool *symmetric);
int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
const u32 *indir, const u8 *key, const u8 *hfunc);
const u32 *indir, const u8 *key, const u8 *hfunc,
const bool *symmetric);
int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx,
enum mlx5_traffic_types tt);
@@ -124,7 +124,7 @@ void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
MLX5_SET(tirc, tirc, rx_hash_symmetric, rss_hash->symmetric);
memcpy(rss_key, rss_hash->toeplitz_hash_key, len);
}
@@ -9,6 +9,7 @@
struct mlx5e_rss_params_hash {
u8 hfunc;
u8 toeplitz_hash_key[40];
bool symmetric;
};
struct mlx5e_rss_params_traffic_type {
@@ -1456,18 +1456,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *
{
struct mlx5e_priv *priv = netdev_priv(netdev);
u32 rss_context = rxfh->rss_context;
bool symmetric;
int err;
mutex_lock(&priv->state_lock);
err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context,
rxfh->indir, rxfh->key, &rxfh->hfunc);
rxfh->indir, rxfh->key, &rxfh->hfunc, &symmetric);
mutex_unlock(&priv->state_lock);
return err;
if (err)
return err;
if (symmetric)
rxfh->input_xfrm = RXH_XFRM_SYM_OR_XOR;
return 0;
}
static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
struct netlink_ext_ack *extack)
{
bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR;
struct mlx5e_priv *priv = netdev_priv(dev);
u32 *rss_context = &rxfh->rss_context;
u8 hfunc = rxfh->hfunc;
@@ -1502,7 +1511,8 @@ static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxf
err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context,
rxfh->indir, rxfh->key,
hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc,
rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric);
unlock:
mutex_unlock(&priv->state_lock);
@@ -2611,6 +2621,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
ETHTOOL_COALESCE_MAX_FRAMES |
ETHTOOL_COALESCE_USE_ADAPTIVE |
ETHTOOL_COALESCE_USE_CQE,
.supported_input_xfrm = RXH_XFRM_SYM_OR_XOR,
.get_drvinfo = mlx5e_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ext_state = mlx5e_get_link_ext_state,
+2 -3
View File
@@ -763,13 +763,12 @@ struct kernel_ethtool_ts_info {
/**
* struct ethtool_ops - optional netdev operations
* @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*.
* @cap_link_lanes_supported: indicates if the driver supports lanes
* parameter.
* @cap_rss_ctx_supported: indicates if the driver supports RSS
* contexts via legacy API, drivers implementing @create_rxfh_context
* do not have to set this bit.
* @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor
* RSS.
* @rxfh_per_ctx_key: device supports setting different RSS key for each
* additional context. Netlink API should report hfunc, key, and input_xfrm
* for every context, not just context 0.
@@ -995,9 +994,9 @@ struct kernel_ethtool_ts_info {
* of the generic netdev features interface.
*/
struct ethtool_ops {
u32 supported_input_xfrm:8;
u32 cap_link_lanes_supported:1;
u32 cap_rss_ctx_supported:1;
u32 cap_rss_sym_xor_supported:1;
u32 rxfh_per_ctx_key:1;
u32 cap_rss_rxnfc_adds:1;
u32 rxfh_indir_space;
+4
View File
@@ -2289,6 +2289,10 @@ static inline int ethtool_validate_duplex(__u8 duplex)
* be exploited to reduce the RSS queue spread.
*/
#define RXH_XFRM_SYM_XOR (1 << 0)
/* Similar to SYM_XOR, except that one copy of the XOR'ed fields is replaced by
* an OR of the same fields
*/
#define RXH_XFRM_SYM_OR_XOR (1 << 1)
#define RXH_XFRM_NO_CHANGE 0xff
/* L2-L4 network traffic flow types */
+4 -4
View File
@@ -1011,11 +1011,11 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (rc)
return rc;
/* Sanity check: if symmetric-xor is set, then:
/* Sanity check: if symmetric-xor/symmetric-or-xor is set, then:
* 1 - no other fields besides IP src/dst and/or L4 src/dst
* 2 - If src is set, dst must also be set
*/
if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
if ((rxfh.input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) &&
((info.data & ~(RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3)) ||
(!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) ||
@@ -1388,11 +1388,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
/* Check input data transformation capabilities */
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
rxfh.input_xfrm != RXH_XFRM_SYM_OR_XOR &&
rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
return -EINVAL;
if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE &&
(rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
!ops->cap_rss_sym_xor_supported)
rxfh.input_xfrm & ~ops->supported_input_xfrm)
return -EOPNOTSUPP;
create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC;
@@ -15,6 +15,7 @@ TEST_PROGS = \
nic_performance.py \
pp_alloc_fail.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
#
+87
View File
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
import multiprocessing
import socket
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import rand_port
def traffic(cfg, local_port, remote_port, ipver):
    """
    Receive one UDP datagram from the remote endpoint and return the CPU
    reported by the socket's SO_INCOMING_CPU option.

    A local UDP socket is bound to local_port and connected to the remote
    address on remote_port; the remote host is then asked (through socat)
    to send a datagram from remote_port to local_port.

    cfg is expected to provide remote_addr_v, addr_v and remote, as used
    below. ipver is the string "4" or anything else for IPv6.
    """
    af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
    # Use the socket as a context manager so it is closed on every exit
    # path (including exceptions); the caller invokes this many times in a
    # retry loop and immediately reuses the same ports in swapped roles.
    with socket.socket(af_inet, socket.SOCK_DGRAM) as sock:
        sock.bind(("", local_port))
        sock.connect((cfg.remote_addr_v[ipver], remote_port))
        tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
        cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
        # NOTE(review): presumably blocks until the socket is readable or
        # the 5 (seconds?) timeout expires -- confirm against lib.py.
        fd_read_timeout(sock.fileno(), 5)
        return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
def test_rss_input_xfrm(cfg, ipver):
    """
    Test symmetric input_xfrm.
    If symmetric RSS hash is configured, send traffic twice, swapping the
    src/dst UDP ports, and verify that the same queue is receiving the traffic
    in both cases (IPs are constant).

    The test is skipped when fewer than two CPUs are available or when the
    device does not report a symmetric input_xfrm. Up to 100 attempts are
    made to collect 10 successful port-swapped pairs; if that many cannot
    be collected the test fails. Finally, the pairs must have landed on at
    least two distinct CPUs overall.
    """
    # Values of the RXH_XFRM_* uapi bits: SYM_XOR is (1 << 0) and
    # SYM_OR_XOR is (1 << 1).
    xfrm_sym_xor = 1
    xfrm_sym_or_xor = 2

    if multiprocessing.cpu_count() < 2:
        raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")

    input_xfrm = cfg.ethnl.rss_get(
        {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')

    # Check for symmetric xor/or-xor (a missing or zero value is not
    # symmetric either).
    if input_xfrm not in (xfrm_sym_xor, xfrm_sym_or_xor):
        raise KsftSkipEx("Symmetric RSS hash not requested")

    cpus = set()
    successful = 0
    for _ in range(100):
        try:
            port1 = rand_port(socket.SOCK_DGRAM)
            port2 = rand_port(socket.SOCK_DGRAM)
            cpu1 = traffic(cfg, port1, port2, ipver)
            cpu2 = traffic(cfg, port2, port1, ipver)
            cpus.update([cpu1, cpu2])
            ksft_eq(
                cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")

            successful += 1
            if successful == 10:
                break
        except Exception:
            # Any ordinary error in one attempt just triggers a retry.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of being silently retried.
            continue
    else:
        # The loop ran out of attempts without reaching 10 successes.
        raise KsftFailEx("Failed to run traffic")

    ksft_ge(len(cpus), 2,
            comment=f"Received traffic on less than two cpus {cpus = }")
def test_rss_input_xfrm_ipv4(cfg):
    """Run the symmetric input_xfrm test over IPv4 (requires IPv4 on cfg)."""
    ipver = "4"
    cfg.require_ipver(ipver)
    test_rss_input_xfrm(cfg, ipver)
def test_rss_input_xfrm_ipv6(cfg):
    """Run the symmetric input_xfrm test over IPv6 (requires IPv6 on cfg)."""
    ipver = "6"
    cfg.require_ipver(ipver)
    test_rss_input_xfrm(cfg, ipver)
def main() -> None:
    """
    Set up the endpoint test environment, attach the ethtool and netdev
    netlink families to it, run both IP-version test cases and exit with
    the kselftest result.
    """
    test_cases = [test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6]

    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        cfg.ethnl = EthtoolFamily()
        cfg.netdevnl = NetdevFamily()

        ksft_run(test_cases, args=(cfg, ))
    ksft_exit()


if __name__ == "__main__":
    main()
+5 -12
View File
@@ -185,20 +185,13 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
def rand_port():
def rand_port(type=socket.SOCK_STREAM):
"""
Get a random unprivileged port, try to make sure it's not already used.
Get a random unprivileged port.
"""
for _ in range(1000):
port = random.randint(10000, 65535)
try:
with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
s.bind(("", port))
return port
except OSError as e:
if e.errno != errno.EADDRINUSE:
raise
raise Exception("Can't find any free unprivileged port")
with socket.socket(socket.AF_INET6, type) as s:
s.bind(("", 0))
return s.getsockname()[1]
def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):