From 88e47c93b3a2a45b55f03f238b68be826917bb2e Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:17 +0000 Subject: [PATCH 1/8] net: devmem: move list_add to net_devmem_bind_dmabuf. It's annoying for the list_add to be outside net_devmem_bind_dmabuf, but the list_del is in net_devmem_unbind_dmabuf. Make it consistent by having both the list_add/del be inside the net_devmem_[un]bind_dmabuf. Cc: ap420073@gmail.com Signed-off-by: Mina Almasry Tested-by: Taehee Yoo Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 5 ++++- net/core/devmem.h | 5 ++++- net/core/netdev-genl.c | 8 ++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 0dba26baae18..b3a62ca0df65 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -178,7 +178,8 @@ err_close_rxq: struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, enum dma_data_direction direction, - unsigned int dmabuf_fd, struct netlink_ext_ack *extack) + unsigned int dmabuf_fd, struct netdev_nl_sock *priv, + struct netlink_ext_ack *extack) { struct net_devmem_dmabuf_binding *binding; static u32 id_alloc_next; @@ -299,6 +300,8 @@ net_devmem_bind_dmabuf(struct net_device *dev, if (err < 0) goto err_free_chunks; + list_add(&binding->list, &priv->bindings); + return binding; err_free_chunks: diff --git a/net/core/devmem.h b/net/core/devmem.h index 58d8d3c1b945..e7ba77050b8f 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -11,6 +11,7 @@ #define _NET_DEVMEM_H #include +#include struct netlink_ext_ack; @@ -82,7 +83,8 @@ void __net_devmem_dmabuf_binding_free(struct work_struct *wq); struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, enum dma_data_direction direction, - unsigned int dmabuf_fd, struct netlink_ext_ack *extack); + unsigned int dmabuf_fd, struct netdev_nl_sock *priv, + struct netlink_ext_ack *extack); struct net_devmem_dmabuf_binding *net_devmem_lookup_dmabuf(u32 id); void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, @@ -170,6 +172,7 @@ static inline void __net_devmem_dmabuf_binding_free(struct work_struct *wq) static inline struct net_devmem_dmabuf_binding * net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, enum dma_data_direction direction, + struct netdev_nl_sock *priv, struct netlink_ext_ack *extack) { return ERR_PTR(-EOPNOTSUPP); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 762570dcda61..2afa7b2141aa 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -908,7 +908,7 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) } binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd, - info->extack); + priv, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock; @@ -943,8 +943,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unbind; } - list_add(&binding->list, &priv->bindings); - nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); @@ -1020,15 +1018,13 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_netdev; } - binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, + binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock_netdev; } - list_add(&binding->list, &priv->bindings); - nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); From 170ebc60b79a5745addf7b408bce9b86bbd7c29d Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:18 +0000 Subject: [PATCH 2/8] page_pool: fix ugly page_pool formatting Minor cleanup; this line is badly formatted. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 974f3eef2efa..4011eb305cee 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -867,8 +867,8 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, if (!allow_direct) allow_direct = page_pool_napi_local(pool); - netmem = - __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct); + netmem = __page_pool_put_page(pool, netmem, dma_sync_size, + allow_direct); if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); From 85cea17d15c9d6c54d6dfe27e7246337eb6a025d Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:19 +0000 Subject: [PATCH 3/8] net: devmem: preserve sockc_err Preserve the error code returned by sock_cmsg_send and return that on err. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7b6ab41b496..f64f8276a73c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,7 +1067,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; int process_backlog = 0; - bool sockc_valid = true; + int sockc_err = 0; int zc = 0; long timeo; @@ -1075,13 +1075,10 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) }; if (msg->msg_controllen) { - err = sock_cmsg_send(sk, msg, &sockc); - if (unlikely(err)) - /* Don't return error until MSG_FASTOPEN has been - * processed; that may succeed even if the cmsg is - * invalid. - */ - sockc_valid = false; + sockc_err = sock_cmsg_send(sk, msg, &sockc); + /* Don't return error until MSG_FASTOPEN has been processed; + * that may succeed even if the cmsg is invalid. + */ } if ((flags & MSG_ZEROCOPY) && size) { @@ -1092,7 +1089,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) } else if (sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb), - sockc_valid && !!sockc.dmabuf_id); + !sockc_err && sockc.dmabuf_id); if (!uarg) { err = -ENOBUFS; goto out_err; @@ -1102,7 +1099,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) else uarg_to_msgzc(uarg)->zerocopy = 0; - if (sockc_valid && sockc.dmabuf_id) { + if (!sockc_err && sockc.dmabuf_id) { binding = net_devmem_get_binding(sk, sockc.dmabuf_id); if (IS_ERR(binding)) { err = PTR_ERR(binding); @@ -1116,7 +1113,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) zc = MSG_SPLICE_PAGES; } - if (sockc_valid && sockc.dmabuf_id && + if (!sockc_err && sockc.dmabuf_id && (!(flags & MSG_ZEROCOPY) || !sock_flag(sk, SOCK_ZEROCOPY))) { err = -EINVAL; goto out_err; @@ -1160,9 +1157,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } - if (!sockc_valid) { - if (!err) - err = -EINVAL; + if (sockc_err) { + err = sockc_err; goto out_err; } From 12d31142e63a1ff78b93e998a519e22e2425cf18 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:20 +0000 Subject: [PATCH 4/8] net: devmem: ksft: add ipv4 support ncdevmem supports both ipv4 and ipv6, but the ksft is currently ipv6-only. Propagate the ipv4 support to the ksft, so that folks that are limited to these networks can also test. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-5-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index d7f6a76eb2b7..6ff6421979fe 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -21,30 +21,28 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port}" - with bkg(listen_cmd) as socat: + with bkg(listen_cmd) as ncdevmem: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port}", host=cfg.remote, shell=True) - ksft_eq(socat.stdout.strip(), "hello\nworld") + ksft_eq(ncdevmem.stdout.strip(), "hello\nworld") @ksft_disruptive def check_tx(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: + with bkg(listen_cmd) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") From 57605ae8e1b61be0029b4ff39298e6eaef824948 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:21 +0000 Subject: [PATCH 5/8] net: devmem: ksft: add exit_wait to make rx test pass This exit_wait seems necessary to make the rx side test pass for me. I think this is just missed from the original test add patch. Add it now. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 6ff6421979fe..6c743bc2ab39 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -26,7 +26,7 @@ def check_rx(cfg) -> None: port = rand_port() listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port}" - with bkg(listen_cmd) as ncdevmem: + with bkg(listen_cmd, exit_wait=True) as ncdevmem: wait_port_listen(port) cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port}", host=cfg.remote, shell=True) From 243d47a5e1e47b2b72d654d7278fc8bff0199b0c Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:22 +0000 Subject: [PATCH 6/8] net: devmem: ksft: add 5 tuple FS support ncdevmem supports drivers that are limited to either 3-tuple or 5-tuple FS support, but the ksft is currently 3-tuple only. Support drivers that have 5-tuple FS supported by adding a ksft arg. Signed-off-by: Mina Almasry fix 5-tuple fix 5-tuple Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-7-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 4 ++-- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 6c743bc2ab39..52ee52c51029 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,11 +24,11 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr}" with bkg(listen_cmd, exit_wait=True) as ncdevmem: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}", host=cfg.remote, shell=True) ksft_eq(ncdevmem.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index fc7ba7d71502..a226f2c7af83 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -373,7 +373,8 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) server_addr = strrchr(server_addr, ':') + 1; } - return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + /* Try configure 5-tuple */ + if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", ifname, type, client_ip ? "src-ip" : "", @@ -381,7 +382,17 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) server_addr, client_ip ? "src-port" : "", client_ip ? port : "", - port, start_queue); + port, start_queue)) + /* If that fails, try configure 3-tuple */ + if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", + ifname, + type, + server_addr, + port, start_queue)) + /* If that fails, return error */ + return -1; + + return 0; } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, From baa18bc5353fcb99bc49944c51c6c7829cb1da55 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:23 +0000 Subject: [PATCH 7/8] net: devmem: ksft: upgrade rx test to send 1K data The current test just sends "hello\nworld" and verifies that is the string received on the RX side. That is fine, but improve the test a bit by sending 1K data. The test should be improved further to send more data, but for now this should be a welcome improvement. The test will send a repeating pattern of 0x01, 0x02, ... 0x06. The ncdevmem `-v 7` flag will verify this pattern. ncdevmem will provide useful debugging info when the test fails, such as the frags received and verified fine, and which frag exactly failed, what was the expected byte pattern, and what is the actual byte pattern received. All this debug information will be useful when the test fails. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-8-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 52ee52c51029..7947650210a0 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,13 +24,15 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}", host=cfg.remote, shell=True) + cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ + head -c 1K | {socat}", host=cfg.remote, shell=True) - ksft_eq(ncdevmem.stdout.strip(), "hello\nworld") + ksft_eq(ncdevmem.ret, 0) @ksft_disruptive From affffcbb87266b76b6f83edb39ae404604ffc6b5 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 23 May 2025 23:05:24 +0000 Subject: [PATCH 8/8] net: devmem: ncdevmem: remove unused variable This variable is unused and can be removed. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250523230524.1107879-9-almasrymina@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index a226f2c7af83..02e4d3d7ded2 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -540,7 +540,6 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; - struct netdev_queue_id *queues; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; struct sockaddr_in6 server_sin;