Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:

====================
pull-request: bpf-next 2025-03-06

We've added 6 non-merge commits during the last 13 day(s) which contain
a total of 6 files changed, 230 insertions(+), 56 deletions(-).

The main changes are:

1) Add XDP metadata support for tun driver, from Marcus Wichelmann.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  selftests/bpf: Fix file descriptor assertion in open_tuntap helper
  selftests/bpf: Add test for XDP metadata support in tun driver
  selftests/bpf: Refactor xdp_context_functional test and bpf program
  selftests/bpf: Move open_tuntap to network helpers
  net: tun: Enable transfer of XDP metadata to skb
  net: tun: Enable XDP metadata support
====================

Link: https://patch.msgid.link/20250307055335.441298-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
+23
-5
@@ -1535,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
|
||||
|
||||
static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
|
||||
struct page_frag *alloc_frag, char *buf,
|
||||
int buflen, int len, int pad)
|
||||
int buflen, int len, int pad,
|
||||
int metasize)
|
||||
{
|
||||
struct sk_buff *skb = build_skb(buf, buflen);
|
||||
|
||||
@@ -1544,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
|
||||
|
||||
skb_reserve(skb, pad);
|
||||
skb_put(skb, len);
|
||||
if (metasize)
|
||||
skb_metadata_set(skb, metasize);
|
||||
skb_set_owner_w(skb, tfile->socket.sk);
|
||||
|
||||
get_page(alloc_frag->page);
|
||||
@@ -1603,6 +1606,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
||||
char *buf;
|
||||
size_t copied;
|
||||
int pad = TUN_RX_PAD;
|
||||
int metasize = 0;
|
||||
int err = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -1630,7 +1634,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
||||
if (hdr->gso_type || !xdp_prog) {
|
||||
*skb_xdp = 1;
|
||||
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
|
||||
pad);
|
||||
pad, metasize);
|
||||
}
|
||||
|
||||
*skb_xdp = 0;
|
||||
@@ -1644,7 +1648,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
||||
u32 act;
|
||||
|
||||
xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
|
||||
xdp_prepare_buff(&xdp, buf, pad, len, false);
|
||||
xdp_prepare_buff(&xdp, buf, pad, len, true);
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||
if (act == XDP_REDIRECT || act == XDP_TX) {
|
||||
@@ -1665,12 +1669,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
||||
|
||||
pad = xdp.data - xdp.data_hard_start;
|
||||
len = xdp.data_end - xdp.data;
|
||||
|
||||
/* It is known that the xdp_buff was prepared with metadata
|
||||
* support, so the metasize will never be negative.
|
||||
*/
|
||||
metasize = xdp.data - xdp.data_meta;
|
||||
}
|
||||
bpf_net_ctx_clear(bpf_net_ctx);
|
||||
rcu_read_unlock();
|
||||
local_bh_enable();
|
||||
|
||||
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
|
||||
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
|
||||
metasize);
|
||||
|
||||
out:
|
||||
bpf_net_ctx_clear(bpf_net_ctx);
|
||||
@@ -2353,6 +2363,7 @@ static int tun_xdp_one(struct tun_struct *tun,
|
||||
struct sk_buff_head *queue;
|
||||
u32 rxhash = 0, act;
|
||||
int buflen = hdr->buflen;
|
||||
int metasize = 0;
|
||||
int ret = 0;
|
||||
bool skb_xdp = false;
|
||||
struct page *page;
|
||||
@@ -2368,7 +2379,6 @@ static int tun_xdp_one(struct tun_struct *tun,
|
||||
}
|
||||
|
||||
xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
|
||||
xdp_set_data_meta_invalid(xdp);
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
ret = tun_xdp_act(tun, xdp_prog, xdp, act);
|
||||
@@ -2408,6 +2418,14 @@ build:
|
||||
skb_reserve(skb, xdp->data - xdp->data_hard_start);
|
||||
skb_put(skb, xdp->data_end - xdp->data);
|
||||
|
||||
/* The externally provided xdp_buff may have no metadata support, which
|
||||
* is marked by xdp->data_meta being xdp->data + 1. This will lead to a
|
||||
* metasize of -1 and is the reason why the condition checks for > 0.
|
||||
*/
|
||||
metasize = xdp->data - xdp->data_meta;
|
||||
if (metasize > 0)
|
||||
skb_metadata_set(skb, metasize);
|
||||
|
||||
if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
|
||||
atomic_long_inc(&tun->rx_frame_errors);
|
||||
kfree_skb(skb);
|
||||
|
||||
@@ -548,6 +548,34 @@ void close_netns(struct nstoken *token)
|
||||
free(token);
|
||||
}
|
||||
|
||||
int open_tuntap(const char *dev_name, bool need_mac)
|
||||
{
|
||||
int err = 0;
|
||||
struct ifreq ifr;
|
||||
int fd = open("/dev/net/tun", O_RDWR);
|
||||
|
||||
if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
|
||||
return -1;
|
||||
|
||||
ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
|
||||
strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
|
||||
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
|
||||
|
||||
err = ioctl(fd, TUNSETIFF, &ifr);
|
||||
if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = fcntl(fd, F_SETFL, O_NONBLOCK);
|
||||
if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
int get_socket_local_port(int sock_fd)
|
||||
{
|
||||
struct sockaddr_storage addr;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
typedef __u16 __sum16;
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_packet.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/ethtool.h>
|
||||
@@ -85,6 +86,8 @@ int get_socket_local_port(int sock_fd);
|
||||
int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
|
||||
int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
|
||||
|
||||
int open_tuntap(const char *dev_name, bool need_mac);
|
||||
|
||||
struct nstoken;
|
||||
/**
|
||||
* open_netns() - Switch to specified network namespace by name.
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
#include <time.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/icmp.h>
|
||||
|
||||
#include "test_progs.h"
|
||||
@@ -37,34 +36,6 @@ static inline int netns_delete(void)
|
||||
return system("ip netns del " NETNS ">/dev/null 2>&1");
|
||||
}
|
||||
|
||||
static int open_tuntap(const char *dev_name, bool need_mac)
|
||||
{
|
||||
int err = 0;
|
||||
struct ifreq ifr;
|
||||
int fd = open("/dev/net/tun", O_RDWR);
|
||||
|
||||
if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)"))
|
||||
return -1;
|
||||
|
||||
ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
|
||||
strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
|
||||
ifr.ifr_name[IFNAMSIZ - 1] = '\0';
|
||||
|
||||
err = ioctl(fd, TUNSETIFF, &ifr);
|
||||
if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = fcntl(fd, F_SETFL, O_NONBLOCK);
|
||||
if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
#define ICMP_PAYLOAD_SIZE 100
|
||||
|
||||
/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
|
||||
|
||||
@@ -4,12 +4,20 @@
|
||||
#include "test_xdp_context_test_run.skel.h"
|
||||
#include "test_xdp_meta.skel.h"
|
||||
|
||||
#define TX_ADDR "10.0.0.1"
|
||||
#define RX_ADDR "10.0.0.2"
|
||||
#define RX_NAME "veth0"
|
||||
#define TX_NAME "veth1"
|
||||
#define TX_NETNS "xdp_context_tx"
|
||||
#define RX_NETNS "xdp_context_rx"
|
||||
#define TAP_NAME "tap0"
|
||||
#define TAP_NETNS "xdp_context_tuntap"
|
||||
|
||||
#define TEST_PAYLOAD_LEN 32
|
||||
static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
|
||||
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
|
||||
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
|
||||
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
|
||||
};
|
||||
|
||||
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
|
||||
__u32 data_meta, __u32 data, __u32 data_end,
|
||||
@@ -112,7 +120,59 @@ void test_xdp_context_test_run(void)
|
||||
test_xdp_context_test_run__destroy(skel);
|
||||
}
|
||||
|
||||
void test_xdp_context_functional(void)
|
||||
static int send_test_packet(int ifindex)
|
||||
{
|
||||
int n, sock = -1;
|
||||
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
|
||||
|
||||
/* The ethernet header is not relevant for this test and doesn't need to
|
||||
* be meaningful.
|
||||
*/
|
||||
struct ethhdr eth = { 0 };
|
||||
|
||||
memcpy(packet, ð, sizeof(eth));
|
||||
memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
|
||||
|
||||
sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
|
||||
if (!ASSERT_GE(sock, 0, "socket"))
|
||||
goto err;
|
||||
|
||||
struct sockaddr_ll saddr = {
|
||||
.sll_family = PF_PACKET,
|
||||
.sll_ifindex = ifindex,
|
||||
.sll_halen = ETH_ALEN
|
||||
};
|
||||
n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr,
|
||||
sizeof(saddr));
|
||||
if (!ASSERT_EQ(n, sizeof(packet), "sendto"))
|
||||
goto err;
|
||||
|
||||
close(sock);
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (sock >= 0)
|
||||
close(sock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void assert_test_result(struct test_xdp_meta *skel)
|
||||
{
|
||||
int err;
|
||||
__u32 map_key = 0;
|
||||
__u8 map_value[TEST_PAYLOAD_LEN];
|
||||
|
||||
err = bpf_map__lookup_elem(skel->maps.test_result, &map_key,
|
||||
sizeof(map_key), &map_value,
|
||||
TEST_PAYLOAD_LEN, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "lookup test_result"))
|
||||
return;
|
||||
|
||||
ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN,
|
||||
"test_result map contains test payload");
|
||||
}
|
||||
|
||||
void test_xdp_context_veth(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
|
||||
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
|
||||
@@ -120,7 +180,7 @@ void test_xdp_context_functional(void)
|
||||
struct bpf_program *tc_prog, *xdp_prog;
|
||||
struct test_xdp_meta *skel = NULL;
|
||||
struct nstoken *nstoken = NULL;
|
||||
int rx_ifindex;
|
||||
int rx_ifindex, tx_ifindex;
|
||||
int ret;
|
||||
|
||||
tx_ns = netns_new(TX_NETNS, false);
|
||||
@@ -138,7 +198,6 @@ void test_xdp_context_functional(void)
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
|
||||
goto close;
|
||||
|
||||
SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
|
||||
SYS(close, "ip link set dev " RX_NAME " up");
|
||||
|
||||
skel = test_xdp_meta__open_and_load();
|
||||
@@ -179,9 +238,17 @@ void test_xdp_context_functional(void)
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
|
||||
goto close;
|
||||
|
||||
SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
|
||||
SYS(close, "ip link set dev " TX_NAME " up");
|
||||
ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
|
||||
|
||||
tx_ifindex = if_nametoindex(TX_NAME);
|
||||
if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
|
||||
goto close;
|
||||
|
||||
ret = send_test_packet(tx_ifindex);
|
||||
if (!ASSERT_OK(ret, "send_test_packet"))
|
||||
goto close;
|
||||
|
||||
assert_test_result(skel);
|
||||
|
||||
close:
|
||||
close_netns(nstoken);
|
||||
@@ -190,3 +257,67 @@ close:
|
||||
netns_free(tx_ns);
|
||||
}
|
||||
|
||||
void test_xdp_context_tuntap(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
|
||||
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
|
||||
struct netns_obj *ns = NULL;
|
||||
struct test_xdp_meta *skel = NULL;
|
||||
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
|
||||
int tap_fd = -1;
|
||||
int tap_ifindex;
|
||||
int ret;
|
||||
|
||||
ns = netns_new(TAP_NETNS, true);
|
||||
if (!ASSERT_OK_PTR(ns, "create and open ns"))
|
||||
return;
|
||||
|
||||
tap_fd = open_tuntap(TAP_NAME, true);
|
||||
if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
|
||||
goto close;
|
||||
|
||||
SYS(close, "ip link set dev " TAP_NAME " up");
|
||||
|
||||
skel = test_xdp_meta__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
|
||||
goto close;
|
||||
|
||||
tap_ifindex = if_nametoindex(TAP_NAME);
|
||||
if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
|
||||
goto close;
|
||||
|
||||
tc_hook.ifindex = tap_ifindex;
|
||||
ret = bpf_tc_hook_create(&tc_hook);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
|
||||
goto close;
|
||||
|
||||
tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
|
||||
ret = bpf_tc_attach(&tc_hook, &tc_opts);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_attach"))
|
||||
goto close;
|
||||
|
||||
ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
|
||||
0, NULL);
|
||||
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
|
||||
goto close;
|
||||
|
||||
/* The ethernet header is not relevant for this test and doesn't need to
|
||||
* be meaningful.
|
||||
*/
|
||||
struct ethhdr eth = { 0 };
|
||||
|
||||
memcpy(packet, ð, sizeof(eth));
|
||||
memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
|
||||
|
||||
ret = write(tap_fd, packet, sizeof(packet));
|
||||
if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
|
||||
goto close;
|
||||
|
||||
assert_test_result(skel);
|
||||
|
||||
close:
|
||||
if (tap_fd >= 0)
|
||||
close(tap_fd);
|
||||
test_xdp_meta__destroy(skel);
|
||||
netns_free(ns);
|
||||
}
|
||||
|
||||
@@ -4,37 +4,50 @@
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
|
||||
#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
|
||||
#define META_SIZE 32
|
||||
|
||||
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
|
||||
|
||||
/* Demonstrates how metadata can be passed from an XDP program to a TC program
|
||||
* using bpf_xdp_adjust_meta.
|
||||
* For the sake of testing the metadata support in drivers, the XDP program uses
|
||||
* a fixed-size payload after the Ethernet header as metadata. The TC program
|
||||
* copies the metadata it receives into a map so it can be checked from
|
||||
* userspace.
|
||||
*/
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__uint(value_size, META_SIZE);
|
||||
} test_result SEC(".maps");
|
||||
|
||||
SEC("tc")
|
||||
int ing_cls(struct __sk_buff *ctx)
|
||||
{
|
||||
__u8 *data, *data_meta, *data_end;
|
||||
__u32 diff = 0;
|
||||
__u8 *data, *data_meta;
|
||||
__u32 key = 0;
|
||||
|
||||
data_meta = ctx_ptr(ctx, data_meta);
|
||||
data_end = ctx_ptr(ctx, data_end);
|
||||
data = ctx_ptr(ctx, data);
|
||||
|
||||
if (data + ETH_ALEN > data_end ||
|
||||
data_meta + round_up(ETH_ALEN, 4) > data)
|
||||
if (data_meta + META_SIZE > data)
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
|
||||
diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
|
||||
bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
|
||||
|
||||
return diff ? TC_ACT_SHOT : TC_ACT_OK;
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
SEC("xdp")
|
||||
int ing_xdp(struct xdp_md *ctx)
|
||||
{
|
||||
__u8 *data, *data_meta, *data_end;
|
||||
__u8 *data, *data_meta, *data_end, *payload;
|
||||
struct ethhdr *eth;
|
||||
int ret;
|
||||
|
||||
ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
|
||||
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
|
||||
if (ret < 0)
|
||||
return XDP_DROP;
|
||||
|
||||
@@ -42,11 +55,21 @@ int ing_xdp(struct xdp_md *ctx)
|
||||
data_end = ctx_ptr(ctx, data_end);
|
||||
data = ctx_ptr(ctx, data);
|
||||
|
||||
if (data + ETH_ALEN > data_end ||
|
||||
data_meta + round_up(ETH_ALEN, 4) > data)
|
||||
eth = (struct ethhdr *)data;
|
||||
payload = data + sizeof(struct ethhdr);
|
||||
|
||||
if (payload + META_SIZE > data_end ||
|
||||
data_meta + META_SIZE > data)
|
||||
return XDP_DROP;
|
||||
|
||||
__builtin_memcpy(data_meta, data, ETH_ALEN);
|
||||
/* The Linux networking stack may send other packets on the test
|
||||
* interface that interfere with the test. Just drop them.
|
||||
* The test packets can be recognized by their ethertype of zero.
|
||||
*/
|
||||
if (eth->h_proto != 0)
|
||||
return XDP_DROP;
|
||||
|
||||
__builtin_memcpy(data_meta, payload, META_SIZE);
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user