Merge tag 'for-6.17/io_uring-20250728' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Optimization to avoid reference counts on non-cloned registered
   buffers. This is how these buffers were handled prior to having
   cloning support, and we can still use that approach as long as the
   buffers haven't been cloned to another ring.

 - Cleanup and improvement for uring_cmd, where btrfs was the only user
   of storing allocated data for the lifetime of the uring_cmd. Clean
   that up so we can get rid of the need to do that.

 - Avoid unnecessary memory copies in uring_cmd usage. This is
   particularly important as a lot of uring_cmd usage necessitates the
   use of 128b SQEs.

 - A few updates for recv multishot, where it's now possible to add
   fairness limits for limiting how much is transferred for each retry
   loop. Additionally, recv multishot now supports an overall cap as
   well, where once reached the multishot recv will terminate. The
   latter is useful for buffer management and juggling many recv streams
   at the same time.

 - Add support for returning the TX timestamps via a new socket command.
   This feature can work in either singleshot or multishot mode, where
   the latter triggers a completion whenever new timestamps are
   available. This is an alternative to using the existing error queue.

 - Add support for an io_uring "mock" file, which is the start of being
   able to do 100% targeted testing in terms of exercising io_uring
   request handling. The idea is to have a file type that can be
   anything the tester would like, and behave exactly how you want it to
   behave in terms of hitting the code paths you want.

 - Improve zcrx by using sgtables to de-duplicate and improve dma
   address handling.

 - Prep work for supporting larger pages for zcrx.

 - Various little improvements and fixes.

* tag 'for-6.17/io_uring-20250728' of git://git.kernel.dk/linux: (42 commits)
  io_uring/zcrx: fix leaking pages on sg init fail
  io_uring/zcrx: don't leak pages on account failure
  io_uring/zcrx: fix null ifq on area destruction
  io_uring: fix breakage in EXPERT menu
  io_uring/cmd: remove struct io_uring_cmd_data
  btrfs/ioctl: store btrfs_uring_encoded_data in io_btrfs_cmd
  io_uring/cmd: introduce IORING_URING_CMD_REISSUE flag
  io_uring/zcrx: account area memory
  io_uring: export io_[un]account_mem
  io_uring/net: Support multishot receive len cap
  io_uring: deduplicate wakeup handling
  io_uring/net: cast min_not_zero() type
  io_uring/poll: cleanup apoll freeing
  io_uring/net: allow multishot receive per-invocation cap
  io_uring/net: move io_sr_msg->retry_flags to io_sr_msg->flags
  io_uring/net: use passed in 'len' in io_recv_buf_select()
  io_uring/zcrx: prepare fallback for larger pages
  io_uring/zcrx: assert area type in io_zcrx_iov_page
  io_uring/zcrx: allocate sgtable for umem areas
  io_uring/zcrx: introduce io_populate_area_dma
  ...
This commit is contained in:
Linus Torvalds
2025-07-28 16:30:12 -07:00
27 changed files with 1032 additions and 241 deletions
+18 -1
View File
@@ -50,7 +50,7 @@ struct io_uring_sqe {
};
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
__u32 rw_flags;
__u32 fsync_flags;
__u16 poll_events; /* compatibility */
__u32 poll32_events; /* word-reversed for BE */
@@ -449,6 +449,7 @@ enum io_uring_msg_ring_flags {
#define IORING_NOP_FILE (1U << 1)
#define IORING_NOP_FIXED_FILE (1U << 2)
#define IORING_NOP_FIXED_BUFFER (1U << 3)
#define IORING_NOP_TW (1U << 4)
/*
* IO completion data structure (Completion Queue Entry)
@@ -968,6 +969,22 @@ enum io_uring_socket_op {
SOCKET_URING_OP_SIOCOUTQ,
SOCKET_URING_OP_GETSOCKOPT,
SOCKET_URING_OP_SETSOCKOPT,
SOCKET_URING_OP_TX_TIMESTAMP,
};
/*
* SOCKET_URING_OP_TX_TIMESTAMP definitions
*/
#define IORING_TIMESTAMP_HW_SHIFT 16
/* The cqe->flags bit from which the timestamp type is stored */
#define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1)
/* The cqe->flags flag signifying whether it's a hardware timestamp */
#define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT)
struct io_timespec {
__u64 tv_sec;
__u64 tv_nsec;
};
/* Zero copy receive refill queue entry */
+47
View File
@@ -0,0 +1,47 @@
#ifndef LINUX_IO_URING_MOCK_FILE_H
#define LINUX_IO_URING_MOCK_FILE_H
#include <linux/types.h>
enum {
IORING_MOCK_FEAT_CMD_COPY,
IORING_MOCK_FEAT_RW_ZERO,
IORING_MOCK_FEAT_RW_NOWAIT,
IORING_MOCK_FEAT_RW_ASYNC,
IORING_MOCK_FEAT_POLL,
IORING_MOCK_FEAT_END,
};
struct io_uring_mock_probe {
__u64 features;
__u64 __resv[9];
};
enum {
IORING_MOCK_CREATE_F_SUPPORT_NOWAIT = 1,
IORING_MOCK_CREATE_F_POLL = 2,
};
struct io_uring_mock_create {
__u32 out_fd;
__u32 flags;
__u64 file_size;
__u64 rw_delay_ns;
__u64 __resv[13];
};
enum {
IORING_MOCK_MGR_CMD_PROBE,
IORING_MOCK_MGR_CMD_CREATE,
};
enum {
IORING_MOCK_CMD_COPY_REGBUF,
};
enum {
IORING_MOCK_COPY_FROM = 1,
};
#endif