Merge tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe: - NAPI fixes and cleanups (Pavel, Olivier) - Add support for absolute timeouts (Pavel) - Fixes for io-wq/sqpoll affinities (Felix) - Efficiency improvements for dealing with huge pages (Chenliang) - Support for a minwait mode, where the application essentially has two timouts - one smaller one that defines the batch timeout, and the overall large one similar to what we had before. This enables efficient use of batching based on count + timeout, while still working well with periods of less intensive workloads - Use ITER_UBUF for single segment sends - Add support for incremental buffer consumption. Right now each operation will always consume a full buffer. With incremental consumption, a recv/read operation only consumes the part of the buffer that it needs to satisfy the operation - Add support for GCOV for io_uring, to help retain a high coverage of test to code ratio - Fix regression with ocfs2, where an odd -EOPNOTSUPP wasn't correctly converted to a blocking retry - Add support for cloning registered buffers from one ring to another - Misc cleanups (Anuj, me) * tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux: (35 commits) io_uring: add IORING_REGISTER_COPY_BUFFERS method io_uring/register: provide helper to get io_ring_ctx from 'fd' io_uring/rsrc: add reference count to struct io_mapped_ubuf io_uring/rsrc: clear 'slot' entry upfront io_uring/io-wq: inherit cpuset of cgroup in io worker io_uring/io-wq: do not allow pinning outside of cpuset io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common() io_uring/rw: treat -EOPNOTSUPP for IOCB_NOWAIT like -EAGAIN io_uring/sqpoll: do not allow pinning outside of cpuset io_uring/eventfd: move refs to refcount_t io_uring: remove unused rsrc_put_fn io_uring: add new line after variable declaration io_uring: add GCOV_PROFILE_URING Kconfig option io_uring/kbuf: add support for incremental buffer consumption io_uring/kbuf: pass in 'len' argument for buffer commit Revert "io_uring: Require zeroed sqe->len on provided-buffers send" io_uring/kbuf: move io_ring_head_to_buf() to kbuf.h io_uring/kbuf: add io_kbuf_commit() helper io_uring/kbuf: shrink nr_iovs/mode in struct buf_sel_arg io_uring: wire up min batch wake timeout ...
This commit is contained in:
@@ -440,11 +440,21 @@ struct io_uring_cqe {
|
||||
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
|
||||
* IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct
|
||||
* them from sends.
|
||||
* IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get
|
||||
* more completions. In other words, the buffer is being
|
||||
* partially consumed, and will be used by the kernel for
|
||||
* more completions. This is only set for buffers used via
|
||||
* the incremental buffer consumption, as provided by
|
||||
* a ring buffer setup with IOU_PBUF_RING_INC. For any
|
||||
* other provided buffer type, all completions with a
|
||||
* buffer passed back is automatically returned to the
|
||||
* application.
|
||||
*/
|
||||
#define IORING_CQE_F_BUFFER (1U << 0)
|
||||
#define IORING_CQE_F_MORE (1U << 1)
|
||||
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
|
||||
#define IORING_CQE_F_NOTIF (1U << 3)
|
||||
#define IORING_CQE_F_BUF_MORE (1U << 4)
|
||||
|
||||
#define IORING_CQE_BUFFER_SHIFT 16
|
||||
|
||||
@@ -507,6 +517,7 @@ struct io_cqring_offsets {
|
||||
#define IORING_ENTER_SQ_WAIT (1U << 2)
|
||||
#define IORING_ENTER_EXT_ARG (1U << 3)
|
||||
#define IORING_ENTER_REGISTERED_RING (1U << 4)
|
||||
#define IORING_ENTER_ABS_TIMER (1U << 5)
|
||||
|
||||
/*
|
||||
* Passed in for io_uring_setup(2). Copied back with updated info on success
|
||||
@@ -542,6 +553,7 @@ struct io_uring_params {
|
||||
#define IORING_FEAT_LINKED_FILE (1U << 12)
|
||||
#define IORING_FEAT_REG_REG_RING (1U << 13)
|
||||
#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
|
||||
#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
|
||||
|
||||
/*
|
||||
* io_uring_register(2) opcodes and arguments
|
||||
@@ -595,6 +607,11 @@ enum io_uring_register_op {
|
||||
IORING_REGISTER_NAPI = 27,
|
||||
IORING_UNREGISTER_NAPI = 28,
|
||||
|
||||
IORING_REGISTER_CLOCK = 29,
|
||||
|
||||
/* copy registered buffers from source ring to current ring */
|
||||
IORING_REGISTER_COPY_BUFFERS = 30,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
@@ -675,6 +692,21 @@ struct io_uring_restriction {
|
||||
__u32 resv2[3];
|
||||
};
|
||||
|
||||
struct io_uring_clock_register {
|
||||
__u32 clockid;
|
||||
__u32 __resv[3];
|
||||
};
|
||||
|
||||
enum {
|
||||
IORING_REGISTER_SRC_REGISTERED = 1,
|
||||
};
|
||||
|
||||
struct io_uring_copy_buffers {
|
||||
__u32 src_fd;
|
||||
__u32 flags;
|
||||
__u32 pad[6];
|
||||
};
|
||||
|
||||
struct io_uring_buf {
|
||||
__u64 addr;
|
||||
__u32 len;
|
||||
@@ -707,9 +739,17 @@ struct io_uring_buf_ring {
|
||||
* mmap(2) with the offset set as:
|
||||
* IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
|
||||
* to get a virtual mapping for the ring.
|
||||
* IOU_PBUF_RING_INC: If set, buffers consumed from this buffer ring can be
|
||||
* consumed incrementally. Normally one (or more) buffers
|
||||
* are fully consumed. With incremental consumptions, it's
|
||||
* feasible to register big ranges of buffers, and each
|
||||
* use of it will consume only as much as it needs. This
|
||||
* requires that both the kernel and application keep
|
||||
* track of where the current read/recv index is at.
|
||||
*/
|
||||
enum io_uring_register_pbuf_ring_flags {
|
||||
IOU_PBUF_RING_MMAP = 1,
|
||||
IOU_PBUF_RING_INC = 2,
|
||||
};
|
||||
|
||||
/* argument for IORING_(UN)REGISTER_PBUF_RING */
|
||||
@@ -758,7 +798,7 @@ enum io_uring_register_restriction_op {
|
||||
struct io_uring_getevents_arg {
|
||||
__u64 sigmask;
|
||||
__u32 sigmask_sz;
|
||||
__u32 pad;
|
||||
__u32 min_wait_usec;
|
||||
__u64 ts;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user