nvme/io_uring: optimize IOPOLL completions for local ring context
When multiple io_uring rings poll on the same NVMe queue, one ring can find completions belonging to another ring. The current code always uses task_work to handle this, but this adds overhead for the common single-ring case.

This patch passes the polling io_ring_ctx through io_comp_batch's new poll_ctx field. In io_do_iopoll(), the polling ring's context is stored in iob.poll_ctx before calling the iopoll callbacks.

In nvme_uring_cmd_end_io(), we now compare iob->poll_ctx with the request's owning io_ring_ctx (via io_uring_cmd_ctx_handle()). If they match (local context), we complete inline with io_uring_cmd_done32(). If they differ (remote context) or iob is NULL (non-iopoll path), we use task_work as before.

This optimization eliminates task_work scheduling overhead for the common case where a ring polls and finds its own completions. ~10% IOPS improvement is observed in the following benchmark:

fio/t/io_uring -b512 -d128 -c32 -s32 -p1 -F1 -O0 -P1 -u1 -n1 /dev/ng0n1

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5e2fde1a94
commit f7bc22ca0d

3 changed files with 20 additions and 7 deletions
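For context, the fast path targets exactly the setup exercised by the benchmark above: a single ring created with IORING_SETUP_IOPOLL issuing NVMe passthrough commands against a char device such as /dev/ng0n1. Below is a minimal liburing sketch of that setup; it is illustrative only (error handling omitted, and a 512-byte LBA format plus a read of LBA 0 are assumptions, not part of the patch):

    /* Hedged sketch: one IOPOLL ring polling its own NVMe passthrough
     * completion -- the "local context" case this patch completes inline.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <liburing.h>
    #include <linux/nvme_ioctl.h>

    int main(void)
    {
            struct io_uring ring;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;

            /* IOPOLL selects the polled completion path this patch optimizes;
             * SQE128/CQE32 are required for NVMe passthrough commands. */
            io_uring_queue_init(8, &ring, IORING_SETUP_IOPOLL |
                                IORING_SETUP_SQE128 | IORING_SETUP_CQE32);

            int fd = open("/dev/ng0n1", O_RDONLY);
            int nsid = ioctl(fd, NVME_IOCTL_ID);
            void *buf = aligned_alloc(4096, 4096);

            sqe = io_uring_get_sqe(&ring);
            memset(sqe, 0, 2 * sizeof(*sqe));       /* clear the 128-byte SQE */
            sqe->opcode = IORING_OP_URING_CMD;
            sqe->fd = fd;
            sqe->cmd_op = NVME_URING_CMD_IO;

            /* The passthrough command lives in the SQE's trailing cmd area. */
            struct nvme_uring_cmd *cmd = (struct nvme_uring_cmd *)sqe->cmd;
            cmd->opcode = 0x02;                     /* NVMe I/O read */
            cmd->nsid = nsid;
            cmd->addr = (unsigned long)buf;
            cmd->data_len = 512;
            /* cdw10/cdw11 (SLBA) left at 0; cdw12 is a 0-based LBA count. */
            cmd->cdw12 = 0;                         /* one LBA, assumed 512B format */

            io_uring_submit(&ring);

            /* On an IOPOLL ring, waiting drives the kernel polling loop
             * (io_do_iopoll); a completion found there belongs to this same
             * ring, so with this patch it is completed inline. */
            io_uring_wait_cqe(&ring, &cqe);
            printf("res=%d\n", cqe->res);
            io_uring_cqe_seen(&ring, cqe);

            io_uring_queue_exit(&ring);
            return 0;
    }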
drivers/nvme/host/ioctl.c
@@ -426,14 +426,20 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
 
 	/*
-	 * IOPOLL could potentially complete this request directly, but
-	 * if multiple rings are polling on the same queue, then it's possible
-	 * for one ring to find completions for another ring. Punting the
-	 * completion via task_work will always direct it to the right
-	 * location, rather than potentially complete requests for ringA
-	 * under iopoll invocations from ringB.
+	 * For IOPOLL, check if this completion is happening in the context
+	 * of the same io_ring that owns the request (local context). If so,
+	 * we can complete inline without task_work overhead. Otherwise, we
+	 * must punt to task_work to ensure completion happens in the correct
+	 * ring's context.
 	 */
-	io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+	if (blk_rq_is_poll(req) && iob &&
+	    iob->poll_ctx == io_uring_cmd_ctx_handle(ioucmd)) {
+		if (pdu->bio)
+			blk_rq_unmap_user(pdu->bio);
+		io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 0);
+	} else {
+		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+	}
 	return RQ_END_IO_FREE;
 }
 
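io_uring_cmd_ctx_handle() itself is not part of this hunk. Based on the existing cmd_to_io_kiocb() accessor, it presumably reduces to returning the owning ring's io_ring_ctx as an opaque pointer, along these lines (a sketch of the assumed shape, not the verbatim helper):

    /* Assumed shape of the comparison handle: the io_ring_ctx pointer is
     * only ever compared against io_comp_batch.poll_ctx by the driver,
     * never dereferenced.
     */
    static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *ioucmd)
    {
            return cmd_to_io_kiocb(ioucmd)->ctx;
    }

Keeping the handle a bare void * lets drivers like NVMe do the locality test without pulling io_uring internals into the block layer.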
include/linux/blkdev.h
@@ -1822,6 +1822,7 @@ struct io_comp_batch {
 	struct rq_list req_list;
 	bool need_ts;
 	void (*complete)(struct io_comp_batch *);
+	void *poll_ctx;
 };
 
 static inline bool blk_atomic_write_start_sect_aligned(sector_t sector,
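Worth noting for the NULL/mismatch fallback in the NVMe hunk: batches declared with the existing DEFINE_IO_COMP_BATCH() macro are zero-initialized, so poll_ctx stays NULL on every path that does not explicitly set it, and those completions keep taking the task_work route:

    /* Existing macro in include/linux/blkdev.h: zero-initialization leaves
     * the new poll_ctx field NULL unless a polling loop fills it in, as
     * io_do_iopoll() does in the next hunk.
     */
    #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }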
io_uring/rw.c
@@ -1320,6 +1320,12 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 	DEFINE_IO_COMP_BATCH(iob);
 	int nr_events = 0;
 
+	/*
+	 * Store the polling io_ring_ctx so drivers can detect if they're
+	 * completing a request in the same ring context that's polling.
+	 */
+	iob.poll_ctx = ctx;
+
 	/*
 	 * Only spin for completions if we don't have multiple devices hanging
 	 * off our complete list.