liburing.h: avoid OOL round trip in io_uring_peek_cqe() on empty CQ

With the CQ empty, io_uring_peek_cqe() called into __io_uring_get_cqe()
just to do a second full peek and conclude -EAGAIN, costing a function
call, a redundant acquire load of the CQ tail, and the get_data setup on
every poll. That's wasted work for spin-poll style users.

Return -EAGAIN directly if the peek found nothing and there's nothing
the kernel could flush to the CQ: no IOPOLL completions to reap, no
overflown CQEs, and no pending task work. Those cases, and a peek that
consumed an internal timeout CQE, still take the slow path as before.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/src/include/liburing.h b/src/include/liburing.h index 0138ee0..0188937 100644 --- a/src/include/liburing.h +++ b/src/include/liburing.h
@@ -1922,8 +1922,20 @@ struct io_uring_cqe **cqe_ptr) LIBURING_NOEXCEPT { - if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr) - return 0; + if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL)) { + if (*cqe_ptr) + return 0; + /* + * If the CQ is empty and there's nothing the kernel could + * flush to it (no IOPOLL completions to reap, no overflown + * CQEs, no pending task work), avoid the round trip into + * the full get_cqe machinery. + */ + if (!(ring->flags & IORING_SETUP_IOPOLL) && + !(IO_URING_READ_ONCE(*ring->sq.kflags) & + (IORING_SQ_CQ_OVERFLOW | IORING_SQ_TASKRUN))) + return -EAGAIN; + } return io_uring_wait_cqe_nr(ring, cqe_ptr, 0); }