blob: 29b6c2b4a96fa4170c407de0c736ddb46ccb7ec5 [file]
/* SPDX-License-Identifier: MIT */
/*
* Description: Verify recvmsg multishot with IOU_PBUF_RING_INC does not
* return spurious -EFAULT (BADADDR) when the tail of a
* partially-consumed buffer is smaller than the msghdr header
* the kernel needs to place (sizeof(io_uring_recvmsg_out) +
* namelen + controllen).
*
* The kernel must retire the too-small tail and advance to
* the next ring entry rather than refusing the recv.
*
* Also validates the in-buffer layout of each CQE via the
* io_uring_recvmsg_* helpers, so a bug that returns a valid
* bid but mis-positions the payload is caught.
*
* See https://github.com/axboe/liburing/issues/1433
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "liburing.h"
#include "helpers.h"
#define BGID 1
#define NR_BUFS 4
#define BUF_SIZE 1024
#define QD 8
/*
* hdr reserved per CQE by recvmsg multishot:
* sizeof(io_uring_recvmsg_out) + namelen + controllen
* = 16 + sizeof(struct sockaddr_in) + 0 = 32
*
* Two send sizes exercise both buffer-advance paths:
*
* LARGE_SZ=480: two CQEs of (32+480)=512 consume bid=0 exactly. The
* kernel advances to bid=1 via the normal head++ path, no retire
* needed.
*
* SMALL_SZ=305: three CQEs of (32+305)=337 in a 1024-byte buffer total
* 1011, leaving a 13-byte tail. 13 < 32, so the 4th send into that
* buffer must retire the tail and advance to the next ring entry.
* Pre-fix this manifests as -EFAULT; post-fix the CQE succeeds with
* a new buffer id.
*
* Send sequence: 2 LARGE + 7 SMALL. Expected bid per CQE:
* { 0, 0, 1, 1, 1, 2, 2, 2, 3 }
* — one natural transition (0->1) and two retire transitions (1->2
* and 2->3).
*/
#define LARGE_SZ 480
#define NR_LARGE 2
#define SMALL_SZ 305
#define NR_SMALL 7
#define NR_SENDS (NR_LARGE + NR_SMALL)
static const int expected_bids[NR_SENDS] = { 0, 0, 1, 1, 1, 2, 2, 2, 3 };
static int no_buf_ring, no_recv_mshot;
static int setup_buf_ring(struct io_uring *ring, void **buf_mem,
struct io_uring_buf_ring **out_br)
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg = { };
size_t total = NR_BUFS * BUF_SIZE;
int page_size = sysconf(_SC_PAGESIZE);
size_t ring_size;
void *mem;
int ret, i;
ring_size = NR_BUFS * sizeof(struct io_uring_buf);
ring_size = (ring_size + page_size - 1) & ~(page_size - 1);
mem = mmap(NULL, total, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED)
return -1;
*buf_mem = mem;
br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (br == MAP_FAILED) {
munmap(mem, total);
return -1;
}
*out_br = br;
io_uring_buf_ring_init(br);
reg.ring_addr = (unsigned long) br;
reg.ring_entries = NR_BUFS;
reg.bgid = BGID;
reg.min_left = 32;
ret = io_uring_register_buf_ring(ring, &reg, IOU_PBUF_RING_INC);
if (ret) {
if (ret == -EINVAL) {
no_buf_ring = 1;
return 0;
}
fprintf(stderr, "register_buf_ring: %d\n", ret);
return -1;
}
for (i = 0; i < NR_BUFS; i++) {
io_uring_buf_ring_add(br, (char *)mem + i * BUF_SIZE, BUF_SIZE,
i, io_uring_buf_ring_mask(NR_BUFS), i);
}
io_uring_buf_ring_advance(br, NR_BUFS);
return 0;
}
static int test(void)
{
struct io_uring ring;
struct io_uring_buf_ring *br = NULL;
struct io_uring_cqe *cqe;
struct io_uring_sqe *sqe;
struct msghdr msg;
struct sockaddr_in name;
size_t expected_payload = NR_LARGE * LARGE_SZ + NR_SMALL * SMALL_SZ;
uint8_t stream[NR_LARGE * LARGE_SZ + NR_SMALL * SMALL_SZ];
size_t bid_offset[NR_BUFS] = { 0 };
size_t stream_cursor = 0;
size_t sent_offset = 0;
void *buf_mem = NULL;
int ret, fds[2];
int i, seen_bids = 0;
int last_bid = -1;
int ret_val = T_EXIT_FAIL;
for (i = 0; i < (int) expected_payload; i++)
stream[i] = (uint8_t)(i & 0xff);
ret = io_uring_queue_init(QD, &ring, 0);
if (ret) {
fprintf(stderr, "queue_init: %d\n", ret);
return T_EXIT_FAIL;
}
if (setup_buf_ring(&ring, &buf_mem, &br))
goto out;
if (no_buf_ring) {
ret_val = T_EXIT_SKIP;
goto out;
}
ret = t_create_socket_pair(fds, true);
if (ret) {
fprintf(stderr, "socket_pair: %d\n", ret);
goto out;
}
memset(&msg, 0, sizeof(msg));
msg.msg_name = &name;
msg.msg_namelen = sizeof(name);
sqe = io_uring_get_sqe(&ring);
io_uring_prep_recvmsg_multishot(sqe, fds[0], &msg, 0);
sqe->flags |= IOSQE_BUFFER_SELECT;
sqe->buf_group = BGID;
sqe->user_data = 1;
ret = io_uring_submit(&ring);
if (ret != 1) {
fprintf(stderr, "submit: %d\n", ret);
goto out_close;
}
for (i = 0; i < NR_SENDS; i++) {
int hdr = sizeof(struct io_uring_recvmsg_out) + sizeof(name);
int send_sz = (i < NR_LARGE) ? LARGE_SZ : SMALL_SZ;
struct io_uring_recvmsg_out *o;
uint8_t *cqe_buf, *pdata;
unsigned int plen;
int bid;
if (write(fds[1], stream + sent_offset, send_sz) != send_sz) {
perror("write");
goto out_close;
}
sent_offset += send_sz;
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret) {
fprintf(stderr, "wait_cqe: %d\n", ret);
goto out_close;
}
if (cqe->res == -EINVAL || cqe->res == -ENOTSUP) {
no_recv_mshot = 1;
io_uring_cqe_seen(&ring, cqe);
ret_val = T_EXIT_SKIP;
goto out_close;
}
if (cqe->res < 0) {
fprintf(stderr,
"send %d: recvmsg multishot failed: %s (res=%d)\n",
i, strerror(-cqe->res), cqe->res);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
fprintf(stderr, "send %d: CQE missing buffer id\n", i);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
if (cqe->res < hdr) {
fprintf(stderr, "send %d: short CQE res=%d (< hdr %d)\n",
i, cqe->res, hdr);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
if (bid != expected_bids[i]) {
fprintf(stderr,
"send %d: bid=%d, expected %d\n",
i, bid, expected_bids[i]);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
if (bid != last_bid) {
seen_bids++;
last_bid = bid;
}
cqe_buf = (uint8_t *)buf_mem + bid * BUF_SIZE + bid_offset[bid];
o = io_uring_recvmsg_validate(cqe_buf, cqe->res, &msg);
if (!o) {
fprintf(stderr,
"send %d: recvmsg_validate returned NULL (res=%d)\n",
i, cqe->res);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
if (o->controllen != 0) {
fprintf(stderr,
"send %d: unexpected controllen=%u\n",
i, o->controllen);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
plen = io_uring_recvmsg_payload_length(o, cqe->res, &msg);
pdata = io_uring_recvmsg_payload(o, &msg);
if (plen != (unsigned int)(cqe->res - hdr)) {
fprintf(stderr,
"send %d: payload_length=%u, expected %d\n",
i, plen, cqe->res - hdr);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
if (memcmp(pdata, stream + stream_cursor, plen) != 0) {
fprintf(stderr,
"send %d: payload content mismatch at cursor %zu, plen %u\n",
i, stream_cursor, plen);
io_uring_cqe_seen(&ring, cqe);
goto out_close;
}
stream_cursor += plen;
bid_offset[bid] += cqe->res;
io_uring_cqe_seen(&ring, cqe);
}
if (stream_cursor != expected_payload) {
fprintf(stderr, "payload mismatch: got %zu, expected %zu\n",
stream_cursor, expected_payload);
goto out_close;
}
/*
* Four bids expected: 0 (natural advance), 1 (first retire
* target), 2 (second retire target), 3 (third retire target).
* Anything else means the retire path didn't behave as planned.
*/
if (seen_bids != 4) {
fprintf(stderr, "expected 4 distinct bids, saw %d\n",
seen_bids);
goto out_close;
}
ret_val = T_EXIT_PASS;
out_close:
close(fds[0]);
close(fds[1]);
out:
io_uring_queue_exit(&ring);
if (buf_mem)
munmap(buf_mem, NR_BUFS * BUF_SIZE);
if (br) {
size_t ring_size = NR_BUFS * sizeof(struct io_uring_buf);
int page_size = sysconf(_SC_PAGESIZE);
ring_size = (ring_size + page_size - 1) & ~(page_size - 1);
munmap(br, ring_size);
}
return ret_val;
}
int main(int argc, char *argv[])
{
if (argc > 1)
return T_EXIT_SKIP;
return test();
}