Add test case for minimum length left for incremental buffers

See the link for more details, but this adds support for the application
to tell the kernel the minimum number of bytes that must be left in an
incrementally consumed buffer for it to still be considered valid, rather
than just assuming that even 1 byte is enough.

Link: https://github.com/axboe/liburing/issues/1433
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 983aa26..b9ec1eb 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -891,7 +891,8 @@
 	__u32	ring_entries;
 	__u16	bgid;
 	__u16	flags;
-	__u64	resv[3];
+	__u32	min_left;
+	__u32	resv[5];
 };
 
 /* argument for IORING_REGISTER_PBUF_STATUS */
diff --git a/test/Makefile b/test/Makefile
index 05e1f9e..6a79a02 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -223,6 +223,7 @@
 	recv-mshot-drain.c \
 	recv-mshot-fair.c \
 	recv-multishot.c \
+	recvmsg-inc-tail.c \
 	reg-fd-only.c \
 	reg-hint.c \
 	reg-reg-ring.c \
diff --git a/test/recvmsg-inc-tail.c b/test/recvmsg-inc-tail.c
new file mode 100644
index 0000000..29b6c2b
--- /dev/null
+++ b/test/recvmsg-inc-tail.c
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: Verify recvmsg multishot with IOU_PBUF_RING_INC does not
+ *		return spurious -EFAULT (BADADDR) when the tail of a
+ *		partially-consumed buffer is smaller than the msghdr header
+ *		the kernel needs to place (sizeof(io_uring_recvmsg_out) +
+ *		namelen + controllen).
+ *
+ *		The kernel must retire the too-small tail and advance to
+ *		the next ring entry rather than refusing the recv.
+ *
+ *		Also validates the in-buffer layout of each CQE via the
+ *		io_uring_recvmsg_* helpers, so a bug that returns a valid
+ *		bid but mis-positions the payload is caught.
+ *
+ *		See https://github.com/axboe/liburing/issues/1433
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BGID		1	/* buffer group id */
+#define NR_BUFS		4	/* buffers (and ring entries) in the group */
+#define BUF_SIZE	1024	/* bytes per buffer */
+#define QD		8	/* entries for io_uring_queue_init() */
+
+/*
+ * hdr reserved per CQE by recvmsg multishot:
+ *   sizeof(io_uring_recvmsg_out) + namelen + controllen
+ *   = 16 + sizeof(struct sockaddr_in) + 0 = 32
+ *
+ * Two send sizes exercise both buffer-advance paths:
+ *
+ * LARGE_SZ=480: two CQEs of (32+480)=512 consume bid=0 exactly. The
+ *   kernel advances to bid=1 via the normal head++ path, no retire
+ *   needed.
+ *
+ * SMALL_SZ=305: three CQEs of (32+305)=337 in a 1024-byte buffer total
+ *   1011, leaving a 13-byte tail. 13 < 32, so the 4th send into that
+ *   buffer must retire the tail and advance to the next ring entry.
+ *   Pre-fix this manifests as -EFAULT; post-fix the CQE succeeds with
+ *   a new buffer id.
+ *
+ * Send sequence: 2 LARGE + 7 SMALL. Expected bid per CQE:
+ *   { 0, 0, 1, 1, 1, 2, 2, 2, 3 }
+ * — one natural transition (0->1) and two retire transitions (1->2
+ * and 2->3).
+ */
+#define LARGE_SZ	480
+#define NR_LARGE	2
+#define SMALL_SZ	305
+#define NR_SMALL	7
+#define NR_SENDS	(NR_LARGE + NR_SMALL)
+
+static const int expected_bids[NR_SENDS] = { 0, 0, 1, 1, 1, 2, 2, 2, 3 };
+
+static int no_buf_ring, no_recv_mshot;	/* set when test must be skipped */
+
+/*
+ * Map NR_BUFS buffers plus a page-rounded ring and register the ring for
+ * BGID with IOU_PBUF_RING_INC. Mappings are published via @buf_mem/@out_br
+ * for the caller to unmap. Returns 0 (no_buf_ring set on -EINVAL) or -1.
+ */
+static int setup_buf_ring(struct io_uring *ring, void **buf_mem,
+			  struct io_uring_buf_ring **out_br)
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_buf_reg reg = { };
+	size_t total = NR_BUFS * BUF_SIZE;
+	size_t ring_size = NR_BUFS * sizeof(struct io_uring_buf);
+	int page_size = sysconf(_SC_PAGESIZE);
+	void *mem;
+	int ret, i;
+
+	ring_size = (ring_size + page_size - 1) & ~(page_size - 1);
+
+	mem = mmap(NULL, total, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (mem == MAP_FAILED)
+		return -1;
+	*buf_mem = mem;
+
+	br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
+		  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (br == MAP_FAILED)
+		return -1;	/* caller unmaps *buf_mem; don't unmap it twice */
+	*out_br = br;
+
+	io_uring_buf_ring_init(br);
+	reg.ring_addr = (unsigned long) br;
+	reg.ring_entries = NR_BUFS;
+	reg.bgid = BGID;
+	reg.min_left = 32;	/* recvmsg hdr: 16 + sizeof(sockaddr_in) */
+
+	ret = io_uring_register_buf_ring(ring, &reg, IOU_PBUF_RING_INC);
+	if (ret == -EINVAL) {
+		no_buf_ring = 1;
+		return 0;
+	} else if (ret) {
+		fprintf(stderr, "register_buf_ring: %d\n", ret);
+		return -1;
+	}
+
+	for (i = 0; i < NR_BUFS; i++)
+		io_uring_buf_ring_add(br, (char *)mem + i * BUF_SIZE, BUF_SIZE,
+				      i, io_uring_buf_ring_mask(NR_BUFS), i);
+	io_uring_buf_ring_advance(br, NR_BUFS);
+	return 0;
+}
+
+static int test(void)
+{
+	struct io_uring ring;
+	struct io_uring_buf_ring *br = NULL;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct msghdr msg;
+	struct sockaddr_in name;
+	size_t expected_payload = NR_LARGE * LARGE_SZ + NR_SMALL * SMALL_SZ;
+	uint8_t stream[NR_LARGE * LARGE_SZ + NR_SMALL * SMALL_SZ];
+	size_t bid_offset[NR_BUFS] = { 0 };	/* bytes used so far per bid */
+	size_t stream_cursor = 0;	/* payload bytes verified */
+	size_t sent_offset = 0;		/* payload bytes written */
+	void *buf_mem = NULL;
+	int ret, fds[2];
+	int i, seen_bids = 0;
+	int last_bid = -1;
+	int ret_val = T_EXIT_FAIL;
+
+	for (i = 0; i < (int) expected_payload; i++)
+		stream[i] = (uint8_t)(i & 0xff);
+
+	ret = io_uring_queue_init(QD, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	if (setup_buf_ring(&ring, &buf_mem, &br))
+		goto out;
+	if (no_buf_ring) {
+		ret_val = T_EXIT_SKIP;
+		goto out;
+	}
+
+	ret = t_create_socket_pair(fds, true);
+	if (ret) {
+		fprintf(stderr, "socket_pair: %d\n", ret);
+		goto out;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &name;
+	msg.msg_namelen = sizeof(name);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_recvmsg_multishot(sqe, fds[0], &msg, 0);
+	sqe->flags |= IOSQE_BUFFER_SELECT;
+	sqe->buf_group = BGID;
+	sqe->user_data = 1;
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "submit: %d\n", ret);
+		goto out_close;
+	}
+
+	for (i = 0; i < NR_SENDS; i++) {
+		int hdr = sizeof(struct io_uring_recvmsg_out) + sizeof(name);
+		int send_sz = (i < NR_LARGE) ? LARGE_SZ : SMALL_SZ;
+		struct io_uring_recvmsg_out *o;
+		uint8_t *cqe_buf, *pdata;
+		unsigned int plen;
+		int bid;
+
+		if (write(fds[1], stream + sent_offset, send_sz) != send_sz) {
+			perror("write");
+			goto out_close;
+		}
+		sent_offset += send_sz;
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe: %d\n", ret);
+			goto out_close;
+		}
+
+		if (cqe->res == -EINVAL || cqe->res == -ENOTSUP) {
+			no_recv_mshot = 1;
+			io_uring_cqe_seen(&ring, cqe);
+			ret_val = T_EXIT_SKIP;
+			goto out_close;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr,
+				"send %d: recvmsg multishot failed: %s (res=%d)\n",
+				i, strerror(-cqe->res), cqe->res);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "send %d: CQE missing buffer id\n", i);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+		if (cqe->res < hdr) {
+			fprintf(stderr, "send %d: short CQE res=%d (< hdr %d)\n",
+				i, cqe->res, hdr);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+
+		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
+		if (bid != expected_bids[i]) {
+			fprintf(stderr,
+				"send %d: bid=%d, expected %d\n",
+				i, bid, expected_bids[i]);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+		if (bid != last_bid) {
+			seen_bids++;	/* counts bid changes */
+			last_bid = bid;
+		}
+
+		cqe_buf = (uint8_t *)buf_mem + bid * BUF_SIZE + bid_offset[bid];
+		o = io_uring_recvmsg_validate(cqe_buf, cqe->res, &msg);
+		if (!o) {
+			fprintf(stderr,
+				"send %d: recvmsg_validate returned NULL (res=%d)\n",
+				i, cqe->res);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+		if (o->controllen != 0) {
+			fprintf(stderr,
+				"send %d: unexpected controllen=%u\n",
+				i, o->controllen);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+
+		plen = io_uring_recvmsg_payload_length(o, cqe->res, &msg);
+		pdata = io_uring_recvmsg_payload(o, &msg);
+
+		if (plen != (unsigned int)(cqe->res - hdr)) {
+			fprintf(stderr,
+				"send %d: payload_length=%u, expected %d\n",
+				i, plen, cqe->res - hdr);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+		if (memcmp(pdata, stream + stream_cursor, plen) != 0) {
+			fprintf(stderr,
+				"send %d: payload content mismatch at cursor %zu, plen %u\n",
+				i, stream_cursor, plen);
+			io_uring_cqe_seen(&ring, cqe);
+			goto out_close;
+		}
+
+		stream_cursor += plen;
+		bid_offset[bid] += cqe->res;	/* res includes the header */
+
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	if (stream_cursor != expected_payload) {
+		fprintf(stderr, "payload mismatch: got %zu, expected %zu\n",
+			stream_cursor, expected_payload);
+		goto out_close;
+	}
+
+	/*
+	 * Four bids expected: 0, 1 (reached by the natural advance once
+	 * bid 0 is fully consumed), 2 and 3 (first and second retire
+	 * targets). Anything else means a transition didn't happen.
+	 */
+	if (seen_bids != 4) {
+		fprintf(stderr, "expected 4 distinct bids, saw %d\n",
+			seen_bids);
+		goto out_close;
+	}
+
+	ret_val = T_EXIT_PASS;
+
+out_close:
+	close(fds[0]);
+	close(fds[1]);
+out:
+	io_uring_queue_exit(&ring);
+	if (buf_mem)
+		munmap(buf_mem, NR_BUFS * BUF_SIZE);
+	if (br) {
+		size_t ring_size = NR_BUFS * sizeof(struct io_uring_buf);
+		int page_size = sysconf(_SC_PAGESIZE);
+
+		ring_size = (ring_size + page_size - 1) & ~(page_size - 1);
+		munmap(br, ring_size);
+	}
+	return ret_val;
+}
+
+int main(int argc, char *argv[])
+{
+	/*
+	 * Skip when invoked with any arguments, otherwise run the test.
+	 */
+	return argc > 1 ? T_EXIT_SKIP : test();
+}