blob: 0cbe2c7456d35e86b5b9733bfcc631dfc2aa0f6e [file] [log] [blame]
/* SPDX-License-Identifier: MIT */
#include <sys/mman.h>
#include <linux/mman.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <poll.h>
#include <pthread.h>
#include <errno.h>
#include "helpers.h"
#include "liburing.h"
static bool has_regvec;
struct buf_desc {
char *buf_wr;
char *buf_rd;
size_t size;
struct io_uring ring;
bool ring_init;
bool fixed;
int buf_idx;
bool rw;
};
#define BUF_BASE_IDX 1
static int page_sz;
static void probe_support(void)
{
struct io_uring_probe *p;
struct io_uring ring;
int ret = 0;
ret = io_uring_queue_init(1, &ring, 0);
if (ret) {
fprintf(stderr, "queue init failed: %d\n", ret);
exit(ret);
}
p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
ret = io_uring_register_probe(&ring, p, 256);
/* if we don't have PROBE_REGISTER, we don't have OP_READ/WRITE */
if (ret == -EINVAL)
goto out;
if (ret) {
fprintf(stderr, "register_probe: %d\n", ret);
goto out;
}
has_regvec = p->ops_len > IORING_OP_READV_FIXED &&
(p->ops[IORING_OP_READV_FIXED].flags & IO_URING_OP_SUPPORTED);
out:
io_uring_queue_exit(&ring);
if (p)
free(p);
}
static void bind_ring(struct buf_desc *bd, struct io_uring *ring, unsigned buf_idx)
{
size_t size = bd->size;
struct iovec iov;
int ret;
iov.iov_len = size;
iov.iov_base = bd->buf_wr;
ret = io_uring_register_buffers_update_tag(ring, buf_idx, &iov, NULL, 1);
if (ret != 1) {
if (geteuid()) {
fprintf(stderr, "Not root, skipping\n");
exit(T_EXIT_SKIP);
}
fprintf(stderr, "buf reg failed %i\n", ret);
exit(1);
}
bd->buf_idx = buf_idx;
}
static void reinit_ring(struct buf_desc *bd)
{
struct io_uring *ring = &bd->ring;
int ret;
if (bd->ring_init) {
io_uring_queue_exit(ring);
bd->ring_init = false;
}
ret = io_uring_queue_init(32, ring, 0);
if (ret) {
fprintf(stderr, "ring init error %i\n", ret);
exit(1);
}
ret = io_uring_register_buffers_sparse(ring, 128);
if (ret) {
fprintf(stderr, "table reg error %i\n", ret);
exit(1);
}
bind_ring(bd, &bd->ring, BUF_BASE_IDX);
bd->ring_init = true;
}
static void init_buffers(struct buf_desc *bd, size_t size)
{
void *start;
void *mem;
start = mmap(NULL, size + page_sz * 2, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
if (start == MAP_FAILED) {
fprintf(stderr, "Unable to preserve the page mixture memory.\n");
exit(1);
}
mem = mmap(start + page_sz, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
if (mem == MAP_FAILED) {
fprintf(stderr, "mmap fail\n");
exit(1);
}
memset(bd, 0, sizeof(*bd));
bd->size = size;
bd->buf_wr = mem;
bd->buf_rd = malloc(size);
if (!bd->buf_rd) {
fprintf(stderr, "malloc fail\n");
exit(1);
}
}
static int verify_data(struct buf_desc *bd, struct iovec *wr_vecs, int nr_iovec,
int fd)
{
int iov_idx, ret;
for (iov_idx = 0; iov_idx < nr_iovec; iov_idx++) {
struct iovec *vec = &wr_vecs[iov_idx];
size_t seg_size = vec->iov_len;
size_t read_bytes = 0;
while (1) {
ret = read(fd, bd->buf_rd + read_bytes, seg_size - read_bytes);
if (ret < 0) {
fprintf(stderr, "read error %i", ret);
return 1;
}
read_bytes += ret;
if (read_bytes == seg_size)
break;
if (ret == 0) {
fprintf(stderr, "can't read %i", ret);
return 2;
}
}
ret = memcmp(bd->buf_rd, vec->iov_base, seg_size);
if (ret != 0) {
fprintf(stderr, "data mismatch %i\n", ret);
return 3;
}
}
return 0;
}
struct verify_data {
struct buf_desc *bd;
struct iovec *vecs;
int nr_vec;
int fd;
};
static void *verify_thread_cb(void *data)
{
struct verify_data *vd = data;
int ret;
ret = verify_data(vd->bd, vd->vecs, vd->nr_vec, vd->fd);
return (void *)(unsigned long)ret;
}
static int test_rw(struct buf_desc *bd, struct iovec *vecs, int nr_vec, int fd_wr)
{
unsigned buf_idx = bd->buf_idx;
struct io_uring *ring = &bd->ring;
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
int ret;
sqe = io_uring_get_sqe(ring);
if (bd->fixed)
io_uring_prep_writev_fixed(sqe, fd_wr, vecs, nr_vec, 0, 0, buf_idx);
else
io_uring_prep_writev(sqe, fd_wr, vecs, nr_vec, 0);
ret = io_uring_submit(ring);
if (ret != 1) {
fprintf(stderr, "submit failed %i\n", ret);
exit(1);
}
ret = io_uring_wait_cqe(ring, &cqe);
if (ret) {
fprintf(stderr, "wait_cqe=%d\n", ret);
exit(1);
}
ret = cqe->res;
io_uring_cqe_seen(ring, cqe);
return ret;
}
static int test_sendzc(struct buf_desc *bd, struct iovec *vecs, int nr_vec, int fd_wr)
{
unsigned buf_idx = bd->buf_idx;
struct io_uring *ring = &bd->ring;
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
int ret, cqe_ret, more;
struct msghdr msghdr;
memset(&msghdr, 0, sizeof(msghdr));
msghdr.msg_iov = vecs;
msghdr.msg_iovlen = nr_vec;
sqe = io_uring_get_sqe(ring);
if (bd->fixed)
io_uring_prep_sendmsg_zc_fixed(sqe, fd_wr, &msghdr, 0, buf_idx);
else
io_uring_prep_sendmsg_zc(sqe, fd_wr, &msghdr, 0);
ret = io_uring_submit(ring);
if (ret != 1) {
fprintf(stderr, "submit failed %i\n", ret);
exit(1);
}
ret = io_uring_wait_cqe(ring, &cqe);
if (ret) {
fprintf(stderr, "wait_cqe=%d\n", ret);
exit(1);
}
cqe_ret = cqe->res;
more = cqe->flags & IORING_CQE_F_MORE;
io_uring_cqe_seen(ring, cqe);
if (more) {
ret = io_uring_wait_cqe(ring, &cqe);
if (ret) {
fprintf(stderr, "wait_cqe=%d\n", ret);
exit(1);
}
io_uring_cqe_seen(ring, cqe);
}
return cqe_ret;
}
static int test_vec(struct buf_desc *bd, struct iovec *vecs, int nr_vec,
bool expect_fail, int *cqe_ret)
{
struct sockaddr_storage addr;
int sock_server, sock_client;
struct verify_data vd;
size_t total_len;
int i, ret;
void *verify_res;
pthread_t th;
ret = t_create_socketpair_ip(&addr, &sock_client, &sock_server,
true, true, false, true, "::1");
if (ret) {
fprintf(stderr, "sock prep failed %d\n", ret);
return 1;
}
for (i = 0; i < bd->size; i++)
bd->buf_wr[i] = i;
memset(bd->buf_rd, 0, bd->size);
total_len = t_iovec_data_length(vecs, nr_vec);
vd.bd = bd;
vd.vecs = vecs;
vd.nr_vec = nr_vec;
vd.fd = sock_server;
if (!expect_fail) {
ret = pthread_create(&th, NULL, verify_thread_cb, &vd);
if (ret) {
fprintf(stderr, "pthread_create failed %i\n", ret);
return ret;
}
}
if (bd->rw)
ret = test_rw(bd, vecs, nr_vec, sock_client);
else
ret = test_sendzc(bd, vecs, nr_vec, sock_client);
*cqe_ret = ret;
if (!expect_fail && ret != total_len) {
fprintf(stderr, "invalid cqe %i, expected %lu\n",
ret, (unsigned long)total_len);
return 1;
}
if (!expect_fail) {
pthread_join(th, &verify_res);
ret = (int)(unsigned long)verify_res;
if (ret) {
fprintf(stderr, "verify failed %i\n", ret);
return 1;
}
}
close(sock_client);
close(sock_server);
return 0;
}
struct work {
struct iovec *vecs;
unsigned nr_vecs;
};
static int test_sequence(struct buf_desc *bd, unsigned nr, struct work *ws)
{
int i, ret;
int cqe_ret;
reinit_ring(bd);
for (i = 0; i < nr; i++) {
ret = test_vec(bd, ws[i].vecs, ws[i].nr_vecs, false, &cqe_ret);
if (ret) {
fprintf(stderr, "sequence failed, idx %i/%i\n", i, nr);
return ret;
}
}
return 0;
}
static void test_basic(struct buf_desc *bd)
{
void *p = bd->buf_wr;
int ret;
struct iovec iov_page = { .iov_base = p,
.iov_len = page_sz, };
struct iovec iov_inner = { .iov_base = p + 1,
.iov_len = 3, };
struct iovec iov_maxbvec = { .iov_base = p + page_sz - 1,
.iov_len = page_sz + 2, };
struct iovec iov_big = { .iov_base = p,
.iov_len = page_sz * 12 + 33, };
struct iovec iov_big_unalign = { .iov_base = p + 10,
.iov_len = page_sz * 7 + 41, };
struct iovec iov_full = { .iov_base = p,
.iov_len = bd->size, };
struct iovec iov_right1 = { .iov_base = p + bd->size - page_sz + 5,
.iov_len = page_sz - 5 };
struct iovec iov_right2 = { .iov_base = p + bd->size - page_sz - 5,
.iov_len = page_sz + 5 };
struct iovec iov_full_unalign = { .iov_base = p + 1,
.iov_len = bd->size - 1, };
struct iovec vecs[] = {
iov_page,
iov_big,
iov_inner,
iov_big_unalign,
iov_big_unalign,
};
struct iovec vecs_basic[] = { iov_page, iov_page, iov_page };
struct iovec vecs_full[] = { iov_full, iov_full, iov_full };
struct iovec vecs_full_unalign[] = { iov_full_unalign, iov_full_unalign,
iov_full_unalign };
struct iovec vecs_maxsegs[] = { iov_maxbvec, iov_maxbvec, iov_maxbvec,
iov_maxbvec, iov_maxbvec, iov_maxbvec};
ret = test_sequence(bd, 1, (struct work[]) {
{ &iov_page, 1 },
{ vecs, 1 }});
if (ret) {
fprintf(stderr, "seq failure: basic aligned, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs, 1 },
{ vecs, 1 }});
if (ret) {
fprintf(stderr, "seq failure: basic aligned, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs + 1, 1 },
{ vecs + 1, 1 }});
if (ret) {
fprintf(stderr, "seq failure: multi page buffer, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs + 2, 1 },
{ vecs + 2, 1 }});
if (ret) {
fprintf(stderr, "seq failure: misaligned buffer, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs + 3, 1 },
{ vecs + 3, 1 }});
if (ret) {
fprintf(stderr, "seq failure: misaligned multipage buffer, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs, 1 },
{ vecs + 3, 1 }});
if (ret) {
fprintf(stderr, "seq failure: realloc + increase bvec, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs + 3, 1 },
{ vecs + 0, 1 }});
if (ret) {
fprintf(stderr, "seq failure: realloc + decrease bvec, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 2, (struct work[]) {
{ vecs, 4 },
{ vecs, 4 }});
if (ret) {
fprintf(stderr, "seq failure: multisegment, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs, 2 },
{ vecs, 3 },
{ vecs, 4 }});
if (ret) {
fprintf(stderr, "seq failure: multisegment 2, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_basic, 1 },
{ vecs_basic, 2 },
{ vecs_basic, 3 }});
if (ret) {
fprintf(stderr, "seq failure: increase iovec, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_basic, 3 },
{ vecs_basic, 2 },
{ vecs_basic, 1 }});
if (ret) {
fprintf(stderr, "seq failure: decrease iovec, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ &iov_right1, 1 },
{ &iov_right2, 1 },
{ &iov_right1, 1 }});
if (ret) {
fprintf(stderr, "seq failure: right aligned, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_full, 1 },
{ vecs_full, 1 },
{ vecs_full, 3 }});
if (ret) {
fprintf(stderr, "seq failure: full size, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_full_unalign, 1 },
{ vecs_full_unalign, 1 },
{ vecs_full_unalign, 3 }});
if (ret) {
fprintf(stderr, "seq failure: full size unsigned, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_maxsegs, 1 },
{ vecs_maxsegs, 2 },
{ vecs_maxsegs, 3 }});
if (ret) {
fprintf(stderr, "seq failure: overestimated segments, %i\n", ret);
exit(1);
}
ret = test_sequence(bd, 3, (struct work[]) {
{ vecs_maxsegs, 6 },
{ vecs_maxsegs, 6 },
{ vecs_maxsegs, 6 }});
if (ret) {
fprintf(stderr, "seq failure: overestimated segments 2, %i\n", ret);
exit(1);
}
}
static void test_fail(struct buf_desc *bd)
{
int ret, cqe_ret;
void *p = bd->buf_wr;
struct iovec iov_0len = { .iov_base = p, .iov_len = 0 };
struct iovec iov_0buf = { .iov_base = 0, .iov_len = 1 };
struct iovec iov_inv = { .iov_base = (void *)-1U, .iov_len = 1 };
struct iovec iov_under = { .iov_base = p - 1, .iov_len = 1 };
struct iovec iov_over = { .iov_base = p + bd->size, .iov_len = 1 };
struct iovec vecs_0[] = { iov_0len, iov_0len, iov_0len, iov_0len,
iov_0len, iov_0len, iov_0len, iov_0len };
reinit_ring(bd);
ret = test_vec(bd, vecs_0, 8, true, &cqe_ret);
if (ret || cqe_ret > 0) {
fprintf(stderr, "0 length test failed %i, cqe %i\n",
ret, cqe_ret);
exit(1);
}
reinit_ring(bd);
ret = test_vec(bd, &iov_0buf, 1, true, &cqe_ret);
if (ret || cqe_ret >= 0) {
fprintf(stderr, "0 buf test failed %i, cqe %i\n",
ret, cqe_ret);
exit(1);
}
reinit_ring(bd);
ret = test_vec(bd, &iov_inv, 1, true, &cqe_ret);
if (ret || cqe_ret >= 0) {
fprintf(stderr, "inv buf test failed %i, cqe %i\n",
ret, cqe_ret);
exit(1);
}
reinit_ring(bd);
ret = test_vec(bd, &iov_under, 1, true, &cqe_ret);
if (ret || cqe_ret >= 0) {
fprintf(stderr, "inv buf underflow failed %i, cqe %i\n",
ret, cqe_ret);
exit(1);
}
reinit_ring(bd);
ret = test_vec(bd, &iov_over, 1, true, &cqe_ret);
if (ret || cqe_ret >= 0) {
fprintf(stderr, "inv buf overflow failed %i, cqe %i\n",
ret, cqe_ret);
exit(1);
}
}
int main(int argc, char *argv[])
{
struct buf_desc bd = {};
int i = 0;
if (argc > 1)
return T_EXIT_SKIP;
page_sz = sysconf(_SC_PAGESIZE);
probe_support();
if (!has_regvec) {
printf("doesn't support registered vector ops, skip\n");
return T_EXIT_SKIP;
}
init_buffers(&bd, page_sz * 32);
bd.fixed = true;
for (i = 0; i < 2; i++) {
bool rw = i & 1;
bd.rw = rw;
test_basic(&bd);
test_fail(&bd);
}
free(bd.buf_rd);
io_uring_queue_exit(&bd.ring);
return 0;
}