/* SPDX-License-Identifier: MIT */
/*
* Description: run various CQ ring overflow tests
*
*/
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE (256 * 1024)
#define BS 4096
#define BUFFERS (FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES 8

/*
* io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring and no GFP_ATOMIC memory
 * is available. In reality this probably means that the process
* is about to be killed as many other things might start failing, but we still
* want to test that liburing and the kernel deal with this properly. The fault
* injection framework allows us to test this scenario. Unfortunately this
* requires some system wide changes and so we do not enable this by default.
* The tests in this file should work in both cases (where overflows are queued
* and where they are dropped) on recent kernels.
*
* In order to test dropped CQEs you should enable fault injection in the kernel
* config:
*
* CONFIG_FAULT_INJECTION=y
* CONFIG_FAILSLAB=y
* CONFIG_FAULT_INJECTION_DEBUG_FS=y
*
* and then run the test as follows:
* echo Y > /sys/kernel/debug/failslab/task-filter
* echo 100 > /sys/kernel/debug/failslab/probability
* echo 0 > /sys/kernel/debug/failslab/verbose
* echo 100000 > /sys/kernel/debug/failslab/times
* bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
*/
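
/*
 * Submit O_DIRECT reads against a test file using a small (ENTRIES deep) ring,
 * pausing between the first and second half of the submissions so that
 * completions pile up and overflow the CQ ring. Then reap everything and check
 * that the reaped events plus the kernel's overflow counter account for all
 * submissions. With 'fault' set, one iovec's buffer is freed beforehand to
 * provoke an -EFAULT completion on top of the overflow handling.
 */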
static int test_io(const char *file, unsigned long usecs, unsigned *drops,
int fault)
{
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
struct io_uring_params p;
unsigned reaped, total;
struct io_uring ring;
int nodrop, i, fd, ret;
bool cqe_dropped = false;
fd = open(file, O_RDONLY | O_DIRECT);
if (fd < 0) {
if (errno == EINVAL)
return T_EXIT_SKIP;
perror("file open");
return T_EXIT_FAIL;
}
memset(&p, 0, sizeof(p));
ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
if (ret) {
close(fd);
fprintf(stderr, "ring create failed: %d\n", ret);
return T_EXIT_FAIL;
}
nodrop = 0;
if (p.features & IORING_FEAT_NODROP)
nodrop = 1;
total = 0;
for (i = 0; i < BUFFERS / 2; i++) {
off_t offset;
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
fprintf(stderr, "sqe get failed\n");
goto err;
}
offset = BS * (rand() % BUFFERS);
if (fault && i == ENTRIES + 4) {
free(vecs[i].iov_base);
vecs[i].iov_base = NULL;
}
io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
ret = io_uring_submit(&ring);
if (nodrop && ret == -EBUSY) {
*drops = 1;
total = i;
break;
} else if (ret != 1) {
fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
total = i;
break;
}
total++;
}
if (*drops)
goto reap_it;
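	/* give the first batch time to complete and overflow the CQ ring */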
usleep(usecs);
for (i = total; i < BUFFERS; i++) {
off_t offset;
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
fprintf(stderr, "sqe get failed\n");
goto err;
}
offset = BS * (rand() % BUFFERS);
io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);
ret = io_uring_submit(&ring);
if (nodrop && ret == -EBUSY) {
*drops = 1;
break;
} else if (ret != 1) {
fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
break;
}
total++;
}
reap_it:
reaped = 0;
do {
if (nodrop && !cqe_dropped) {
/* nodrop should never lose events unless cqe_dropped */
if (reaped == total)
break;
} else {
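			/*
			 * Without FEAT_NODROP, or once a CQE has been dropped,
			 * lost completions show up in the overflow counter.
			 */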
if (reaped + *ring.cq.koverflow == total)
break;
}
ret = io_uring_wait_cqe(&ring, &cqe);
if (nodrop && ret == -EBADR) {
cqe_dropped = true;
continue;
} else if (ret) {
fprintf(stderr, "wait_cqe=%d\n", ret);
goto err;
}
if (cqe->res != BS) {
if (!(fault && cqe->res == -EFAULT)) {
fprintf(stderr, "cqe res %d, wanted %d\n",
cqe->res, BS);
goto err;
}
}
io_uring_cqe_seen(&ring, cqe);
reaped++;
} while (1);
if (!io_uring_peek_cqe(&ring, &cqe)) {
fprintf(stderr, "found unexpected completion\n");
goto err;
}
if (!nodrop || cqe_dropped) {
*drops = *ring.cq.koverflow;
} else if (*ring.cq.koverflow) {
fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
goto err;
}
io_uring_queue_exit(&ring);
close(fd);
return T_EXIT_PASS;
err:
if (fd != -1)
close(fd);
io_uring_queue_exit(&ring);
	return T_EXIT_FAIL;
}

static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
struct io_uring_cqe *cqe;
int i, ret = 0, seq = 0;
unsigned int start_overflow = *ring->cq.koverflow;
bool dropped = false;
for (i = 0; i < nr_events; i++) {
if (do_wait)
ret = io_uring_wait_cqe(ring, &cqe);
else
ret = io_uring_peek_cqe(ring, &cqe);
if (do_wait && ret == -EBADR) {
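			/*
			 * -EBADR means the kernel dropped CQEs: skip past the
			 * lost entries using the overflow counter and disable
			 * the sequence check from here on.
			 */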
unsigned int this_drop = *ring->cq.koverflow -
start_overflow;
dropped = true;
start_overflow = *ring->cq.koverflow;
assert(this_drop > 0);
i += (this_drop - 1);
continue;
} else if (ret) {
if (ret != -EAGAIN)
fprintf(stderr, "cqe peek failed: %d\n", ret);
break;
}
if (!dropped && cqe->user_data != seq) {
fprintf(stderr, "cqe sequence out-of-order\n");
fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
seq);
return -EINVAL;
}
seq++;
io_uring_cqe_seen(ring, cqe);
}
return i ? i : ret;
}

/*
* Submit some NOPs and watch if the overflow is correct
*/
static int test_overflow(void)
{
struct io_uring ring;
struct io_uring_params p;
struct io_uring_sqe *sqe;
unsigned pending;
int ret, i, j;
memset(&p, 0, sizeof(p));
ret = io_uring_queue_init_params(4, &ring, &p);
if (ret) {
fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
return 1;
}
/* submit 4x4 SQEs, should overflow the ring by 8 */
pending = 0;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
sqe = io_uring_get_sqe(&ring);
if (!sqe) {
fprintf(stderr, "get sqe failed\n");
goto err;
}
io_uring_prep_nop(sqe);
sqe->user_data = (i * 4) + j;
}
ret = io_uring_submit(&ring);
if (ret == 4) {
pending += 4;
continue;
}
if (p.features & IORING_FEAT_NODROP) {
if (ret == -EBUSY)
break;
}
fprintf(stderr, "sqe submit failed: %d\n", ret);
goto err;
}
/* we should now have 8 completions ready */
ret = reap_events(&ring, pending, 0);
if (ret < 0)
goto err;
if (!(p.features & IORING_FEAT_NODROP)) {
if (*ring.cq.koverflow != 8) {
fprintf(stderr, "cq ring overflow %d, expected 8\n",
*ring.cq.koverflow);
goto err;
}
}
io_uring_queue_exit(&ring);
return 0;
err:
io_uring_queue_exit(&ring);
return 1;
}
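
/* Queue and submit a single NOP with the given user_data, asserting success */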
static void submit_one_nop(struct io_uring *ring, int ud)
{
struct io_uring_sqe *sqe;
int ret;
sqe = io_uring_get_sqe(ring);
assert(sqe);
io_uring_prep_nop(sqe);
sqe->user_data = ud;
ret = io_uring_submit(ring);
assert(ret == 1);
}

/*
* Create an overflow condition and ensure that SQEs are still processed
*/
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
bool defer)
{
struct io_uring ring;
struct io_uring_params p;
int ret, i, j, ud, cqe_count;
unsigned int count;
int const N = 8;
int const LOOPS = 128;
int const QUEUE_LENGTH = 1024;
int completions[N];
int queue[QUEUE_LENGTH];
int queued = 0;
int outstanding = 0;
bool cqe_dropped = false;
memset(&completions, 0, sizeof(int) * N);
memset(&p, 0, sizeof(p));
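	/* ask for a deliberately tiny CQ ring so the N NOPs below must overflow it */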
p.cq_entries = 2 * cqe_multiple;
p.flags |= IORING_SETUP_CQSIZE;
if (poll)
p.flags |= IORING_SETUP_IOPOLL;
if (defer)
p.flags |= IORING_SETUP_SINGLE_ISSUER |
IORING_SETUP_DEFER_TASKRUN;
ret = io_uring_queue_init_params(2, &ring, &p);
if (ret) {
fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
return 1;
}
assert(p.cq_entries < N);
/* submit N SQEs, some should overflow */
for (i = 0; i < N; i++) {
submit_one_nop(&ring, i);
outstanding++;
}
for (i = 0; i < LOOPS; i++) {
struct io_uring_cqe *cqes[N];
if (io_uring_cq_has_overflow(&ring)) {
/*
* Flush any overflowed CQEs and process those. Actively
			 * flush these so that CQEs arrive roughly in the order
			 * they were sent.
*/
ret = io_uring_get_events(&ring);
if (ret != 0) {
fprintf(stderr,
"io_uring_get_events returned %d\n",
ret);
goto err;
}
} else if (!cqe_dropped) {
for (j = 0; j < queued; j++) {
submit_one_nop(&ring, queue[j]);
outstanding++;
}
queued = 0;
}
		/* We have lost some random CQEs; stop if none remain. */
if (cqe_dropped && outstanding == *ring.cq.koverflow)
break;
ret = io_uring_wait_cqe(&ring, &cqes[0]);
if (ret == -EBADR) {
cqe_dropped = true;
fprintf(stderr, "CQE dropped\n");
continue;
} else if (ret != 0) {
fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret);
goto err;
}
cqe_count = 1;
if (batch) {
ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
if (ret < 0) {
fprintf(stderr,
"io_uring_peek_batch_cqe failed %d\n",
ret);
goto err;
}
cqe_count = ret;
}
for (j = 0; j < cqe_count; j++) {
assert(cqes[j]->user_data < N);
ud = cqes[j]->user_data;
completions[ud]++;
assert(queued < QUEUE_LENGTH);
queue[queued++] = (int)ud;
}
io_uring_cq_advance(&ring, cqe_count);
outstanding -= cqe_count;
}
/* See if there were any drops by flushing the CQ ring *and* overflow */
do {
struct io_uring_cqe *cqe;
ret = io_uring_get_events(&ring);
if (ret < 0) {
if (ret == -EBADR) {
fprintf(stderr, "CQE dropped\n");
cqe_dropped = true;
break;
}
goto err;
}
if (outstanding && !io_uring_cq_ready(&ring))
ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);
if (ret && ret != -ETIME) {
if (ret == -EBADR) {
fprintf(stderr, "CQE dropped\n");
cqe_dropped = true;
break;
}
fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
goto err;
}
count = io_uring_cq_ready(&ring);
io_uring_cq_advance(&ring, count);
outstanding -= count;
} while (count);
io_uring_queue_exit(&ring);
/* Make sure that completions come back in the same order they were
* sent. If they come back unfairly then this will concentrate on a
* couple of indices.
*/
for (i = 1; !cqe_dropped && i < N; i++) {
if (abs(completions[i] - completions[i - 1]) > 1) {
fprintf(stderr, "bad completion size %d %d\n",
completions[i], completions[i - 1]);
goto err;
}
}
return 0;
err:
io_uring_queue_exit(&ring);
return 1;
}

int main(int argc, char *argv[])
{
const char *fname = ".cq-overflow";
unsigned iters, drops;
unsigned long usecs;
int ret;
int i;
bool can_defer;
if (argc > 1)
return T_EXIT_SKIP;
can_defer = t_probe_defer_taskrun();
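	/*
	 * Run test_overflow_handling() over every combination of CQE batching,
	 * CQ ring size, IOPOLL and DEFER_TASKRUN.
	 */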
for (i = 0; i < 16; i++) {
bool batch = i & 1;
int mult = (i & 2) ? 1 : 2;
bool poll = i & 4;
bool defer = i & 8;
if (defer && !can_defer)
continue;
ret = test_overflow_handling(batch, mult, poll, defer);
if (ret) {
fprintf(stderr, "test_overflow_handling("
"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
batch, mult, poll, defer);
goto err;
}
}
ret = test_overflow();
if (ret) {
fprintf(stderr, "test_overflow failed\n");
return ret;
}
t_create_file(fname, FILE_SIZE);
vecs = t_create_buffers(BUFFERS, BS);
iters = 0;
usecs = 1000;
do {
drops = 0;
ret = test_io(fname, usecs, &drops, 0);
if (ret == T_EXIT_SKIP)
break;
else if (ret != T_EXIT_PASS) {
fprintf(stderr, "test_io nofault failed\n");
goto err;
}
if (drops)
break;
usecs = (usecs * 12) / 10;
iters++;
} while (iters < 40);
if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
fprintf(stderr, "test_io nofault failed\n");
goto err;
}
if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
fprintf(stderr, "test_io fault failed\n");
goto err;
}
unlink(fname);
	if (vecs != NULL) {
for (i = 0; i < BUFFERS; i++)
free(vecs[i].iov_base);
}
free(vecs);
return T_EXIT_PASS;
err:
unlink(fname);
	if (vecs != NULL) {
for (i = 0; i < BUFFERS; i++)
free(vecs[i].iov_base);
}
free(vecs);
return T_EXIT_FAIL;
}