| /* SPDX-License-Identifier: MIT */ |
| /* |
| * Description: run various CQ ring overflow tests |
| * |
| */ |
| #include <errno.h> |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <assert.h> |
| |
| #include "helpers.h" |
| #include "liburing.h" |
| |
| #define FILE_SIZE (256 * 1024) |
| #define BS 4096 |
| #define BUFFERS (FILE_SIZE / BS) |
| |
| static struct iovec *vecs; |
| |
| #define ENTRIES 8 |
| |
| /* |
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring and no GFP_ATOMIC memory
 * is available. In practice this probably means the process is about to be
 * killed anyway, as many other allocations will be failing too, but we still
 * want to test that liburing and the kernel handle this properly. The fault
 * injection framework allows us to test this scenario, but it requires some
 * system-wide changes, so it is not enabled by default.
| * The tests in this file should work in both cases (where overflows are queued |
| * and where they are dropped) on recent kernels. |
| * |
| * In order to test dropped CQEs you should enable fault injection in the kernel |
| * config: |
| * |
| * CONFIG_FAULT_INJECTION=y |
| * CONFIG_FAILSLAB=y |
| * CONFIG_FAULT_INJECTION_DEBUG_FS=y |
| * |
| * and then run the test as follows: |
| * echo Y > /sys/kernel/debug/failslab/task-filter |
| * echo 100 > /sys/kernel/debug/failslab/probability |
| * echo 0 > /sys/kernel/debug/failslab/verbose |
| * echo 100000 > /sys/kernel/debug/failslab/times |
| * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t" |
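 *
 * Afterwards, slab fault injection can be disabled again by writing 0 back
 * to /sys/kernel/debug/failslab/probability.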
| */ |
| |
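/*
 * Submit O_DIRECT reads in two batches with a delay in between so the CQ ring
 * overflows, then reap everything and verify that each request is accounted
 * for: as a reaped CQE, via the overflow counter, or as a dropped CQE
 * (-EBADR) when fault injection is active.
 */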
| static int test_io(const char *file, unsigned long usecs, unsigned *drops, |
| int fault) |
| { |
| struct io_uring_sqe *sqe; |
| struct io_uring_cqe *cqe; |
| struct io_uring_params p; |
| unsigned reaped, total; |
| struct io_uring ring; |
| int nodrop, i, fd, ret; |
| bool cqe_dropped = false; |
| |
| fd = open(file, O_RDONLY | O_DIRECT); |
| if (fd < 0) { |
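		/* O_DIRECT may not be supported by this file system */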
| if (errno == EINVAL) |
| return T_EXIT_SKIP; |
| perror("file open"); |
| return T_EXIT_FAIL; |
| } |
| |
| memset(&p, 0, sizeof(p)); |
| ret = io_uring_queue_init_params(ENTRIES, &ring, &p); |
| if (ret) { |
| close(fd); |
| fprintf(stderr, "ring create failed: %d\n", ret); |
| return T_EXIT_FAIL; |
| } |
| nodrop = 0; |
| if (p.features & IORING_FEAT_NODROP) |
| nodrop = 1; |
| |
| total = 0; |
| for (i = 0; i < BUFFERS / 2; i++) { |
| off_t offset; |
| |
| sqe = io_uring_get_sqe(&ring); |
| if (!sqe) { |
| fprintf(stderr, "sqe get failed\n"); |
| goto err; |
| } |
| offset = BS * (rand() % BUFFERS); |
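		/*
		 * In the fault test, sabotage one request by freeing its
		 * buffer; its readv should then complete with -EFAULT.
		 */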
| if (fault && i == ENTRIES + 4) { |
| free(vecs[i].iov_base); |
| vecs[i].iov_base = NULL; |
| } |
| io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); |
| |
| ret = io_uring_submit(&ring); |
| if (nodrop && ret == -EBUSY) { |
| *drops = 1; |
| total = i; |
| break; |
| } else if (ret != 1) { |
| fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); |
| total = i; |
| break; |
| } |
| total++; |
| } |
| |
| if (*drops) |
| goto reap_it; |
| |
| usleep(usecs); |
| |
| for (i = total; i < BUFFERS; i++) { |
| off_t offset; |
| |
| sqe = io_uring_get_sqe(&ring); |
| if (!sqe) { |
| fprintf(stderr, "sqe get failed\n"); |
| goto err; |
| } |
| offset = BS * (rand() % BUFFERS); |
| io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); |
| |
| ret = io_uring_submit(&ring); |
| if (nodrop && ret == -EBUSY) { |
| *drops = 1; |
| break; |
| } else if (ret != 1) { |
| fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); |
| break; |
| } |
| total++; |
| } |
| |
| reap_it: |
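	/*
	 * Reap until every submitted request is accounted for, either as a
	 * reaped CQE, as an entry in the kernel overflow counter, or (with
	 * fault injection) as a dropped CQE signalled by -EBADR.
	 */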
| reaped = 0; |
| do { |
| if (nodrop && !cqe_dropped) { |
| /* nodrop should never lose events unless cqe_dropped */ |
| if (reaped == total) |
| break; |
| } else { |
| if (reaped + *ring.cq.koverflow == total) |
| break; |
| } |
| ret = io_uring_wait_cqe(&ring, &cqe); |
| if (nodrop && ret == -EBADR) { |
| cqe_dropped = true; |
| continue; |
| } else if (ret) { |
| fprintf(stderr, "wait_cqe=%d\n", ret); |
| goto err; |
| } |
| if (cqe->res != BS) { |
| if (!(fault && cqe->res == -EFAULT)) { |
| fprintf(stderr, "cqe res %d, wanted %d\n", |
| cqe->res, BS); |
| goto err; |
| } |
| } |
| io_uring_cqe_seen(&ring, cqe); |
| reaped++; |
| } while (1); |
| |
| if (!io_uring_peek_cqe(&ring, &cqe)) { |
| fprintf(stderr, "found unexpected completion\n"); |
| goto err; |
| } |
| |
| if (!nodrop || cqe_dropped) { |
| *drops = *ring.cq.koverflow; |
| } else if (*ring.cq.koverflow) { |
| fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow); |
| goto err; |
| } |
| |
| io_uring_queue_exit(&ring); |
| close(fd); |
| return T_EXIT_PASS; |
| err: |
| if (fd != -1) |
| close(fd); |
| io_uring_queue_exit(&ring); |
	return T_EXIT_FAIL;
| } |
| |
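/*
 * Reap up to nr_events CQEs, either waiting or just peeking. Completions must
 * carry sequential user_data unless the kernel reported dropped CQEs via
 * -EBADR, in which case the overflow counter tells us how many were lost.
 */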
| static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait) |
| { |
| struct io_uring_cqe *cqe; |
| int i, ret = 0, seq = 0; |
| unsigned int start_overflow = *ring->cq.koverflow; |
| bool dropped = false; |
| |
| for (i = 0; i < nr_events; i++) { |
| if (do_wait) |
| ret = io_uring_wait_cqe(ring, &cqe); |
| else |
| ret = io_uring_peek_cqe(ring, &cqe); |
| if (do_wait && ret == -EBADR) { |
| unsigned int this_drop = *ring->cq.koverflow - |
| start_overflow; |
| |
| dropped = true; |
| start_overflow = *ring->cq.koverflow; |
| assert(this_drop > 0); |
| i += (this_drop - 1); |
| continue; |
| } else if (ret) { |
| if (ret != -EAGAIN) |
| fprintf(stderr, "cqe peek failed: %d\n", ret); |
| break; |
| } |
| if (!dropped && cqe->user_data != seq) { |
| fprintf(stderr, "cqe sequence out-of-order\n"); |
| fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data, |
| seq); |
| return -EINVAL; |
| } |
| seq++; |
| io_uring_cqe_seen(ring, cqe); |
| } |
| |
| return i ? i : ret; |
| } |
| |
| /* |
| * Submit some NOPs and watch if the overflow is correct |
| */ |
| static int test_overflow(void) |
| { |
| struct io_uring ring; |
| struct io_uring_params p; |
| struct io_uring_sqe *sqe; |
| unsigned pending; |
| int ret, i, j; |
| |
| memset(&p, 0, sizeof(p)); |
| ret = io_uring_queue_init_params(4, &ring, &p); |
| if (ret) { |
| fprintf(stderr, "io_uring_queue_init failed %d\n", ret); |
| return 1; |
| } |
| |
| /* submit 4x4 SQEs, should overflow the ring by 8 */ |
| pending = 0; |
| for (i = 0; i < 4; i++) { |
| for (j = 0; j < 4; j++) { |
| sqe = io_uring_get_sqe(&ring); |
| if (!sqe) { |
| fprintf(stderr, "get sqe failed\n"); |
| goto err; |
| } |
| |
| io_uring_prep_nop(sqe); |
| sqe->user_data = (i * 4) + j; |
| } |
| |
| ret = io_uring_submit(&ring); |
| if (ret == 4) { |
| pending += 4; |
| continue; |
| } |
| if (p.features & IORING_FEAT_NODROP) { |
| if (ret == -EBUSY) |
| break; |
| } |
| fprintf(stderr, "sqe submit failed: %d\n", ret); |
| goto err; |
| } |
| |
| /* we should now have 8 completions ready */ |
| ret = reap_events(&ring, pending, 0); |
| if (ret < 0) |
| goto err; |
| |
| if (!(p.features & IORING_FEAT_NODROP)) { |
| if (*ring.cq.koverflow != 8) { |
| fprintf(stderr, "cq ring overflow %d, expected 8\n", |
| *ring.cq.koverflow); |
| goto err; |
| } |
| } |
| io_uring_queue_exit(&ring); |
| return 0; |
| err: |
| io_uring_queue_exit(&ring); |
| return 1; |
| } |
| |
| |
| static void submit_one_nop(struct io_uring *ring, int ud) |
| { |
| struct io_uring_sqe *sqe; |
| int ret; |
| |
| sqe = io_uring_get_sqe(ring); |
| assert(sqe); |
| io_uring_prep_nop(sqe); |
| sqe->user_data = ud; |
| ret = io_uring_submit(ring); |
| assert(ret == 1); |
| } |
| |
| /* |
| * Create an overflow condition and ensure that SQEs are still processed |
| */ |
| static int test_overflow_handling(bool batch, int cqe_multiple, bool poll, |
| bool defer) |
| { |
| struct io_uring ring; |
| struct io_uring_params p; |
| int ret, i, j, ud, cqe_count; |
| unsigned int count; |
| int const N = 8; |
| int const LOOPS = 128; |
| int const QUEUE_LENGTH = 1024; |
| int completions[N]; |
| int queue[QUEUE_LENGTH]; |
| int queued = 0; |
| int outstanding = 0; |
| bool cqe_dropped = false; |
| |
| memset(&completions, 0, sizeof(int) * N); |
| memset(&p, 0, sizeof(p)); |
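	/*
	 * Use a deliberately tiny CQ ring (2 or 4 entries, see the assert
	 * below) so the N outstanding NOPs are guaranteed to overflow it.
	 */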
| p.cq_entries = 2 * cqe_multiple; |
| p.flags |= IORING_SETUP_CQSIZE; |
| |
| if (poll) |
| p.flags |= IORING_SETUP_IOPOLL; |
| |
| if (defer) |
| p.flags |= IORING_SETUP_SINGLE_ISSUER | |
| IORING_SETUP_DEFER_TASKRUN; |
| |
| ret = io_uring_queue_init_params(2, &ring, &p); |
| if (ret) { |
| fprintf(stderr, "io_uring_queue_init failed %d\n", ret); |
| return 1; |
| } |
| |
| assert(p.cq_entries < N); |
| /* submit N SQEs, some should overflow */ |
| for (i = 0; i < N; i++) { |
| submit_one_nop(&ring, i); |
| outstanding++; |
| } |
| |
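	/*
	 * Keep the ring in an overflow state: flush overflowed CQEs when
	 * present, resubmit each completed NOP, and count completions per
	 * user_data so we can later check they were delivered fairly.
	 */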
| for (i = 0; i < LOOPS; i++) { |
| struct io_uring_cqe *cqes[N]; |
| |
| if (io_uring_cq_has_overflow(&ring)) { |
| /* |
| * Flush any overflowed CQEs and process those. Actively |
| * flush these to make sure CQEs arrive in vague order |
| * of being sent. |
| */ |
| ret = io_uring_get_events(&ring); |
| if (ret != 0) { |
| fprintf(stderr, |
| "io_uring_get_events returned %d\n", |
| ret); |
| goto err; |
| } |
| } else if (!cqe_dropped) { |
| for (j = 0; j < queued; j++) { |
| submit_one_nop(&ring, queue[j]); |
| outstanding++; |
| } |
| queued = 0; |
| } |
| |
		/*
		 * Some CQEs were dropped; stop once every outstanding request
		 * is accounted for by the overflow counter.
		 */
| if (cqe_dropped && outstanding == *ring.cq.koverflow) |
| break; |
| |
| ret = io_uring_wait_cqe(&ring, &cqes[0]); |
| if (ret == -EBADR) { |
| cqe_dropped = true; |
| fprintf(stderr, "CQE dropped\n"); |
| continue; |
| } else if (ret != 0) { |
			fprintf(stderr, "io_uring_wait_cqe failed %d\n", ret);
| goto err; |
| } |
| cqe_count = 1; |
| if (batch) { |
| ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2); |
| if (ret < 0) { |
| fprintf(stderr, |
| "io_uring_peek_batch_cqe failed %d\n", |
| ret); |
| goto err; |
| } |
| cqe_count = ret; |
| } |
| for (j = 0; j < cqe_count; j++) { |
| assert(cqes[j]->user_data < N); |
| ud = cqes[j]->user_data; |
| completions[ud]++; |
| assert(queued < QUEUE_LENGTH); |
| queue[queued++] = (int)ud; |
| } |
| io_uring_cq_advance(&ring, cqe_count); |
| outstanding -= cqe_count; |
| } |
| |
| /* See if there were any drops by flushing the CQ ring *and* overflow */ |
| do { |
| struct io_uring_cqe *cqe; |
| |
| ret = io_uring_get_events(&ring); |
| if (ret < 0) { |
| if (ret == -EBADR) { |
| fprintf(stderr, "CQE dropped\n"); |
| cqe_dropped = true; |
| break; |
| } |
| goto err; |
| } |
| if (outstanding && !io_uring_cq_ready(&ring)) |
| ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL); |
| |
| if (ret && ret != -ETIME) { |
| if (ret == -EBADR) { |
| fprintf(stderr, "CQE dropped\n"); |
| cqe_dropped = true; |
| break; |
| } |
| fprintf(stderr, "wait_cqe_timeout = %d\n", ret); |
| goto err; |
| } |
| count = io_uring_cq_ready(&ring); |
| io_uring_cq_advance(&ring, count); |
| outstanding -= count; |
| } while (count); |
| |
| io_uring_queue_exit(&ring); |
| |
	/*
	 * Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly, the counts will concentrate on a
	 * couple of indices and differ by more than one.
	 */
| for (i = 1; !cqe_dropped && i < N; i++) { |
| if (abs(completions[i] - completions[i - 1]) > 1) { |
| fprintf(stderr, "bad completion size %d %d\n", |
| completions[i], completions[i - 1]); |
			return 1;
| } |
| } |
| return 0; |
| err: |
| io_uring_queue_exit(&ring); |
| return 1; |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| const char *fname = ".cq-overflow"; |
| unsigned iters, drops; |
| unsigned long usecs; |
| int ret; |
| int i; |
| bool can_defer; |
| |
| if (argc > 1) |
| return T_EXIT_SKIP; |
| |
| can_defer = t_probe_defer_taskrun(); |
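	/*
	 * Run every combination: bit 0 = batched reaping, bit 1 = CQ size
	 * multiplier, bit 2 = IOPOLL, bit 3 = DEFER_TASKRUN.
	 */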
| for (i = 0; i < 16; i++) { |
| bool batch = i & 1; |
| int mult = (i & 2) ? 1 : 2; |
| bool poll = i & 4; |
| bool defer = i & 8; |
| |
| if (defer && !can_defer) |
| continue; |
| |
| ret = test_overflow_handling(batch, mult, poll, defer); |
| if (ret) { |
| fprintf(stderr, "test_overflow_handling(" |
| "batch=%d, mult=%d, poll=%d, defer=%d) failed\n", |
| batch, mult, poll, defer); |
| goto err; |
| } |
| } |
| |
| ret = test_overflow(); |
| if (ret) { |
| fprintf(stderr, "test_overflow failed\n"); |
| return ret; |
| } |
| |
| t_create_file(fname, FILE_SIZE); |
| |
| vecs = t_create_buffers(BUFFERS, BS); |
| |
| iters = 0; |
| usecs = 1000; |
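	/*
	 * Keep increasing the delay between the two submission batches until
	 * an overflow (drops) is observed, then rerun with that delay, once
	 * without and once with the -EFAULT injection.
	 */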
| do { |
| drops = 0; |
| |
| ret = test_io(fname, usecs, &drops, 0); |
| if (ret == T_EXIT_SKIP) |
| break; |
| else if (ret != T_EXIT_PASS) { |
| fprintf(stderr, "test_io nofault failed\n"); |
| goto err; |
| } |
| if (drops) |
| break; |
| usecs = (usecs * 12) / 10; |
| iters++; |
| } while (iters < 40); |
| |
| if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) { |
| fprintf(stderr, "test_io nofault failed\n"); |
| goto err; |
| } |
| |
| if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) { |
| fprintf(stderr, "test_io fault failed\n"); |
| goto err; |
| } |
| |
| unlink(fname); |
	if (vecs != NULL) {
| for (i = 0; i < BUFFERS; i++) |
| free(vecs[i].iov_base); |
| } |
| free(vecs); |
| return T_EXIT_PASS; |
| err: |
| unlink(fname); |
	if (vecs != NULL) {
| for (i = 0; i < BUFFERS; i++) |
| free(vecs[i].iov_base); |
| } |
| free(vecs); |
| return T_EXIT_FAIL; |
| } |