t/io_uring: Vectored fixed buffer test support for nvme passthrough path

The kernel's NVMe passthrough path already supports vectored IO when
using fixed buffers, but fio does not exercise it yet. This patch adds
a -V option to t/io_uring (default off) so that path can be tested, and
drops the io_uring_probe() call that used to set 'vectored'
automatically.

Test results, first with vectored IO enabled (-V1), then disabled (-V0):

taskset -c 1 t/io_uring -b512 -d64 -c2 -s2 -p1 -F1 -B1 -O0 -n1 -V1 -u1 -r4 /dev/ng1n1
submitter=0, tid=6179, file=/dev/ng1n1, nfiles=1, node=-1
polled=1, fixedbufs=1, register_files=1, buffered=1, QD=64
Engine=io_uring, sq_ring=64, cq_ring=64
IOPS=289.78K, BW=141MiB/s, IOS/call=1/1
IOPS=294.68K, BW=143MiB/s, IOS/call=1/1
IOPS=295.26K, BW=144MiB/s, IOS/call=1/1
Exiting on timeout
Maximum IOPS=295.26K

taskset -c 1 t/io_uring -b512 -d64 -c2 -s2 -p1 -F1 -B1 -O0 -n1 -V0 -u1 -r4 /dev/ng1n1
submitter=0, tid=6183, file=/dev/ng1n1, nfiles=1, node=-1
polled=1, fixedbufs=1, register_files=1, buffered=1, QD=64
Engine=io_uring, sq_ring=64, cq_ring=64
IOPS=292.31K, BW=142MiB/s, IOS/call=1/1
IOPS=295.79K, BW=144MiB/s, IOS/call=1/1
IOPS=290.78K, BW=141MiB/s, IOS/call=1/1
Exiting on timeout
Maximum IOPS=295.79K

Signed-off-by: Xiaobing Li <xiaobing.li@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/t/io_uring.c b/t/io_uring.c
index 581cfe5..2a5d0c8 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -146,14 +146,13 @@
 static int register_ring = 1;	/* register ring */
 static int use_sync = 0;	/* use preadv2 */
 static int numa_placement = 0;	/* set to node of device */
+static int vectored = 0;	/* use vectored IO */
 static int pt = 0;		/* passthrough I/O or not */
 
 static unsigned long tsc_rate;
 
 #define TSC_RATE_FILE	"tsc-rate"
 
-static int vectored = 1;
-
 static float plist[] = { 1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0,
 			80.0, 90.0, 95.0, 99.0, 99.5, 99.9, 99.95, 99.99 };
 static int plist_len = 17;
@@ -461,28 +460,6 @@
 	return ret;
 }
 
-static void io_uring_probe(int fd)
-{
-	struct io_uring_probe *p;
-	int ret;
-
-	p = calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
-	if (!p)
-		return;
-
-	ret = syscall(__NR_io_uring_register, fd, IORING_REGISTER_PROBE, p, 256);
-	if (ret < 0)
-		goto out;
-
-	if (IORING_OP_READ > p->ops_len)
-		goto out;
-
-	if ((p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED))
-		vectored = 0;
-out:
-	free(p);
-}
-
 static int io_uring_enter(struct submitter *s, unsigned int to_submit,
 			  unsigned int min_complete, unsigned int flags)
 {
@@ -630,6 +607,12 @@
 		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
 		sqe->buf_index = 0;
 	}
+	if (vectored) {
+		sqe->cmd_op = NVME_URING_CMD_IO_VEC;
+		cmd->addr = (unsigned long) &s->iovecs[index];
+		cmd->data_len = 1;
+		sqe->buf_index = 0;
+	}
 	cmd->nsid = f->nsid;
 	cmd->opcode = 2;
 }
@@ -927,8 +910,6 @@
 	}
 	s->ring_fd = s->enter_ring_fd = fd;
 
-	io_uring_probe(fd);
-
 	if (fixedbufs) {
 		struct rlimit rlim;
 
@@ -1521,11 +1502,12 @@
 		" -S <bool> : Use sync IO (preadv2), default %d\n"
 		" -X <bool> : Use registered ring %d\n"
 		" -P <bool> : Automatically place on device home node %d\n"
+		" -V <bool> : Vectored IO, default %d\n"
 		" -u <bool> : Use nvme-passthrough I/O, default %d\n",
 		argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
 		fixedbufs, register_files, nthreads, !buffered, do_nop,
 		stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio,
-		use_sync, register_ring, numa_placement, pt);
+		use_sync, register_ring, numa_placement, vectored, pt);
 	exit(status);
 }
 
@@ -1584,7 +1566,7 @@
 	if (!do_nop && argc < 2)
 		usage(argv[0], 1);
 
-	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:P:u:h?")) != -1) {
+	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:a:r:D:R:X:S:P:V:u:h?")) != -1) {
 		switch (opt) {
 		case 'a':
 			aio = !!atoi(optarg);
@@ -1662,6 +1644,9 @@
 		case 'P':
 			numa_placement = !!atoi(optarg);
 			break;
+		case 'V':
+			vectored = !!atoi(optarg);
+			break;
 		case 'u':
 			pt = !!atoi(optarg);
 			break;