| // SPDX-License-Identifier: GPL-2.0 | 
 | /* | 
 |  * FUSE: Filesystem in Userspace | 
 |  * Copyright (c) 2023-2024 DataDirect Networks. | 
 |  */ | 
 |  | 
 | #include "fuse_i.h" | 
 | #include "dev_uring_i.h" | 
 | #include "fuse_dev_i.h" | 
 |  | 
 | #include <linux/fs.h> | 
 | #include <linux/io_uring/cmd.h> | 
 |  | 
 | static bool __read_mostly enable_uring; | 
 | module_param(enable_uring, bool, 0644); | 
 | MODULE_PARM_DESC(enable_uring, | 
 | 		 "Enable userspace communication through io-uring"); | 
 |  | 
 | #define FUSE_URING_IOV_SEGS 2 /* header and payload */ | 
 |  | 
 |  | 
 | bool fuse_uring_enabled(void) | 
 | { | 
 | 	return enable_uring; | 
 | } | 
 |  | 
 | struct fuse_uring_pdu { | 
 | 	struct fuse_ring_ent *ent; | 
 | }; | 
 |  | 
 | static const struct fuse_iqueue_ops fuse_io_uring_ops; | 
 |  | 
 | static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd, | 
 | 				   struct fuse_ring_ent *ring_ent) | 
 | { | 
 | 	struct fuse_uring_pdu *pdu = | 
 | 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu); | 
 |  | 
 | 	pdu->ent = ring_ent; | 
 | } | 
 |  | 
 | static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd) | 
 | { | 
 | 	struct fuse_uring_pdu *pdu = | 
 | 		io_uring_cmd_to_pdu(cmd, struct fuse_uring_pdu); | 
 |  | 
 | 	return pdu->ent; | 
 | } | 
 |  | 
 | static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) | 
 | { | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	struct fuse_conn *fc = ring->fc; | 
 |  | 
 | 	lockdep_assert_held(&queue->lock); | 
 | 	lockdep_assert_held(&fc->bg_lock); | 
 |  | 
 | 	/* | 
 | 	 * Allow one bg request per queue, ignoring global fc limits. | 
 | 	 * This prevents a single queue from consuming all resources and | 
 | 	 * eliminates the need for remote queue wake-ups when global | 
 | 	 * limits are met but this queue has no more waiting requests. | 
 | 	 */ | 
 | 	while ((fc->active_background < fc->max_background || | 
 | 		!queue->active_background) && | 
 | 	       (!list_empty(&queue->fuse_req_bg_queue))) { | 
 | 		struct fuse_req *req; | 
 |  | 
 | 		req = list_first_entry(&queue->fuse_req_bg_queue, | 
 | 				       struct fuse_req, list); | 
 | 		fc->active_background++; | 
 | 		queue->active_background++; | 
 |  | 
 | 		list_move_tail(&req->list, &queue->fuse_req_queue); | 
 | 	} | 
 | } | 
 |  | 
 | static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, | 
 | 			       int error) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	struct fuse_conn *fc = ring->fc; | 
 |  | 
 | 	lockdep_assert_not_held(&queue->lock); | 
 | 	spin_lock(&queue->lock); | 
 | 	ent->fuse_req = NULL; | 
 | 	if (test_bit(FR_BACKGROUND, &req->flags)) { | 
 | 		queue->active_background--; | 
 | 		spin_lock(&fc->bg_lock); | 
 | 		fuse_uring_flush_bg(queue); | 
 | 		spin_unlock(&fc->bg_lock); | 
 | 	} | 
 |  | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (error) | 
 | 		req->out.h.error = error; | 
 |  | 
 | 	clear_bit(FR_SENT, &req->flags); | 
 | 	fuse_request_end(req); | 
 | } | 
 |  | 
 | /* Abort all list queued request on the given ring queue */ | 
 | static void fuse_uring_abort_end_queue_requests(struct fuse_ring_queue *queue) | 
 | { | 
 | 	struct fuse_req *req; | 
 | 	LIST_HEAD(req_list); | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	list_for_each_entry(req, &queue->fuse_req_queue, list) | 
 | 		clear_bit(FR_PENDING, &req->flags); | 
 | 	list_splice_init(&queue->fuse_req_queue, &req_list); | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	/* must not hold queue lock to avoid order issues with fi->lock */ | 
 | 	fuse_dev_end_requests(&req_list); | 
 | } | 
 |  | 
 | void fuse_uring_abort_end_requests(struct fuse_ring *ring) | 
 | { | 
 | 	int qid; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	struct fuse_conn *fc = ring->fc; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		queue = READ_ONCE(ring->queues[qid]); | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		queue->stopped = true; | 
 |  | 
 | 		WARN_ON_ONCE(ring->fc->max_background != UINT_MAX); | 
 | 		spin_lock(&queue->lock); | 
 | 		spin_lock(&fc->bg_lock); | 
 | 		fuse_uring_flush_bg(queue); | 
 | 		spin_unlock(&fc->bg_lock); | 
 | 		spin_unlock(&queue->lock); | 
 | 		fuse_uring_abort_end_queue_requests(queue); | 
 | 	} | 
 | } | 
 |  | 
 | static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list) | 
 | { | 
 | 	struct fuse_ring_ent *ent; | 
 | 	struct fuse_req *req; | 
 |  | 
 | 	ent = list_first_entry_or_null(list, struct fuse_ring_ent, list); | 
 | 	if (!ent) | 
 | 		return false; | 
 |  | 
 | 	req = ent->fuse_req; | 
 |  | 
 | 	return time_is_before_jiffies(req->create_time + | 
 | 				      fc->timeout.req_timeout); | 
 | } | 
 |  | 
 | bool fuse_uring_request_expired(struct fuse_conn *fc) | 
 | { | 
 | 	struct fuse_ring *ring = fc->ring; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	int qid; | 
 |  | 
 | 	if (!ring) | 
 | 		return false; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		queue = READ_ONCE(ring->queues[qid]); | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		spin_lock(&queue->lock); | 
 | 		if (fuse_request_expired(fc, &queue->fuse_req_queue) || | 
 | 		    fuse_request_expired(fc, &queue->fuse_req_bg_queue) || | 
 | 		    ent_list_request_expired(fc, &queue->ent_w_req_queue) || | 
 | 		    ent_list_request_expired(fc, &queue->ent_in_userspace)) { | 
 | 			spin_unlock(&queue->lock); | 
 | 			return true; | 
 | 		} | 
 | 		spin_unlock(&queue->lock); | 
 | 	} | 
 |  | 
 | 	return false; | 
 | } | 
 |  | 
 | void fuse_uring_destruct(struct fuse_conn *fc) | 
 | { | 
 | 	struct fuse_ring *ring = fc->ring; | 
 | 	int qid; | 
 |  | 
 | 	if (!ring) | 
 | 		return; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		struct fuse_ring_queue *queue = ring->queues[qid]; | 
 | 		struct fuse_ring_ent *ent, *next; | 
 |  | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		WARN_ON(!list_empty(&queue->ent_avail_queue)); | 
 | 		WARN_ON(!list_empty(&queue->ent_w_req_queue)); | 
 | 		WARN_ON(!list_empty(&queue->ent_commit_queue)); | 
 | 		WARN_ON(!list_empty(&queue->ent_in_userspace)); | 
 |  | 
 | 		list_for_each_entry_safe(ent, next, &queue->ent_released, | 
 | 					 list) { | 
 | 			list_del_init(&ent->list); | 
 | 			kfree(ent); | 
 | 		} | 
 |  | 
 | 		kfree(queue->fpq.processing); | 
 | 		kfree(queue); | 
 | 		ring->queues[qid] = NULL; | 
 | 	} | 
 |  | 
 | 	kfree(ring->queues); | 
 | 	kfree(ring); | 
 | 	fc->ring = NULL; | 
 | } | 
 |  | 
 | /* | 
 |  * Basic ring setup for this connection based on the provided configuration | 
 |  */ | 
 | static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) | 
 | { | 
 | 	struct fuse_ring *ring; | 
 | 	size_t nr_queues = num_possible_cpus(); | 
 | 	struct fuse_ring *res = NULL; | 
 | 	size_t max_payload_size; | 
 |  | 
 | 	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT); | 
 | 	if (!ring) | 
 | 		return NULL; | 
 |  | 
 | 	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *), | 
 | 			       GFP_KERNEL_ACCOUNT); | 
 | 	if (!ring->queues) | 
 | 		goto out_err; | 
 |  | 
 | 	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write); | 
 | 	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE); | 
 |  | 
 | 	spin_lock(&fc->lock); | 
 | 	if (fc->ring) { | 
 | 		/* race, another thread created the ring in the meantime */ | 
 | 		spin_unlock(&fc->lock); | 
 | 		res = fc->ring; | 
 | 		goto out_err; | 
 | 	} | 
 |  | 
 | 	init_waitqueue_head(&ring->stop_waitq); | 
 |  | 
 | 	ring->nr_queues = nr_queues; | 
 | 	ring->fc = fc; | 
 | 	ring->max_payload_sz = max_payload_size; | 
 | 	smp_store_release(&fc->ring, ring); | 
 |  | 
 | 	spin_unlock(&fc->lock); | 
 | 	return ring; | 
 |  | 
 | out_err: | 
 | 	kfree(ring->queues); | 
 | 	kfree(ring); | 
 | 	return res; | 
 | } | 
 |  | 
 | static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, | 
 | 						       int qid) | 
 | { | 
 | 	struct fuse_conn *fc = ring->fc; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	struct list_head *pq; | 
 |  | 
 | 	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT); | 
 | 	if (!queue) | 
 | 		return NULL; | 
 | 	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); | 
 | 	if (!pq) { | 
 | 		kfree(queue); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	queue->qid = qid; | 
 | 	queue->ring = ring; | 
 | 	spin_lock_init(&queue->lock); | 
 |  | 
 | 	INIT_LIST_HEAD(&queue->ent_avail_queue); | 
 | 	INIT_LIST_HEAD(&queue->ent_commit_queue); | 
 | 	INIT_LIST_HEAD(&queue->ent_w_req_queue); | 
 | 	INIT_LIST_HEAD(&queue->ent_in_userspace); | 
 | 	INIT_LIST_HEAD(&queue->fuse_req_queue); | 
 | 	INIT_LIST_HEAD(&queue->fuse_req_bg_queue); | 
 | 	INIT_LIST_HEAD(&queue->ent_released); | 
 |  | 
 | 	queue->fpq.processing = pq; | 
 | 	fuse_pqueue_init(&queue->fpq); | 
 |  | 
 | 	spin_lock(&fc->lock); | 
 | 	if (ring->queues[qid]) { | 
 | 		spin_unlock(&fc->lock); | 
 | 		kfree(queue->fpq.processing); | 
 | 		kfree(queue); | 
 | 		return ring->queues[qid]; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * write_once and lock as the caller mostly doesn't take the lock at all | 
 | 	 */ | 
 | 	WRITE_ONCE(ring->queues[qid], queue); | 
 | 	spin_unlock(&fc->lock); | 
 |  | 
 | 	return queue; | 
 | } | 
 |  | 
 | static void fuse_uring_stop_fuse_req_end(struct fuse_req *req) | 
 | { | 
 | 	clear_bit(FR_SENT, &req->flags); | 
 | 	req->out.h.error = -ECONNABORTED; | 
 | 	fuse_request_end(req); | 
 | } | 
 |  | 
 | /* | 
 |  * Release a request/entry on connection tear down | 
 |  */ | 
 | static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent) | 
 | { | 
 | 	struct fuse_req *req; | 
 | 	struct io_uring_cmd *cmd; | 
 |  | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	cmd = ent->cmd; | 
 | 	ent->cmd = NULL; | 
 | 	req = ent->fuse_req; | 
 | 	ent->fuse_req = NULL; | 
 | 	if (req) { | 
 | 		/* remove entry from queue->fpq->processing */ | 
 | 		list_del_init(&req->list); | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * The entry must not be freed immediately, due to access of direct | 
 | 	 * pointer access of entries through IO_URING_F_CANCEL - there is a risk | 
 | 	 * of race between daemon termination (which triggers IO_URING_F_CANCEL | 
 | 	 * and accesses entries without checking the list state first | 
 | 	 */ | 
 | 	list_move(&ent->list, &queue->ent_released); | 
 | 	ent->state = FRRS_RELEASED; | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (cmd) | 
 | 		io_uring_cmd_done(cmd, -ENOTCONN, 0, IO_URING_F_UNLOCKED); | 
 |  | 
 | 	if (req) | 
 | 		fuse_uring_stop_fuse_req_end(req); | 
 | } | 
 |  | 
 | static void fuse_uring_stop_list_entries(struct list_head *head, | 
 | 					 struct fuse_ring_queue *queue, | 
 | 					 enum fuse_ring_req_state exp_state) | 
 | { | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	struct fuse_ring_ent *ent, *next; | 
 | 	ssize_t queue_refs = SSIZE_MAX; | 
 | 	LIST_HEAD(to_teardown); | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	list_for_each_entry_safe(ent, next, head, list) { | 
 | 		if (ent->state != exp_state) { | 
 | 			pr_warn("entry teardown qid=%d state=%d expected=%d", | 
 | 				queue->qid, ent->state, exp_state); | 
 | 			continue; | 
 | 		} | 
 |  | 
 | 		ent->state = FRRS_TEARDOWN; | 
 | 		list_move(&ent->list, &to_teardown); | 
 | 	} | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	/* no queue lock to avoid lock order issues */ | 
 | 	list_for_each_entry_safe(ent, next, &to_teardown, list) { | 
 | 		fuse_uring_entry_teardown(ent); | 
 | 		queue_refs = atomic_dec_return(&ring->queue_refs); | 
 | 		WARN_ON_ONCE(queue_refs < 0); | 
 | 	} | 
 | } | 
 |  | 
 | static void fuse_uring_teardown_entries(struct fuse_ring_queue *queue) | 
 | { | 
 | 	fuse_uring_stop_list_entries(&queue->ent_in_userspace, queue, | 
 | 				     FRRS_USERSPACE); | 
 | 	fuse_uring_stop_list_entries(&queue->ent_avail_queue, queue, | 
 | 				     FRRS_AVAILABLE); | 
 | } | 
 |  | 
 | /* | 
 |  * Log state debug info | 
 |  */ | 
 | static void fuse_uring_log_ent_state(struct fuse_ring *ring) | 
 | { | 
 | 	int qid; | 
 | 	struct fuse_ring_ent *ent; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		struct fuse_ring_queue *queue = ring->queues[qid]; | 
 |  | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		spin_lock(&queue->lock); | 
 | 		/* | 
 | 		 * Log entries from the intermediate queue, the other queues | 
 | 		 * should be empty | 
 | 		 */ | 
 | 		list_for_each_entry(ent, &queue->ent_w_req_queue, list) { | 
 | 			pr_info(" ent-req-queue ring=%p qid=%d ent=%p state=%d\n", | 
 | 				ring, qid, ent, ent->state); | 
 | 		} | 
 | 		list_for_each_entry(ent, &queue->ent_commit_queue, list) { | 
 | 			pr_info(" ent-commit-queue ring=%p qid=%d ent=%p state=%d\n", | 
 | 				ring, qid, ent, ent->state); | 
 | 		} | 
 | 		spin_unlock(&queue->lock); | 
 | 	} | 
 | 	ring->stop_debug_log = 1; | 
 | } | 
 |  | 
 | static void fuse_uring_async_stop_queues(struct work_struct *work) | 
 | { | 
 | 	int qid; | 
 | 	struct fuse_ring *ring = | 
 | 		container_of(work, struct fuse_ring, async_teardown_work.work); | 
 |  | 
 | 	/* XXX code dup */ | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); | 
 |  | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		fuse_uring_teardown_entries(queue); | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * Some ring entries might be in the middle of IO operations, | 
 | 	 * i.e. in process to get handled by file_operations::uring_cmd | 
 | 	 * or on the way to userspace - we could handle that with conditions in | 
 | 	 * run time code, but easier/cleaner to have an async tear down handler | 
 | 	 * If there are still queue references left | 
 | 	 */ | 
 | 	if (atomic_read(&ring->queue_refs) > 0) { | 
 | 		if (time_after(jiffies, | 
 | 			       ring->teardown_time + FUSE_URING_TEARDOWN_TIMEOUT)) | 
 | 			fuse_uring_log_ent_state(ring); | 
 |  | 
 | 		schedule_delayed_work(&ring->async_teardown_work, | 
 | 				      FUSE_URING_TEARDOWN_INTERVAL); | 
 | 	} else { | 
 | 		wake_up_all(&ring->stop_waitq); | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * Stop the ring queues | 
 |  */ | 
 | void fuse_uring_stop_queues(struct fuse_ring *ring) | 
 | { | 
 | 	int qid; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues; qid++) { | 
 | 		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]); | 
 |  | 
 | 		if (!queue) | 
 | 			continue; | 
 |  | 
 | 		fuse_uring_teardown_entries(queue); | 
 | 	} | 
 |  | 
 | 	if (atomic_read(&ring->queue_refs) > 0) { | 
 | 		ring->teardown_time = jiffies; | 
 | 		INIT_DELAYED_WORK(&ring->async_teardown_work, | 
 | 				  fuse_uring_async_stop_queues); | 
 | 		schedule_delayed_work(&ring->async_teardown_work, | 
 | 				      FUSE_URING_TEARDOWN_INTERVAL); | 
 | 	} else { | 
 | 		wake_up_all(&ring->stop_waitq); | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * Handle IO_URING_F_CANCEL, typically should come on daemon termination. | 
 |  * | 
 |  * Releasing the last entry should trigger fuse_dev_release() if | 
 |  * the daemon was terminated | 
 |  */ | 
 | static void fuse_uring_cancel(struct io_uring_cmd *cmd, | 
 | 			      unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd); | 
 | 	struct fuse_ring_queue *queue; | 
 | 	bool need_cmd_done = false; | 
 |  | 
 | 	/* | 
 | 	 * direct access on ent - it must not be destructed as long as | 
 | 	 * IO_URING_F_CANCEL might come up | 
 | 	 */ | 
 | 	queue = ent->queue; | 
 | 	spin_lock(&queue->lock); | 
 | 	if (ent->state == FRRS_AVAILABLE) { | 
 | 		ent->state = FRRS_USERSPACE; | 
 | 		list_move_tail(&ent->list, &queue->ent_in_userspace); | 
 | 		need_cmd_done = true; | 
 | 		ent->cmd = NULL; | 
 | 	} | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (need_cmd_done) { | 
 | 		/* no queue lock to avoid lock order issues */ | 
 | 		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags); | 
 | 	} | 
 | } | 
 |  | 
 | static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags, | 
 | 				      struct fuse_ring_ent *ring_ent) | 
 | { | 
 | 	uring_cmd_set_ring_ent(cmd, ring_ent); | 
 | 	io_uring_cmd_mark_cancelable(cmd, issue_flags); | 
 | } | 
 |  | 
 | /* | 
 |  * Checks for errors and stores it into the request | 
 |  */ | 
 | static int fuse_uring_out_header_has_err(struct fuse_out_header *oh, | 
 | 					 struct fuse_req *req, | 
 | 					 struct fuse_conn *fc) | 
 | { | 
 | 	int err; | 
 |  | 
 | 	err = -EINVAL; | 
 | 	if (oh->unique == 0) { | 
 | 		/* Not supported through io-uring yet */ | 
 | 		pr_warn_once("notify through fuse-io-uring not supported\n"); | 
 | 		goto err; | 
 | 	} | 
 |  | 
 | 	if (oh->error <= -ERESTARTSYS || oh->error > 0) | 
 | 		goto err; | 
 |  | 
 | 	if (oh->error) { | 
 | 		err = oh->error; | 
 | 		goto err; | 
 | 	} | 
 |  | 
 | 	err = -ENOENT; | 
 | 	if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) { | 
 | 		pr_warn_ratelimited("unique mismatch, expected: %llu got %llu\n", | 
 | 				    req->in.h.unique, | 
 | 				    oh->unique & ~FUSE_INT_REQ_BIT); | 
 | 		goto err; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * Is it an interrupt reply ID? | 
 | 	 * XXX: Not supported through fuse-io-uring yet, it should not even | 
 | 	 *      find the request - should not happen. | 
 | 	 */ | 
 | 	WARN_ON_ONCE(oh->unique & FUSE_INT_REQ_BIT); | 
 |  | 
 | 	err = 0; | 
 | err: | 
 | 	return err; | 
 | } | 
 |  | 
 | static int fuse_uring_copy_from_ring(struct fuse_ring *ring, | 
 | 				     struct fuse_req *req, | 
 | 				     struct fuse_ring_ent *ent) | 
 | { | 
 | 	struct fuse_copy_state cs; | 
 | 	struct fuse_args *args = req->args; | 
 | 	struct iov_iter iter; | 
 | 	int err; | 
 | 	struct fuse_uring_ent_in_out ring_in_out; | 
 |  | 
 | 	err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out, | 
 | 			     sizeof(ring_in_out)); | 
 | 	if (err) | 
 | 		return -EFAULT; | 
 |  | 
 | 	err = import_ubuf(ITER_SOURCE, ent->payload, ring->max_payload_sz, | 
 | 			  &iter); | 
 | 	if (err) | 
 | 		return err; | 
 |  | 
 | 	fuse_copy_init(&cs, false, &iter); | 
 | 	cs.is_uring = true; | 
 | 	cs.req = req; | 
 |  | 
 | 	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz); | 
 | } | 
 |  | 
 |  /* | 
 |   * Copy data from the req to the ring buffer | 
 |   */ | 
 | static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, | 
 | 				   struct fuse_ring_ent *ent) | 
 | { | 
 | 	struct fuse_copy_state cs; | 
 | 	struct fuse_args *args = req->args; | 
 | 	struct fuse_in_arg *in_args = args->in_args; | 
 | 	int num_args = args->in_numargs; | 
 | 	int err; | 
 | 	struct iov_iter iter; | 
 | 	struct fuse_uring_ent_in_out ent_in_out = { | 
 | 		.flags = 0, | 
 | 		.commit_id = req->in.h.unique, | 
 | 	}; | 
 |  | 
 | 	err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter); | 
 | 	if (err) { | 
 | 		pr_info_ratelimited("fuse: Import of user buffer failed\n"); | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	fuse_copy_init(&cs, true, &iter); | 
 | 	cs.is_uring = true; | 
 | 	cs.req = req; | 
 |  | 
 | 	if (num_args > 0) { | 
 | 		/* | 
 | 		 * Expectation is that the first argument is the per op header. | 
 | 		 * Some op code have that as zero size. | 
 | 		 */ | 
 | 		if (args->in_args[0].size > 0) { | 
 | 			err = copy_to_user(&ent->headers->op_in, in_args->value, | 
 | 					   in_args->size); | 
 | 			if (err) { | 
 | 				pr_info_ratelimited( | 
 | 					"Copying the header failed.\n"); | 
 | 				return -EFAULT; | 
 | 			} | 
 | 		} | 
 | 		in_args++; | 
 | 		num_args--; | 
 | 	} | 
 |  | 
 | 	/* copy the payload */ | 
 | 	err = fuse_copy_args(&cs, num_args, args->in_pages, | 
 | 			     (struct fuse_arg *)in_args, 0); | 
 | 	if (err) { | 
 | 		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__); | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	ent_in_out.payload_sz = cs.ring.copied_sz; | 
 | 	err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out, | 
 | 			   sizeof(ent_in_out)); | 
 | 	return err ? -EFAULT : 0; | 
 | } | 
 |  | 
 | static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent, | 
 | 				   struct fuse_req *req) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	int err; | 
 |  | 
 | 	err = -EIO; | 
 | 	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) { | 
 | 		pr_err("qid=%d ring-req=%p invalid state %d on send\n", | 
 | 		       queue->qid, ent, ent->state); | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	err = -EINVAL; | 
 | 	if (WARN_ON(req->in.h.unique == 0)) | 
 | 		return err; | 
 |  | 
 | 	/* copy the request */ | 
 | 	err = fuse_uring_args_to_ring(ring, req, ent); | 
 | 	if (unlikely(err)) { | 
 | 		pr_info_ratelimited("Copy to ring failed: %d\n", err); | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	/* copy fuse_in_header */ | 
 | 	err = copy_to_user(&ent->headers->in_out, &req->in.h, | 
 | 			   sizeof(req->in.h)); | 
 | 	if (err) { | 
 | 		err = -EFAULT; | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int fuse_uring_prepare_send(struct fuse_ring_ent *ent, | 
 | 				   struct fuse_req *req) | 
 | { | 
 | 	int err; | 
 |  | 
 | 	err = fuse_uring_copy_to_ring(ent, req); | 
 | 	if (!err) | 
 | 		set_bit(FR_SENT, &req->flags); | 
 | 	else | 
 | 		fuse_uring_req_end(ent, req, err); | 
 |  | 
 | 	return err; | 
 | } | 
 |  | 
 | /* | 
 |  * Write data to the ring buffer and send the request to userspace, | 
 |  * userspace will read it | 
 |  * This is comparable with classical read(/dev/fuse) | 
 |  */ | 
 | static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent, | 
 | 					struct fuse_req *req, | 
 | 					unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	int err; | 
 | 	struct io_uring_cmd *cmd; | 
 |  | 
 | 	err = fuse_uring_prepare_send(ent, req); | 
 | 	if (err) | 
 | 		return err; | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	cmd = ent->cmd; | 
 | 	ent->cmd = NULL; | 
 | 	ent->state = FRRS_USERSPACE; | 
 | 	list_move_tail(&ent->list, &queue->ent_in_userspace); | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	io_uring_cmd_done(cmd, 0, 0, issue_flags); | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * Make a ring entry available for fuse_req assignment | 
 |  */ | 
 | static void fuse_uring_ent_avail(struct fuse_ring_ent *ent, | 
 | 				 struct fuse_ring_queue *queue) | 
 | { | 
 | 	WARN_ON_ONCE(!ent->cmd); | 
 | 	list_move(&ent->list, &queue->ent_avail_queue); | 
 | 	ent->state = FRRS_AVAILABLE; | 
 | } | 
 |  | 
 | /* Used to find the request on SQE commit */ | 
 | static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent, | 
 | 				 struct fuse_req *req) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	struct fuse_pqueue *fpq = &queue->fpq; | 
 | 	unsigned int hash; | 
 |  | 
 | 	req->ring_entry = ent; | 
 | 	hash = fuse_req_hash(req->in.h.unique); | 
 | 	list_move_tail(&req->list, &fpq->processing[hash]); | 
 | } | 
 |  | 
 | /* | 
 |  * Assign a fuse queue entry to the given entry | 
 |  */ | 
 | static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent, | 
 | 					   struct fuse_req *req) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 |  | 
 | 	lockdep_assert_held(&queue->lock); | 
 |  | 
 | 	if (WARN_ON_ONCE(ent->state != FRRS_AVAILABLE && | 
 | 			 ent->state != FRRS_COMMIT)) { | 
 | 		pr_warn("%s qid=%d state=%d\n", __func__, ent->queue->qid, | 
 | 			ent->state); | 
 | 	} | 
 |  | 
 | 	clear_bit(FR_PENDING, &req->flags); | 
 | 	ent->fuse_req = req; | 
 | 	ent->state = FRRS_FUSE_REQ; | 
 | 	list_move_tail(&ent->list, &queue->ent_w_req_queue); | 
 | 	fuse_uring_add_to_pq(ent, req); | 
 | } | 
 |  | 
 | /* Fetch the next fuse request if available */ | 
 | static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent) | 
 | 	__must_hold(&queue->lock) | 
 | { | 
 | 	struct fuse_req *req; | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	struct list_head *req_queue = &queue->fuse_req_queue; | 
 |  | 
 | 	lockdep_assert_held(&queue->lock); | 
 |  | 
 | 	/* get and assign the next entry while it is still holding the lock */ | 
 | 	req = list_first_entry_or_null(req_queue, struct fuse_req, list); | 
 | 	if (req) | 
 | 		fuse_uring_add_req_to_ring_ent(ent, req); | 
 |  | 
 | 	return req; | 
 | } | 
 |  | 
 | /* | 
 |  * Read data from the ring buffer, which user space has written to | 
 |  * This is comparible with handling of classical write(/dev/fuse). | 
 |  * Also make the ring request available again for new fuse requests. | 
 |  */ | 
 | static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req, | 
 | 			      unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring *ring = ent->queue->ring; | 
 | 	struct fuse_conn *fc = ring->fc; | 
 | 	ssize_t err = 0; | 
 |  | 
 | 	err = copy_from_user(&req->out.h, &ent->headers->in_out, | 
 | 			     sizeof(req->out.h)); | 
 | 	if (err) { | 
 | 		req->out.h.error = -EFAULT; | 
 | 		goto out; | 
 | 	} | 
 |  | 
 | 	err = fuse_uring_out_header_has_err(&req->out.h, req, fc); | 
 | 	if (err) { | 
 | 		/* req->out.h.error already set */ | 
 | 		goto out; | 
 | 	} | 
 |  | 
 | 	err = fuse_uring_copy_from_ring(ring, req, ent); | 
 | out: | 
 | 	fuse_uring_req_end(ent, req, err); | 
 | } | 
 |  | 
 | /* | 
 |  * Get the next fuse req and send it | 
 |  */ | 
 | static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent, | 
 | 				     struct fuse_ring_queue *queue, | 
 | 				     unsigned int issue_flags) | 
 | { | 
 | 	int err; | 
 | 	struct fuse_req *req; | 
 |  | 
 | retry: | 
 | 	spin_lock(&queue->lock); | 
 | 	fuse_uring_ent_avail(ent, queue); | 
 | 	req = fuse_uring_ent_assign_req(ent); | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (req) { | 
 | 		err = fuse_uring_send_next_to_ring(ent, req, issue_flags); | 
 | 		if (err) | 
 | 			goto retry; | 
 | 	} | 
 | } | 
 |  | 
 | static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 |  | 
 | 	lockdep_assert_held(&queue->lock); | 
 |  | 
 | 	if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE)) | 
 | 		return -EIO; | 
 |  | 
 | 	ent->state = FRRS_COMMIT; | 
 | 	list_move(&ent->list, &queue->ent_commit_queue); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* FUSE_URING_CMD_COMMIT_AND_FETCH handler */ | 
 | static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, | 
 | 				   struct fuse_conn *fc) | 
 | { | 
 | 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); | 
 | 	struct fuse_ring_ent *ent; | 
 | 	int err; | 
 | 	struct fuse_ring *ring = fc->ring; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	uint64_t commit_id = READ_ONCE(cmd_req->commit_id); | 
 | 	unsigned int qid = READ_ONCE(cmd_req->qid); | 
 | 	struct fuse_pqueue *fpq; | 
 | 	struct fuse_req *req; | 
 |  | 
 | 	err = -ENOTCONN; | 
 | 	if (!ring) | 
 | 		return err; | 
 |  | 
 | 	if (qid >= ring->nr_queues) | 
 | 		return -EINVAL; | 
 |  | 
 | 	queue = ring->queues[qid]; | 
 | 	if (!queue) | 
 | 		return err; | 
 | 	fpq = &queue->fpq; | 
 |  | 
 | 	if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped)) | 
 | 		return err; | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	/* Find a request based on the unique ID of the fuse request | 
 | 	 * This should get revised, as it needs a hash calculation and list | 
 | 	 * search. And full struct fuse_pqueue is needed (memory overhead). | 
 | 	 * As well as the link from req to ring_ent. | 
 | 	 */ | 
 | 	req = fuse_request_find(fpq, commit_id); | 
 | 	err = -ENOENT; | 
 | 	if (!req) { | 
 | 		pr_info("qid=%d commit_id %llu not found\n", queue->qid, | 
 | 			commit_id); | 
 | 		spin_unlock(&queue->lock); | 
 | 		return err; | 
 | 	} | 
 | 	list_del_init(&req->list); | 
 | 	ent = req->ring_entry; | 
 | 	req->ring_entry = NULL; | 
 |  | 
 | 	err = fuse_ring_ent_set_commit(ent); | 
 | 	if (err != 0) { | 
 | 		pr_info_ratelimited("qid=%d commit_id %llu state %d", | 
 | 				    queue->qid, commit_id, ent->state); | 
 | 		spin_unlock(&queue->lock); | 
 | 		req->out.h.error = err; | 
 | 		clear_bit(FR_SENT, &req->flags); | 
 | 		fuse_request_end(req); | 
 | 		return err; | 
 | 	} | 
 |  | 
 | 	ent->cmd = cmd; | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	/* without the queue lock, as other locks are taken */ | 
 | 	fuse_uring_prepare_cancel(cmd, issue_flags, ent); | 
 | 	fuse_uring_commit(ent, req, issue_flags); | 
 |  | 
 | 	/* | 
 | 	 * Fetching the next request is absolutely required as queued | 
 | 	 * fuse requests would otherwise not get processed - committing | 
 | 	 * and fetching is done in one step vs legacy fuse, which has separated | 
 | 	 * read (fetch request) and write (commit result). | 
 | 	 */ | 
 | 	fuse_uring_next_fuse_req(ent, queue, issue_flags); | 
 | 	return 0; | 
 | } | 
 |  | 
 | static bool is_ring_ready(struct fuse_ring *ring, int current_qid) | 
 | { | 
 | 	int qid; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	bool ready = true; | 
 |  | 
 | 	for (qid = 0; qid < ring->nr_queues && ready; qid++) { | 
 | 		if (current_qid == qid) | 
 | 			continue; | 
 |  | 
 | 		queue = ring->queues[qid]; | 
 | 		if (!queue) { | 
 | 			ready = false; | 
 | 			break; | 
 | 		} | 
 |  | 
 | 		spin_lock(&queue->lock); | 
 | 		if (list_empty(&queue->ent_avail_queue)) | 
 | 			ready = false; | 
 | 		spin_unlock(&queue->lock); | 
 | 	} | 
 |  | 
 | 	return ready; | 
 | } | 
 |  | 
 | /* | 
 |  * fuse_uring_req_fetch command handling | 
 |  */ | 
 | static void fuse_uring_do_register(struct fuse_ring_ent *ent, | 
 | 				   struct io_uring_cmd *cmd, | 
 | 				   unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	struct fuse_conn *fc = ring->fc; | 
 | 	struct fuse_iqueue *fiq = &fc->iq; | 
 |  | 
 | 	fuse_uring_prepare_cancel(cmd, issue_flags, ent); | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	ent->cmd = cmd; | 
 | 	fuse_uring_ent_avail(ent, queue); | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (!ring->ready) { | 
 | 		bool ready = is_ring_ready(ring, queue->qid); | 
 |  | 
 | 		if (ready) { | 
 | 			WRITE_ONCE(fiq->ops, &fuse_io_uring_ops); | 
 | 			WRITE_ONCE(ring->ready, true); | 
 | 			wake_up_all(&fc->blocked_waitq); | 
 | 		} | 
 | 	} | 
 | } | 
 |  | 
 | /* | 
 |  * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1] | 
 |  * the payload | 
 |  */ | 
 | static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe, | 
 | 					 struct iovec iov[FUSE_URING_IOV_SEGS]) | 
 | { | 
 | 	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr)); | 
 | 	struct iov_iter iter; | 
 | 	ssize_t ret; | 
 |  | 
 | 	if (sqe->len != FUSE_URING_IOV_SEGS) | 
 | 		return -EINVAL; | 
 |  | 
 | 	/* | 
 | 	 * Direction for buffer access will actually be READ and WRITE, | 
 | 	 * using write for the import should include READ access as well. | 
 | 	 */ | 
 | 	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS, | 
 | 			   FUSE_URING_IOV_SEGS, &iov, &iter); | 
 | 	if (ret < 0) | 
 | 		return ret; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static struct fuse_ring_ent * | 
 | fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, | 
 | 			   struct fuse_ring_queue *queue) | 
 | { | 
 | 	struct fuse_ring *ring = queue->ring; | 
 | 	struct fuse_ring_ent *ent; | 
 | 	size_t payload_size; | 
 | 	struct iovec iov[FUSE_URING_IOV_SEGS]; | 
 | 	int err; | 
 |  | 
 | 	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov); | 
 | 	if (err) { | 
 | 		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n", | 
 | 				    err); | 
 | 		return ERR_PTR(err); | 
 | 	} | 
 |  | 
 | 	err = -EINVAL; | 
 | 	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) { | 
 | 		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len); | 
 | 		return ERR_PTR(err); | 
 | 	} | 
 |  | 
 | 	payload_size = iov[1].iov_len; | 
 | 	if (payload_size < ring->max_payload_sz) { | 
 | 		pr_info_ratelimited("Invalid req payload len %zu\n", | 
 | 				    payload_size); | 
 | 		return ERR_PTR(err); | 
 | 	} | 
 |  | 
 | 	err = -ENOMEM; | 
 | 	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT); | 
 | 	if (!ent) | 
 | 		return ERR_PTR(err); | 
 |  | 
 | 	INIT_LIST_HEAD(&ent->list); | 
 |  | 
 | 	ent->queue = queue; | 
 | 	ent->headers = iov[0].iov_base; | 
 | 	ent->payload = iov[1].iov_base; | 
 |  | 
 | 	atomic_inc(&ring->queue_refs); | 
 | 	return ent; | 
 | } | 
 |  | 
 | /* | 
 |  * Register header and payload buffer with the kernel and puts the | 
 |  * entry as "ready to get fuse requests" on the queue | 
 |  */ | 
 | static int fuse_uring_register(struct io_uring_cmd *cmd, | 
 | 			       unsigned int issue_flags, struct fuse_conn *fc) | 
 | { | 
 | 	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe); | 
 | 	struct fuse_ring *ring = smp_load_acquire(&fc->ring); | 
 | 	struct fuse_ring_queue *queue; | 
 | 	struct fuse_ring_ent *ent; | 
 | 	int err; | 
 | 	unsigned int qid = READ_ONCE(cmd_req->qid); | 
 |  | 
 | 	err = -ENOMEM; | 
 | 	if (!ring) { | 
 | 		ring = fuse_uring_create(fc); | 
 | 		if (!ring) | 
 | 			return err; | 
 | 	} | 
 |  | 
 | 	if (qid >= ring->nr_queues) { | 
 | 		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid); | 
 | 		return -EINVAL; | 
 | 	} | 
 |  | 
 | 	queue = ring->queues[qid]; | 
 | 	if (!queue) { | 
 | 		queue = fuse_uring_create_queue(ring, qid); | 
 | 		if (!queue) | 
 | 			return err; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * The created queue above does not need to be destructed in | 
 | 	 * case of entry errors below, will be done at ring destruction time. | 
 | 	 */ | 
 |  | 
 | 	ent = fuse_uring_create_ring_ent(cmd, queue); | 
 | 	if (IS_ERR(ent)) | 
 | 		return PTR_ERR(ent); | 
 |  | 
 | 	fuse_uring_do_register(ent, cmd, issue_flags); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * Entry function from io_uring to handle the given passthrough command | 
 |  * (op code IORING_OP_URING_CMD) | 
 |  */ | 
 | int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_dev *fud; | 
 | 	struct fuse_conn *fc; | 
 | 	u32 cmd_op = cmd->cmd_op; | 
 | 	int err; | 
 |  | 
 | 	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) { | 
 | 		fuse_uring_cancel(cmd, issue_flags); | 
 | 		return 0; | 
 | 	} | 
 |  | 
 | 	/* This extra SQE size holds struct fuse_uring_cmd_req */ | 
 | 	if (!(issue_flags & IO_URING_F_SQE128)) | 
 | 		return -EINVAL; | 
 |  | 
 | 	fud = fuse_get_dev(cmd->file); | 
 | 	if (!fud) { | 
 | 		pr_info_ratelimited("No fuse device found\n"); | 
 | 		return -ENOTCONN; | 
 | 	} | 
 | 	fc = fud->fc; | 
 |  | 
 | 	/* Once a connection has io-uring enabled on it, it can't be disabled */ | 
 | 	if (!enable_uring && !fc->io_uring) { | 
 | 		pr_info_ratelimited("fuse-io-uring is disabled\n"); | 
 | 		return -EOPNOTSUPP; | 
 | 	} | 
 |  | 
 | 	if (fc->aborted) | 
 | 		return -ECONNABORTED; | 
 | 	if (!fc->connected) | 
 | 		return -ENOTCONN; | 
 |  | 
 | 	/* | 
 | 	 * fuse_uring_register() needs the ring to be initialized, | 
 | 	 * we need to know the max payload size | 
 | 	 */ | 
 | 	if (!fc->initialized) | 
 | 		return -EAGAIN; | 
 |  | 
 | 	switch (cmd_op) { | 
 | 	case FUSE_IO_URING_CMD_REGISTER: | 
 | 		err = fuse_uring_register(cmd, issue_flags, fc); | 
 | 		if (err) { | 
 | 			pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n", | 
 | 				     err); | 
 | 			fc->io_uring = 0; | 
 | 			wake_up_all(&fc->blocked_waitq); | 
 | 			return err; | 
 | 		} | 
 | 		break; | 
 | 	case FUSE_IO_URING_CMD_COMMIT_AND_FETCH: | 
 | 		err = fuse_uring_commit_fetch(cmd, issue_flags, fc); | 
 | 		if (err) { | 
 | 			pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n", | 
 | 				     err); | 
 | 			return err; | 
 | 		} | 
 | 		break; | 
 | 	default: | 
 | 		return -EINVAL; | 
 | 	} | 
 |  | 
 | 	return -EIOCBQUEUED; | 
 | } | 
 |  | 
 | static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd, | 
 | 			    ssize_t ret, unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	ent->state = FRRS_USERSPACE; | 
 | 	list_move_tail(&ent->list, &queue->ent_in_userspace); | 
 | 	ent->cmd = NULL; | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	io_uring_cmd_done(cmd, ret, 0, issue_flags); | 
 | } | 
 |  | 
 | /* | 
 |  * This prepares and sends the ring request in fuse-uring task context. | 
 |  * User buffers are not mapped yet - the application does not have permission | 
 |  * to write to it - this has to be executed in ring task context. | 
 |  */ | 
 | static void fuse_uring_send_in_task(struct io_uring_cmd *cmd, | 
 | 				    unsigned int issue_flags) | 
 | { | 
 | 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd); | 
 | 	struct fuse_ring_queue *queue = ent->queue; | 
 | 	int err; | 
 |  | 
 | 	if (!(issue_flags & IO_URING_F_TASK_DEAD)) { | 
 | 		err = fuse_uring_prepare_send(ent, ent->fuse_req); | 
 | 		if (err) { | 
 | 			fuse_uring_next_fuse_req(ent, queue, issue_flags); | 
 | 			return; | 
 | 		} | 
 | 	} else { | 
 | 		err = -ECANCELED; | 
 | 	} | 
 |  | 
 | 	fuse_uring_send(ent, cmd, err, issue_flags); | 
 | } | 
 |  | 
 | static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring) | 
 | { | 
 | 	unsigned int qid; | 
 | 	struct fuse_ring_queue *queue; | 
 |  | 
 | 	qid = task_cpu(current); | 
 |  | 
 | 	if (WARN_ONCE(qid >= ring->nr_queues, | 
 | 		      "Core number (%u) exceeds nr queues (%zu)\n", qid, | 
 | 		      ring->nr_queues)) | 
 | 		qid = 0; | 
 |  | 
 | 	queue = ring->queues[qid]; | 
 | 	WARN_ONCE(!queue, "Missing queue for qid %d\n", qid); | 
 |  | 
 | 	return queue; | 
 | } | 
 |  | 
 | static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent) | 
 | { | 
 | 	struct io_uring_cmd *cmd = ent->cmd; | 
 |  | 
 | 	uring_cmd_set_ring_ent(cmd, ent); | 
 | 	io_uring_cmd_complete_in_task(cmd, fuse_uring_send_in_task); | 
 | } | 
 |  | 
 | /* queue a fuse request and send it if a ring entry is available */ | 
 | void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) | 
 | { | 
 | 	struct fuse_conn *fc = req->fm->fc; | 
 | 	struct fuse_ring *ring = fc->ring; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	struct fuse_ring_ent *ent = NULL; | 
 | 	int err; | 
 |  | 
 | 	err = -EINVAL; | 
 | 	queue = fuse_uring_task_to_queue(ring); | 
 | 	if (!queue) | 
 | 		goto err; | 
 |  | 
 | 	if (req->in.h.opcode != FUSE_NOTIFY_REPLY) | 
 | 		req->in.h.unique = fuse_get_unique(fiq); | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	err = -ENOTCONN; | 
 | 	if (unlikely(queue->stopped)) | 
 | 		goto err_unlock; | 
 |  | 
 | 	set_bit(FR_URING, &req->flags); | 
 | 	req->ring_queue = queue; | 
 | 	ent = list_first_entry_or_null(&queue->ent_avail_queue, | 
 | 				       struct fuse_ring_ent, list); | 
 | 	if (ent) | 
 | 		fuse_uring_add_req_to_ring_ent(ent, req); | 
 | 	else | 
 | 		list_add_tail(&req->list, &queue->fuse_req_queue); | 
 | 	spin_unlock(&queue->lock); | 
 |  | 
 | 	if (ent) | 
 | 		fuse_uring_dispatch_ent(ent); | 
 |  | 
 | 	return; | 
 |  | 
 | err_unlock: | 
 | 	spin_unlock(&queue->lock); | 
 | err: | 
 | 	req->out.h.error = err; | 
 | 	clear_bit(FR_PENDING, &req->flags); | 
 | 	fuse_request_end(req); | 
 | } | 
 |  | 
 | bool fuse_uring_queue_bq_req(struct fuse_req *req) | 
 | { | 
 | 	struct fuse_conn *fc = req->fm->fc; | 
 | 	struct fuse_ring *ring = fc->ring; | 
 | 	struct fuse_ring_queue *queue; | 
 | 	struct fuse_ring_ent *ent = NULL; | 
 |  | 
 | 	queue = fuse_uring_task_to_queue(ring); | 
 | 	if (!queue) | 
 | 		return false; | 
 |  | 
 | 	spin_lock(&queue->lock); | 
 | 	if (unlikely(queue->stopped)) { | 
 | 		spin_unlock(&queue->lock); | 
 | 		return false; | 
 | 	} | 
 |  | 
 | 	set_bit(FR_URING, &req->flags); | 
 | 	req->ring_queue = queue; | 
 | 	list_add_tail(&req->list, &queue->fuse_req_bg_queue); | 
 |  | 
 | 	ent = list_first_entry_or_null(&queue->ent_avail_queue, | 
 | 				       struct fuse_ring_ent, list); | 
 | 	spin_lock(&fc->bg_lock); | 
 | 	fc->num_background++; | 
 | 	if (fc->num_background == fc->max_background) | 
 | 		fc->blocked = 1; | 
 | 	fuse_uring_flush_bg(queue); | 
 | 	spin_unlock(&fc->bg_lock); | 
 |  | 
 | 	/* | 
 | 	 * Due to bg_queue flush limits there might be other bg requests | 
 | 	 * in the queue that need to be handled first. Or no further req | 
 | 	 * might be available. | 
 | 	 */ | 
 | 	req = list_first_entry_or_null(&queue->fuse_req_queue, struct fuse_req, | 
 | 				       list); | 
 | 	if (ent && req) { | 
 | 		fuse_uring_add_req_to_ring_ent(ent, req); | 
 | 		spin_unlock(&queue->lock); | 
 |  | 
 | 		fuse_uring_dispatch_ent(ent); | 
 | 	} else { | 
 | 		spin_unlock(&queue->lock); | 
 | 	} | 
 |  | 
 | 	return true; | 
 | } | 
 |  | 
 | bool fuse_uring_remove_pending_req(struct fuse_req *req) | 
 | { | 
 | 	struct fuse_ring_queue *queue = req->ring_queue; | 
 |  | 
 | 	return fuse_remove_pending_req(req, &queue->lock); | 
 | } | 
 |  | 
 | static const struct fuse_iqueue_ops fuse_io_uring_ops = { | 
 | 	/* should be send over io-uring as enhancement */ | 
 | 	.send_forget = fuse_dev_queue_forget, | 
 |  | 
 | 	/* | 
 | 	 * could be send over io-uring, but interrupts should be rare, | 
 | 	 * no need to make the code complex | 
 | 	 */ | 
 | 	.send_interrupt = fuse_dev_queue_interrupt, | 
 | 	.send_req = fuse_uring_queue_fuse_req, | 
 | }; |