| // SPDX-License-Identifier: GPL-2.0 |
| #include <linux/kernel.h> |
| #include <linux/errno.h> |
| #include <linux/fs.h> |
| #include <linux/file.h> |
| #include <linux/mm.h> |
| #include <linux/slab.h> |
| #include <linux/namei.h> |
| #include <linux/nospec.h> |
| #include <linux/io_uring.h> |
| |
| #include <uapi/linux/io_uring.h> |
| |
| #include "filetable.h" |
| #include "io_uring.h" |
| #include "tctx.h" |
| #include "sqpoll.h" |
| #include "uring_cmd.h" |
| #include "poll.h" |
| #include "timeout.h" |
| #include "waitid.h" |
| #include "futex.h" |
| #include "cancel.h" |
| |
| struct io_cancel { |
| struct file *file; |
| u64 addr; |
| u32 flags; |
| s32 fd; |
| u8 opcode; |
| }; |
| |
| #define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ |
| IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \ |
| IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP) |
| |
| /* |
| * Returns true if the request matches the criteria outlined by 'cd'. |
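| * |
| * By default, matching is keyed on the request's user_data. The |
| * IORING_ASYNC_CANCEL_FD and IORING_ASYNC_CANCEL_OP flags switch the |
| * key to the request's file or opcode (with _USERDATA adding user_data |
| * back in), _ANY matches every request on the ctx, and both _ALL and |
| * _ANY use the sequence check so a looping caller doesn't match the |
| * same request twice. |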
| */ |
| bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd) |
| { |
| bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA; |
| |
| if (req->ctx != cd->ctx) |
| return false; |
| |
| if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP))) |
| match_user_data = true; |
| |
| if (cd->flags & IORING_ASYNC_CANCEL_ANY) |
| goto check_seq; |
| if (cd->flags & IORING_ASYNC_CANCEL_FD) { |
| if (req->file != cd->file) |
| return false; |
| } |
| if (cd->flags & IORING_ASYNC_CANCEL_OP) { |
| if (req->opcode != cd->opcode) |
| return false; |
| } |
| if (match_user_data && req->cqe.user_data != cd->data) |
| return false; |
| if (cd->flags & IORING_ASYNC_CANCEL_ALL) { |
| check_seq: |
| if (io_cancel_match_sequence(req, cd->seq)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static bool io_cancel_cb(struct io_wq_work *work, void *data) |
| { |
| struct io_kiocb *req = container_of(work, struct io_kiocb, work); |
| struct io_cancel_data *cd = data; |
| |
| return io_cancel_req_match(req, cd); |
| } |
| |
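| /* |
| * Try to cancel a single request on this task's io-wq, mapping the |
| * io-wq result to an errno: 0 if it was cancelled, -EALREADY if it's |
| * already running, -ENOENT if it wasn't found. |
| */ |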
| static int io_async_cancel_one(struct io_uring_task *tctx, |
| struct io_cancel_data *cd) |
| { |
| enum io_wq_cancel cancel_ret; |
| int ret = 0; |
| bool all; |
| |
| if (!tctx || !tctx->io_wq) |
| return -ENOENT; |
| |
| all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); |
| cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all); |
| switch (cancel_ret) { |
| case IO_WQ_CANCEL_OK: |
| ret = 0; |
| break; |
| case IO_WQ_CANCEL_RUNNING: |
| ret = -EALREADY; |
| break; |
| case IO_WQ_CANCEL_NOTFOUND: |
| ret = -ENOENT; |
| break; |
| } |
| |
| return ret; |
| } |
| |
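| /* |
| * Core cancellation path: try the io-wq first, then walk the other |
| * places a request may be parked (poll, waitid, futex and, unless |
| * matching by fd, timeouts) until one of them claims a match. |
| */ |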
| int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, |
| unsigned issue_flags) |
| { |
| struct io_ring_ctx *ctx = cd->ctx; |
| int ret; |
| |
| WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring); |
| |
| ret = io_async_cancel_one(tctx, cd); |
| /* |
| * Fall through even for -EALREADY, as we may have a poll request |
| * armed that needs unarming. |
| */ |
| if (!ret) |
| return 0; |
| |
| ret = io_poll_cancel(ctx, cd, issue_flags); |
| if (ret != -ENOENT) |
| return ret; |
| |
| ret = io_waitid_cancel(ctx, cd, issue_flags); |
| if (ret != -ENOENT) |
| return ret; |
| |
| ret = io_futex_cancel(ctx, cd, issue_flags); |
| if (ret != -ENOENT) |
| return ret; |
| |
| spin_lock(&ctx->completion_lock); |
| if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) |
| ret = io_timeout_cancel(ctx, cd); |
| spin_unlock(&ctx->completion_lock); |
| return ret; |
| } |
| |
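| /* |
| * Prep handler for IORING_OP_ASYNC_CANCEL: the target user_data comes |
| * from sqe->addr, the cancel flags from sqe->cancel_flags, and, if |
| * requested, the target fd/opcode from sqe->fd and sqe->len. |
| */ |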
| int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) |
| { |
| struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel); |
| |
| if (unlikely(req->flags & REQ_F_BUFFER_SELECT)) |
| return -EINVAL; |
| if (sqe->off || sqe->splice_fd_in) |
| return -EINVAL; |
| |
| cancel->addr = READ_ONCE(sqe->addr); |
| cancel->flags = READ_ONCE(sqe->cancel_flags); |
| if (cancel->flags & ~CANCEL_FLAGS) |
| return -EINVAL; |
| if (cancel->flags & IORING_ASYNC_CANCEL_FD) { |
| if (cancel->flags & IORING_ASYNC_CANCEL_ANY) |
| return -EINVAL; |
| cancel->fd = READ_ONCE(sqe->fd); |
| } |
| if (cancel->flags & IORING_ASYNC_CANCEL_OP) { |
| if (cancel->flags & IORING_ASYNC_CANCEL_ANY) |
| return -EINVAL; |
| cancel->opcode = READ_ONCE(sqe->len); |
| } |
| |
| return 0; |
| } |
| |
| static int __io_async_cancel(struct io_cancel_data *cd, |
| struct io_uring_task *tctx, |
| unsigned int issue_flags) |
| { |
| bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); |
| struct io_ring_ctx *ctx = cd->ctx; |
| struct io_tctx_node *node; |
| int ret, nr = 0; |
| |
| do { |
| ret = io_try_cancel(tctx, cd, issue_flags); |
| if (ret == -ENOENT) |
| break; |
| if (!all) |
| return ret; |
| nr++; |
| } while (1); |
| |
| /* slow path, try all io-wqs */ |
| io_ring_submit_lock(ctx, issue_flags); |
| ret = -ENOENT; |
| list_for_each_entry(node, &ctx->tctx_list, ctx_node) { |
| ret = io_async_cancel_one(node->task->io_uring, cd); |
| if (ret != -ENOENT) { |
| if (!all) |
| break; |
| nr++; |
| } |
| } |
| io_ring_submit_unlock(ctx, issue_flags); |
| return all ? nr : ret; |
| } |
| |
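| /* |
| * Issue handler for IORING_OP_ASYNC_CANCEL. Illustrative userspace |
| * usage via liburing (helper names per its public API, not defined in |
| * this file), cancelling by user_data: |
| * |
| *   sqe = io_uring_get_sqe(&ring); |
| *   io_uring_prep_cancel64(sqe, target_user_data, 0); |
| *   io_uring_submit(&ring); |
| * |
| * The CQE result follows io_try_cancel()'s errno convention, or is the |
| * number of requests cancelled if _ALL/_ANY is set. |
| */ |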
| int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) |
| { |
| struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel); |
| struct io_cancel_data cd = { |
| .ctx = req->ctx, |
| .data = cancel->addr, |
| .flags = cancel->flags, |
| .opcode = cancel->opcode, |
| .seq = atomic_inc_return(&req->ctx->cancel_seq), |
| }; |
| struct io_uring_task *tctx = req->tctx; |
| int ret; |
| |
| if (cd.flags & IORING_ASYNC_CANCEL_FD) { |
| if (req->flags & REQ_F_FIXED_FILE || |
| cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) { |
| req->flags |= REQ_F_FIXED_FILE; |
| req->file = io_file_get_fixed(req, cancel->fd, |
| issue_flags); |
| } else { |
| req->file = io_file_get_normal(req, cancel->fd); |
| } |
| if (!req->file) { |
| ret = -EBADF; |
| goto done; |
| } |
| cd.file = req->file; |
| } |
| |
| ret = __io_async_cancel(&cd, tctx, issue_flags); |
| done: |
| if (ret < 0) |
| req_set_fail(req); |
| io_req_set_res(req, ret, 0); |
| return IOU_COMPLETE; |
| } |
| |
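| /* |
| * One synchronous cancellation attempt; a fixed file is re-resolved |
| * from the file table on each call, since the table may change while |
| * the uring_lock is dropped between attempts. |
| */ |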
| static int __io_sync_cancel(struct io_uring_task *tctx, |
| struct io_cancel_data *cd, int fd) |
| { |
| struct io_ring_ctx *ctx = cd->ctx; |
| |
| /* a fixed file must be re-resolved every time since we drop the uring_lock */ |
| if ((cd->flags & IORING_ASYNC_CANCEL_FD) && |
| (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) { |
| struct io_rsrc_node *node; |
| |
| node = io_rsrc_node_lookup(&ctx->file_table.data, fd); |
| if (unlikely(!node)) |
| return -EBADF; |
| cd->file = io_slot_file(node); |
| if (!cd->file) |
| return -EBADF; |
| } |
| |
| return __io_async_cancel(cd, tctx, 0); |
| } |
| |
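| /* |
| * Handles IORING_REGISTER_SYNC_CANCEL: like async cancel, but retries |
| * while the match is still running (-EALREADY), waiting on completions |
| * until an optional timeout expires or a signal arrives. Illustrative |
| * userspace usage via liburing (helper name per its public API, not |
| * defined in this file): |
| * |
| *   struct io_uring_sync_cancel_reg reg = { |
| *     .addr = target_user_data, |
| *     .timeout = { .tv_sec = -1, .tv_nsec = -1 }, |
| *   }; |
| *   ret = io_uring_register_sync_cancel(&ring, &reg); |
| * |
| * A -1/-1 timeout means wait indefinitely. |
| */ |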
| int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) |
| __must_hold(&ctx->uring_lock) |
| { |
| struct io_cancel_data cd = { |
| .ctx = ctx, |
| .seq = atomic_inc_return(&ctx->cancel_seq), |
| }; |
| ktime_t timeout = KTIME_MAX; |
| struct io_uring_sync_cancel_reg sc; |
| struct file *file = NULL; |
| DEFINE_WAIT(wait); |
| int ret, i; |
| |
| if (copy_from_user(&sc, arg, sizeof(sc))) |
| return -EFAULT; |
| if (sc.flags & ~CANCEL_FLAGS) |
| return -EINVAL; |
| for (i = 0; i < ARRAY_SIZE(sc.pad); i++) |
| if (sc.pad[i]) |
| return -EINVAL; |
| for (i = 0; i < ARRAY_SIZE(sc.pad2); i++) |
| if (sc.pad2[i]) |
| return -EINVAL; |
| |
| cd.data = sc.addr; |
| cd.flags = sc.flags; |
| cd.opcode = sc.opcode; |
| |
| /* we can grab a normal file descriptor upfront */ |
| if ((cd.flags & IORING_ASYNC_CANCEL_FD) && |
| !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) { |
| file = fget(sc.fd); |
| if (!file) |
| return -EBADF; |
| cd.file = file; |
| } |
| |
| ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); |
| |
| /* found something, done! */ |
| if (ret != -EALREADY) |
| goto out; |
| |
| if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) { |
| struct timespec64 ts = { |
| .tv_sec = sc.timeout.tv_sec, |
| .tv_nsec = sc.timeout.tv_nsec |
| }; |
| |
| timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); |
| } |
| |
| /* |
| * Keep looking until we get -ENOENT. We'll get woken every time a |
| * request completes and will retry the cancellation. |
| */ |
| do { |
| cd.seq = atomic_inc_return(&ctx->cancel_seq); |
| |
| prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE); |
| |
| ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); |
| |
| mutex_unlock(&ctx->uring_lock); |
| if (ret != -EALREADY) |
| break; |
| |
| ret = io_run_task_work_sig(ctx); |
| if (ret < 0) |
| break; |
| ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); |
| if (!ret) { |
| ret = -ETIME; |
| break; |
| } |
| mutex_lock(&ctx->uring_lock); |
| } while (1); |
| |
| finish_wait(&ctx->cq_wait, &wait); |
| mutex_lock(&ctx->uring_lock); |
| |
| if (ret == -ENOENT || ret > 0) |
| ret = 0; |
| out: |
| if (file) |
| fput(file); |
| return ret; |
| } |
| |
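| /* |
| * Remove and cancel every request on @list that matches @tctx (see |
| * io_match_task_safe()), calling @cancel on each. Returns true if any |
| * request was cancelled. |
| */ |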
| bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, |
| struct hlist_head *list, bool cancel_all, |
| bool (*cancel)(struct io_kiocb *)) |
| { |
| struct hlist_node *tmp; |
| struct io_kiocb *req; |
| bool found = false; |
| |
| lockdep_assert_held(&ctx->uring_lock); |
| |
| hlist_for_each_entry_safe(req, tmp, list, hash_node) { |
| if (!io_match_task_safe(req, tctx, cancel_all)) |
| continue; |
| hlist_del_init(&req->hash_node); |
| if (cancel(req)) |
| found = true; |
| } |
| |
| return found; |
| } |
| |
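| /* |
| * Cancel requests on @list matching @cd, stopping after the first hit |
| * unless IORING_ASYNC_CANCEL_ALL is set. Returns the number cancelled, |
| * or -ENOENT if nothing matched. |
| */ |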
| int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd, |
| unsigned int issue_flags, struct hlist_head *list, |
| bool (*cancel)(struct io_kiocb *)) |
| { |
| struct hlist_node *tmp; |
| struct io_kiocb *req; |
| int nr = 0; |
| |
| io_ring_submit_lock(ctx, issue_flags); |
| hlist_for_each_entry_safe(req, tmp, list, hash_node) { |
| if (!io_cancel_req_match(req, cd)) |
| continue; |
| if (cancel(req)) |
| nr++; |
| if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) |
| break; |
| } |
| io_ring_submit_unlock(ctx, issue_flags); |
| return nr ?: -ENOENT; |
| } |
| |
| static bool io_match_linked(struct io_kiocb *head) |
| { |
| struct io_kiocb *req; |
| |
| io_for_each_link(req, head) { |
| if (req->flags & REQ_F_INFLIGHT) |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| * As io_match_task() but protected against racing with linked timeouts. |
| * The caller must not hold timeout_lock. |
| */ |
| bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx, |
| bool cancel_all) |
| { |
| bool matched; |
| |
| if (tctx && head->tctx != tctx) |
| return false; |
| if (cancel_all) |
| return true; |
| |
| if (head->flags & REQ_F_LINK_TIMEOUT) { |
| struct io_ring_ctx *ctx = head->ctx; |
| |
| /* protect against races with linked timeouts */ |
| raw_spin_lock_irq(&ctx->timeout_lock); |
| matched = io_match_linked(head); |
| raw_spin_unlock_irq(&ctx->timeout_lock); |
| } else { |
| matched = io_match_linked(head); |
| } |
| return matched; |
| } |
| |
| void __io_uring_cancel(bool cancel_all) |
| { |
| io_uring_unreg_ringfd(); |
| io_uring_cancel_generic(cancel_all, NULL); |
| } |
| |
| struct io_task_cancel { |
| struct io_uring_task *tctx; |
| bool all; |
| }; |
| |
| static bool io_cancel_task_cb(struct io_wq_work *work, void *data) |
| { |
| struct io_kiocb *req = container_of(work, struct io_kiocb, work); |
| struct io_task_cancel *cancel = data; |
| |
| return io_match_task_safe(req, cancel->tctx, cancel->all); |
| } |
| |
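| /* |
| * Cancel deferred (drained) requests: everything queued up to and |
| * including the last entry matching @tctx is cut from the defer list |
| * and failed with -ECANCELED, preserving drain ordering. |
| */ |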
| static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx, |
| struct io_uring_task *tctx, |
| bool cancel_all) |
| { |
| struct io_defer_entry *de; |
| LIST_HEAD(list); |
| |
| list_for_each_entry_reverse(de, &ctx->defer_list, list) { |
| if (io_match_task_safe(de->req, tctx, cancel_all)) { |
| list_cut_position(&list, &ctx->defer_list, &de->list); |
| break; |
| } |
| } |
| if (list_empty(&list)) |
| return false; |
| |
| while (!list_empty(&list)) { |
| de = list_first_entry(&list, struct io_defer_entry, list); |
| list_del_init(&de->list); |
| ctx->nr_drained -= io_linked_nr(de->req); |
| io_req_task_queue_fail(de->req, -ECANCELED); |
| kfree(de); |
| } |
| return true; |
| } |
| |
| __cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) |
| { |
| struct io_kiocb *req = container_of(work, struct io_kiocb, work); |
| |
| return req->ctx == data; |
| } |
| |
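| /* |
| * Cancel all io-wq work belonging to @ctx, across every task that has |
| * attached to this ring. |
| */ |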
| static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) |
| { |
| struct io_tctx_node *node; |
| enum io_wq_cancel cret; |
| bool ret = false; |
| |
| mutex_lock(&ctx->uring_lock); |
| list_for_each_entry(node, &ctx->tctx_list, ctx_node) { |
| struct io_uring_task *tctx = node->task->io_uring; |
| |
| /* |
| * io_wq will stay alive while we hold uring_lock, because it's |
| * killed after ctx nodes, which requires taking the lock. |
| */ |
| if (!tctx || !tctx->io_wq) |
| continue; |
| cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); |
| ret |= (cret != IO_WQ_CANCEL_NOTFOUND); |
| } |
| mutex_unlock(&ctx->uring_lock); |
| |
| return ret; |
| } |
| |
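| /* |
| * One pass of cancelling everything matching @tctx on @ctx: io-wq |
| * work, iopoll, locally deferred task_work, deferred files, poll, |
| * waitid, futex, uring_cmd and timeouts. Returns true if anything was |
| * found, in which case the caller should loop and retry. |
| */ |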
| __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, |
| struct io_uring_task *tctx, |
| bool cancel_all, bool is_sqpoll_thread) |
| { |
| struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, }; |
| enum io_wq_cancel cret; |
| bool ret = false; |
| |
| /* set it so io_req_local_work_add() will wake us up */ |
| if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { |
| atomic_set(&ctx->cq_wait_nr, 1); |
| smp_mb(); |
| } |
| |
| /* if the ring failed during init, it couldn't have issued any requests */ |
| if (!ctx->rings) |
| return false; |
| |
| if (!tctx) { |
| ret |= io_uring_try_cancel_iowq(ctx); |
| } else if (tctx->io_wq) { |
| /* |
| * Cancels requests of all rings, not only @ctx, but |
| * it's fine as the task is in exit/exec. |
| */ |
| cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, |
| &cancel, true); |
| ret |= (cret != IO_WQ_CANCEL_NOTFOUND); |
| } |
| |
| /* SQPOLL thread does its own polling */ |
| if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || |
| is_sqpoll_thread) { |
| while (!wq_list_empty(&ctx->iopoll_list)) { |
| io_iopoll_try_reap_events(ctx); |
| ret = true; |
| cond_resched(); |
| } |
| } |
| |
| if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && |
| io_allowed_defer_tw_run(ctx)) |
| ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0; |
| mutex_lock(&ctx->uring_lock); |
| ret |= io_cancel_defer_files(ctx, tctx, cancel_all); |
| ret |= io_poll_remove_all(ctx, tctx, cancel_all); |
| ret |= io_waitid_remove_all(ctx, tctx, cancel_all); |
| ret |= io_futex_remove_all(ctx, tctx, cancel_all); |
| ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all); |
| mutex_unlock(&ctx->uring_lock); |
| ret |= io_kill_timeouts(ctx, tctx, cancel_all); |
| if (tctx) |
| ret |= io_run_task_work() > 0; |
| else |
| ret |= flush_delayed_work(&ctx->fallback_work); |
| return ret; |
| } |
| |
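| /* |
| * Number of requests this task currently has in flight; if @tracked, |
| * only those explicitly marked as tracked (REQ_F_INFLIGHT) are |
| * counted. |
| */ |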
| static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) |
| { |
| if (tracked) |
| return atomic_read(&tctx->inflight_tracked); |
| return percpu_counter_sum(&tctx->inflight); |
| } |
| |
| /* |
| * Find any io_uring ctx that this task has registered or done IO on, and cancel |
| * requests. @sqd should be non-NULL iff this is an SQPOLL thread cancellation. |
| */ |
| __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) |
| { |
| struct io_uring_task *tctx = current->io_uring; |
| struct io_ring_ctx *ctx; |
| struct io_tctx_node *node; |
| unsigned long index; |
| s64 inflight; |
| DEFINE_WAIT(wait); |
| |
| WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); |
| |
| if (!current->io_uring) |
| return; |
| if (tctx->io_wq) |
| io_wq_exit_start(tctx->io_wq); |
| |
| atomic_inc(&tctx->in_cancel); |
| do { |
| bool loop = false; |
| |
| io_uring_drop_tctx_refs(current); |
| if (!tctx_inflight(tctx, !cancel_all)) |
| break; |
| |
| /* read completions before cancellations */ |
| inflight = tctx_inflight(tctx, false); |
| if (!inflight) |
| break; |
| |
| if (!sqd) { |
| xa_for_each(&tctx->xa, index, node) { |
| /* sqpoll task will cancel all its requests */ |
| if (node->ctx->sq_data) |
| continue; |
| loop |= io_uring_try_cancel_requests(node->ctx, |
| current->io_uring, |
| cancel_all, |
| false); |
| } |
| } else { |
| list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) |
| loop |= io_uring_try_cancel_requests(ctx, |
| current->io_uring, |
| cancel_all, |
| true); |
| } |
| |
| if (loop) { |
| cond_resched(); |
| continue; |
| } |
| |
| prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); |
| io_run_task_work(); |
| io_uring_drop_tctx_refs(current); |
| xa_for_each(&tctx->xa, index, node) { |
| if (io_local_work_pending(node->ctx)) { |
| WARN_ON_ONCE(node->ctx->submitter_task && |
| node->ctx->submitter_task != current); |
| goto end_wait; |
| } |
| } |
| /* |
| * If we've seen completions, retry without waiting. This |
| * avoids a race where a completion comes in before we did |
| * prepare_to_wait(). |
| */ |
| if (inflight == tctx_inflight(tctx, !cancel_all)) |
| schedule(); |
| end_wait: |
| finish_wait(&tctx->wait, &wait); |
| } while (1); |
| |
| io_uring_clean_tctx(tctx); |
| if (cancel_all) { |
| /* |
| * We shouldn't run task_works after cancel, so just leave |
| * ->in_cancel set for normal exit. |
| */ |
| atomic_dec(&tctx->in_cancel); |
| /* for exec, all of current's requests should be gone, so kill the tctx */ |
| __io_uring_free(current); |
| } |
| } |