// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

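/*
 * Registered eventfd state, published via RCU through ctx->io_ev_fd.
 * Signaling paths take a reference with refcount_inc_not_zero(), so the
 * eventfd context stays valid even if it is concurrently unregistered.
 */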
struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

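/* Bit numbers for io_ev_fd->ops; the signal bit marks a queued deferred signal */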
enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

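/* RCU callback: drop the eventfd context reference and free the wrapper */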
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

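/* Drop a reference; the final put frees ev_fd after an RCU grace period */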
static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

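/*
 * RCU callback used when the eventfd could not be signaled directly from
 * the original context. Signals the eventfd and drops the reference that
 * __io_eventfd_signal() handed over when it queued this callback.
 */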
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
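	/*
	 * atomic_fetch_or() returns the previous ops word: if the signal bit
	 * was not already set, queue the deferred signal and transfer our
	 * reference to io_eventfd_do_signal().
	 */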
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

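/*
 * Signal the registered eventfd, if any. When @cqe_event is true, only
 * signal if the CQ ring tail has moved since this path last checked, so
 * the eventfd count changes only when new CQEs have actually been posted.
 */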
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * ev_fd may be NULL if io_eventfd_unregister() cleared ctx->io_ev_fd
	 * before this RCU read-side critical section began; the RCU read lock
	 * only guarantees that an ev_fd we do observe is not freed under us.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * The eventfd should only be signaled when at least one new
		 * event has been posted. Some applications rely on the
		 * eventfd notification count changing if and only if new CQEs
		 * have been added to the CQ ring, but there is no requirement
		 * for a 1:1 relationship between the number of times this
		 * function is called (and hence the eventfd count) and the
		 * number of CQEs posted to the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

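/*
 * Register an eventfd for CQE notifications. Expects to be called with
 * ctx->uring_lock held (see the lockdep annotation below); only one
 * eventfd may be registered per ring at a time. If @eventfd_async is set,
 * the eventfd is only signaled from io-wq worker context, see
 * io_eventfd_trigger().
 */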
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

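	/*
	 * Snapshot the current CQ tail so that only CQEs posted after
	 * registration trigger a notification.
	 */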
	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

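/*
 * Unregister the eventfd, if one is registered. Called with ctx->uring_lock
 * held; the final reference is dropped via RCU, so concurrent signalers that
 * already grabbed a reference remain safe.
 */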
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}