// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

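/*
 * Per-ring state for an eventfd registered via IORING_REGISTER_EVENTFD.
 * Published in ctx->io_ev_fd under RCU and reference counted so that
 * signalling can race safely with unregistration.
 */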
struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

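/* Final RCU callback: drop the eventfd context and free the structure. */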
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

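/*
 * Drop a reference; the final put defers freeing to an RCU grace period so
 * that concurrent lockless readers of ctx->io_ev_fd remain safe.
 */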
static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

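/*
 * Deferred signal path: invoked via call_rcu_hurry() when the eventfd could
 * not be signalled from the original context, then drops the held reference.
 */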
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

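/*
 * Signal the eventfd registered with this ring, if any. With @cqe_event set,
 * skip the notification unless new CQEs were posted since the last signal,
 * tracked via ->last_cq_tail under ->completion_lock.
 */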
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * Check again if ev_fd exists in case an io_eventfd_unregister call
	 * completed between the NULL check of ctx->io_ev_fd at the start of
	 * the function and rcu_read_lock.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and number of CQEs posted to
		 * the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

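/*
 * Register the eventfd whose descriptor is read from @arg. Requires
 * ->uring_lock and fails with -EBUSY if an eventfd is already registered.
 * With @eventfd_async set, only completions from io-wq workers trigger a
 * notification (IORING_REGISTER_EVENTFD_ASYNC).
 */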
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

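/*
 * Unregister the current eventfd, if any: clear the RCU-published pointer
 * and drop the registration reference. Requires ->uring_lock; returns -ENXIO
 * if no eventfd was registered.
 */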
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}