| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * eventfd support for mshv |
| * |
| * Heavily inspired by the KVM implementation of irqfd/ioeventfd. The |
| * basic framework code is taken from the KVM implementation. |
| * |
| * All credits to the KVM developers. |
| */ |
| |
| #include <linux/syscalls.h> |
| #include <linux/wait.h> |
| #include <linux/poll.h> |
| #include <linux/file.h> |
| #include <linux/list.h> |
| #include <linux/workqueue.h> |
| #include <linux/eventfd.h> |
| |
| #if IS_ENABLED(CONFIG_X86_64) |
| #include <asm/apic.h> |
| #endif |
| #include <asm/mshyperv.h> |
| |
| #include "mshv_eventfd.h" |
| #include "mshv.h" |
| #include "mshv_root.h" |
| |
| static struct workqueue_struct *irqfd_cleanup_wq; |
| |
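| /* |
| * Add an ack notifier to @partition's list. Readers walk the list under |
| * RCU (see mshv_notify_acked_gsi()); writers serialize on pt_irq_lock. |
| */ |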
| void mshv_register_irq_ack_notifier(struct mshv_partition *partition, |
| struct mshv_irq_ack_notifier *mian) |
| { |
| mutex_lock(&partition->pt_irq_lock); |
| hlist_add_head_rcu(&mian->link, &partition->irq_ack_notifier_list); |
| mutex_unlock(&partition->pt_irq_lock); |
| } |
| |
| void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition, |
| struct mshv_irq_ack_notifier *mian) |
| { |
| mutex_lock(&partition->pt_irq_lock); |
| hlist_del_init_rcu(&mian->link); |
| mutex_unlock(&partition->pt_irq_lock); |
| synchronize_rcu(); |
| } |
| |
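| /* |
| * Invoke the ack callback of every notifier registered for @gsi. |
| * Returns true if at least one notifier matched. |
| */ |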
| bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi) |
| { |
| struct mshv_irq_ack_notifier *mian; |
| bool acked = false; |
| |
| rcu_read_lock(); |
| hlist_for_each_entry_rcu(mian, &partition->irq_ack_notifier_list, |
| link) { |
| if (mian->irq_ack_gsi == gsi) { |
| mian->irq_acked(mian); |
| acked = true; |
| } |
| } |
| rcu_read_unlock(); |
| |
| return acked; |
| } |
| |
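| /* |
| * Whether an asserted interrupt of @type must be explicitly cleared in |
| * the hypervisor when the guest acks it: only x86 EXTINT needs this; on |
| * ARM64 nothing is ever cleared. |
| */ |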
| #if IS_ENABLED(CONFIG_ARM64) |
| static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type) |
| { |
| return false; |
| } |
| #elif IS_ENABLED(CONFIG_X86_64) |
| static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type) |
| { |
| return type == HV_X64_INTERRUPT_TYPE_EXTINT; |
| } |
| #endif |
| |
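| /* |
| * Ack notifier callback for resampled (level-triggered) irqfds: clear |
| * the interrupt in the hypervisor if the architecture requires it, then |
| * signal every resamplefd registered for this GSI so userspace can |
| * reassert the line if the device still has it raised. |
| */ |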
| static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian) |
| { |
| struct mshv_irqfd_resampler *resampler; |
| struct mshv_partition *partition; |
| struct mshv_irqfd *irqfd; |
| int idx; |
| |
| resampler = container_of(mian, struct mshv_irqfd_resampler, |
| rsmplr_notifier); |
| partition = resampler->rsmplr_partn; |
| |
| idx = srcu_read_lock(&partition->pt_irq_srcu); |
| |
| hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list, |
| irqfd_resampler_hnode) { |
| if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type)) |
| hv_call_clear_virtual_interrupt(partition->pt_id); |
| |
| eventfd_signal(irqfd->irqfd_resamplefd); |
| } |
| |
| srcu_read_unlock(&partition->pt_irq_srcu, idx); |
| } |
| |
| #if IS_ENABLED(CONFIG_X86_64) |
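| /* Return true if @vector is already present in the shared vector array */ |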
| static bool |
| mshv_vp_irq_vector_injected(union hv_vp_register_page_interrupt_vectors iv, |
| u32 vector) |
| { |
| int i; |
| |
| for (i = 0; i < iv.vector_count; i++) { |
| if (iv.vector[i] == vector) |
| return true; |
| } |
| |
| return false; |
| } |
| |
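| /* |
| * Lock-free insert into the VP register page's shared vector array: |
| * snapshot the 64-bit interrupt_vectors word, append @vector to a local |
| * copy, and publish it with cmpxchg(). A concurrent update makes the |
| * cmpxchg() fail and we return -EAGAIN so the caller can retry. |
| */ |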
| static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector) |
| { |
| union hv_vp_register_page_interrupt_vectors iv, new_iv; |
| |
| iv = vp->vp_register_page->interrupt_vectors; |
| new_iv = iv; |
| |
| if (mshv_vp_irq_vector_injected(iv, vector)) |
| return 0; |
| |
| if (iv.vector_count >= HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT) |
| return -ENOSPC; |
| |
| new_iv.vector[new_iv.vector_count++] = vector; |
| |
| if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64, |
| iv.as_uint64, new_iv.as_uint64) != iv.as_uint64) |
| return -EAGAIN; |
| |
| return 0; |
| } |
| |
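| /* Retry the lock-free insert, giving up early if a reschedule is due */ |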
| static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 vector) |
| { |
| int ret; |
| |
| do { |
| ret = mshv_vp_irq_try_set_vector(vp, vector); |
| } while (ret == -EAGAIN && !need_resched()); |
| |
| return ret; |
| } |
| |
| /* |
| * Try to raise an interrupt for the guest via the shared vector array. |
| * The hypervisor performs the actual injection of the interrupt. |
| */ |
| static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) |
| { |
| struct mshv_partition *partition = irqfd->irqfd_partn; |
| struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq; |
| struct mshv_vp *vp; |
| |
| if (!(ms_hyperv.ext_features & |
| HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE)) |
| return -EOPNOTSUPP; |
| |
| if (hv_scheduler_type != HV_SCHEDULER_TYPE_ROOT) |
| return -EOPNOTSUPP; |
| |
| if (irq->lapic_control.logical_dest_mode) |
| return -EOPNOTSUPP; |
| |
| vp = partition->pt_vp_array[irq->lapic_apic_id]; |
| |
| if (!vp->vp_register_page) |
| return -EOPNOTSUPP; |
| |
| if (mshv_vp_irq_set_vector(vp, irq->lapic_vector)) |
| return -EINVAL; |
| |
| if (vp->run.flags.root_sched_dispatched && |
| vp->vp_register_page->interrupt_vectors.as_uint64) |
| return -EBUSY; |
| |
| wake_up(&vp->run.vp_suspend_queue); |
| |
| return 0; |
| } |
| #else /* CONFIG_X86_64 */ |
| static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) |
| { |
| return -EOPNOTSUPP; |
| } |
| #endif |
| |
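| /* |
| * Slow path: assert the interrupt via hypercall. The seqcount read loop |
| * waits out any in-flight routing update (mshv_irqfd_update()) so the |
| * hypercall sees a consistent vector/APIC ID pair. |
| */ |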
| static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd) |
| { |
| struct mshv_partition *partition = irqfd->irqfd_partn; |
| struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq; |
| unsigned int seq; |
| int idx; |
| |
| WARN_ON(irqfd->irqfd_resampler && |
| !irq->lapic_control.level_triggered); |
| |
| idx = srcu_read_lock(&partition->pt_irq_srcu); |
| if (irqfd->irqfd_girq_ent.guest_irq_num) { |
| if (!irqfd->irqfd_girq_ent.girq_entry_valid) { |
| srcu_read_unlock(&partition->pt_irq_srcu, idx); |
| return; |
| } |
| |
| do { |
| seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc); |
| } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq)); |
| } |
| |
| hv_call_assert_virtual_interrupt(partition->pt_id, |
| irq->lapic_vector, irq->lapic_apic_id, |
| irq->lapic_control); |
| srcu_read_unlock(&partition->pt_irq_srcu, idx); |
| } |
| |
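| /* |
| * Detach an irqfd from its resampler. If it was the last user, also |
| * unregister the resampler's ack notifier and free it. |
| */ |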
| static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd) |
| { |
| struct mshv_irqfd_resampler *rp = irqfd->irqfd_resampler; |
| struct mshv_partition *pt = rp->rsmplr_partn; |
| |
| mutex_lock(&pt->irqfds_resampler_lock); |
| |
| hlist_del_rcu(&irqfd->irqfd_resampler_hnode); |
| synchronize_srcu(&pt->pt_irq_srcu); |
| |
| if (hlist_empty(&rp->rsmplr_irqfd_list)) { |
| hlist_del(&rp->rsmplr_hnode); |
| mshv_unregister_irq_ack_notifier(pt, &rp->rsmplr_notifier); |
| kfree(rp); |
| } |
| |
| mutex_unlock(&pt->irqfds_resampler_lock); |
| } |
| |
| /* |
| * Race-free decouple logic (ordering is critical): unhook from the |
| * wait-queue before releasing any of the irqfd's resources, so no |
| * further events can observe a half-torn-down irqfd. |
| */ |
| static void mshv_irqfd_shutdown(struct work_struct *work) |
| { |
| struct mshv_irqfd *irqfd = |
| container_of(work, struct mshv_irqfd, irqfd_shutdown); |
| |
| /* |
| * Synchronize with the wait-queue and unhook ourselves to prevent |
| * further events. |
| */ |
| remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait); |
| |
| if (irqfd->irqfd_resampler) { |
| mshv_irqfd_resampler_shutdown(irqfd); |
| eventfd_ctx_put(irqfd->irqfd_resamplefd); |
| } |
| |
| /* |
| * It is now safe to release the object's resources |
| */ |
| eventfd_ctx_put(irqfd->irqfd_eventfd_ctx); |
| kfree(irqfd); |
| } |
| |
| /* assumes partition->pt_irqfds_lock is held */ |
| static bool mshv_irqfd_is_active(struct mshv_irqfd *irqfd) |
| { |
| return !hlist_unhashed(&irqfd->irqfd_hnode); |
| } |
| |
| /* |
| * Mark the irqfd as inactive and schedule it for removal |
| * |
| * assumes partition->pt_irqfds_lock is held |
| */ |
| static void mshv_irqfd_deactivate(struct mshv_irqfd *irqfd) |
| { |
| if (!mshv_irqfd_is_active(irqfd)) |
| return; |
| |
| hlist_del(&irqfd->irqfd_hnode); |
| |
| queue_work(irqfd_cleanup_wq, &irqfd->irqfd_shutdown); |
| } |
| |
| /* |
| * Called with wqh->lock held and interrupts disabled |
| */ |
| static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, |
| int sync, void *key) |
| { |
| struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd, |
| irqfd_wait); |
| unsigned long flags = (unsigned long)key; |
| int idx; |
| unsigned int seq; |
| struct mshv_partition *pt = irqfd->irqfd_partn; |
| int ret = 0; |
| |
| if (flags & POLLIN) { |
| u64 cnt; |
| |
| eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt); |
| idx = srcu_read_lock(&pt->pt_irq_srcu); |
| do { |
| seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc); |
| } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq)); |
| |
| /* An event has been signaled, raise an interrupt */ |
| ret = mshv_try_assert_irq_fast(irqfd); |
| if (ret) |
| mshv_assert_irq_slow(irqfd); |
| |
| srcu_read_unlock(&pt->pt_irq_srcu, idx); |
| |
| ret = 1; |
| } |
| |
| if (flags & POLLHUP) { |
| /* The eventfd is closing, detach from the partition */ |
| unsigned long flags; |
| |
| spin_lock_irqsave(&pt->pt_irqfds_lock, flags); |
| |
| /* |
| * We must check if someone deactivated the irqfd before |
| * we could acquire the pt_irqfds_lock, since the item is |
| * deactivated from the mshv side before it is unhooked from |
| * the wait-queue. If it is already deactivated, we can |
| * simply return, knowing the other side will clean up for us. |
| * We cannot race against the irqfd going away since the |
| * other side is required to acquire wqh->lock, which we hold. |
| */ |
| if (mshv_irqfd_is_active(irqfd)) |
| mshv_irqfd_deactivate(irqfd); |
| |
| spin_unlock_irqrestore(&pt->pt_irqfds_lock, flags); |
| } |
| |
| return ret; |
| } |
| |
| /* Must be called under pt_irqfds_lock */ |
| static void mshv_irqfd_update(struct mshv_partition *pt, |
| struct mshv_irqfd *irqfd) |
| { |
| write_seqcount_begin(&irqfd->irqfd_irqe_sc); |
| irqfd->irqfd_girq_ent = mshv_ret_girq_entry(pt, |
| irqfd->irqfd_irqnum); |
| mshv_copy_girq_info(&irqfd->irqfd_girq_ent, &irqfd->irqfd_lapic_irq); |
| write_seqcount_end(&irqfd->irqfd_irqe_sc); |
| } |
| |
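| /* Re-resolve the cached routing entry of every irqfd on this partition */ |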
| void mshv_irqfd_routing_update(struct mshv_partition *pt) |
| { |
| struct mshv_irqfd *irqfd; |
| |
| spin_lock_irq(&pt->pt_irqfds_lock); |
| hlist_for_each_entry(irqfd, &pt->pt_irqfds_list, irqfd_hnode) |
| mshv_irqfd_update(pt, irqfd); |
| spin_unlock_irq(&pt->pt_irqfds_lock); |
| } |
| |
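| /* |
| * vfs_poll() callback: remember the eventfd's wait-queue head and hook |
| * our wait entry into it as an exclusive, priority waiter. |
| */ |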
| static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh, |
| poll_table *polltbl) |
| { |
| struct mshv_irqfd *irqfd = |
| container_of(polltbl, struct mshv_irqfd, irqfd_polltbl); |
| |
| irqfd->irqfd_wqh = wqh; |
| |
| /* |
| * TODO: Ensure there isn't already an exclusive, priority waiter, e.g. |
| * that the irqfd isn't already bound to another partition. Only the |
| * first exclusive waiter encountered will be notified, and |
| * add_wait_queue_priority() doesn't enforce exclusivity. |
| */ |
| irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE; |
| add_wait_queue_priority(wqh, &irqfd->irqfd_wait); |
| } |
| |
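| /* |
| * Bind an eventfd to a partition interrupt: allocate the irqfd, attach |
| * a resampler if requested, hook into the eventfd's wait-queue, and |
| * deliver once immediately if an event was already pending. |
| */ |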
| static int mshv_irqfd_assign(struct mshv_partition *pt, |
| struct mshv_user_irqfd *args) |
| { |
| struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
| struct mshv_irqfd *irqfd, *tmp; |
| unsigned int events; |
| int ret; |
| int idx; |
| |
| CLASS(fd, f)(args->fd); |
| |
| irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); |
| if (!irqfd) |
| return -ENOMEM; |
| |
| irqfd->irqfd_partn = pt; |
| irqfd->irqfd_irqnum = args->gsi; |
| INIT_WORK(&irqfd->irqfd_shutdown, mshv_irqfd_shutdown); |
| seqcount_spinlock_init(&irqfd->irqfd_irqe_sc, &pt->pt_irqfds_lock); |
| |
| if (fd_empty(f)) { |
| ret = -EBADF; |
| goto out; |
| } |
| |
| eventfd = eventfd_ctx_fileget(fd_file(f)); |
| if (IS_ERR(eventfd)) { |
| ret = PTR_ERR(eventfd); |
| goto fail; |
| } |
| |
| irqfd->irqfd_eventfd_ctx = eventfd; |
| |
| if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE)) { |
| struct mshv_irqfd_resampler *rp; |
| |
| resamplefd = eventfd_ctx_fdget(args->resamplefd); |
| if (IS_ERR(resamplefd)) { |
| ret = PTR_ERR(resamplefd); |
| goto fail; |
| } |
| |
| irqfd->irqfd_resamplefd = resamplefd; |
| |
| mutex_lock(&pt->irqfds_resampler_lock); |
| |
| hlist_for_each_entry(rp, &pt->irqfds_resampler_list, |
| rsmplr_hnode) { |
| if (rp->rsmplr_notifier.irq_ack_gsi == |
| irqfd->irqfd_irqnum) { |
| irqfd->irqfd_resampler = rp; |
| break; |
| } |
| } |
| |
| if (!irqfd->irqfd_resampler) { |
| rp = kzalloc(sizeof(*rp), GFP_KERNEL_ACCOUNT); |
| if (!rp) { |
| ret = -ENOMEM; |
| mutex_unlock(&pt->irqfds_resampler_lock); |
| goto fail; |
| } |
| |
| rp->rsmplr_partn = pt; |
| INIT_HLIST_HEAD(&rp->rsmplr_irqfd_list); |
| rp->rsmplr_notifier.irq_ack_gsi = irqfd->irqfd_irqnum; |
| rp->rsmplr_notifier.irq_acked = |
| mshv_irqfd_resampler_ack; |
| |
| hlist_add_head(&rp->rsmplr_hnode, |
| &pt->irqfds_resampler_list); |
| mshv_register_irq_ack_notifier(pt, |
| &rp->rsmplr_notifier); |
| irqfd->irqfd_resampler = rp; |
| } |
| |
| hlist_add_head_rcu(&irqfd->irqfd_resampler_hnode, |
| &irqfd->irqfd_resampler->rsmplr_irqfd_list); |
| |
| mutex_unlock(&pt->irqfds_resampler_lock); |
| } |
| |
| /* |
| * Install our own custom wake-up handling so we are notified via |
| * a callback whenever someone signals the underlying eventfd |
| */ |
| init_waitqueue_func_entry(&irqfd->irqfd_wait, mshv_irqfd_wakeup); |
| init_poll_funcptr(&irqfd->irqfd_polltbl, mshv_irqfd_queue_proc); |
| |
| spin_lock_irq(&pt->pt_irqfds_lock); |
| if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE) && |
| !irqfd->irqfd_lapic_irq.lapic_control.level_triggered) { |
| /* |
| * A resample fd may only be used with a level-triggered |
| * interrupt; otherwise return with failure. |
| */ |
| spin_unlock_irq(&pt->pt_irqfds_lock); |
| ret = -EINVAL; |
| goto fail; |
| } |
| ret = 0; |
| hlist_for_each_entry(tmp, &pt->pt_irqfds_list, irqfd_hnode) { |
| if (irqfd->irqfd_eventfd_ctx != tmp->irqfd_eventfd_ctx) |
| continue; |
| /* This fd is used for another irq already. */ |
| ret = -EBUSY; |
| spin_unlock_irq(&pt->pt_irqfds_lock); |
| goto fail; |
| } |
| |
| idx = srcu_read_lock(&pt->pt_irq_srcu); |
| mshv_irqfd_update(pt, irqfd); |
| hlist_add_head(&irqfd->irqfd_hnode, &pt->pt_irqfds_list); |
| spin_unlock_irq(&pt->pt_irqfds_lock); |
| |
| /* |
| * Check if there was an event already pending on the eventfd |
| * before we registered, and trigger it as if we didn't miss it. |
| */ |
| events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl); |
| |
| if (events & POLLIN) |
| mshv_assert_irq_slow(irqfd); |
| |
| srcu_read_unlock(&pt->pt_irq_srcu, idx); |
| return 0; |
| |
| fail: |
| if (irqfd->irqfd_resampler) |
| mshv_irqfd_resampler_shutdown(irqfd); |
| |
| if (resamplefd && !IS_ERR(resamplefd)) |
| eventfd_ctx_put(resamplefd); |
| |
| if (eventfd && !IS_ERR(eventfd)) |
| eventfd_ctx_put(eventfd); |
| |
| out: |
| kfree(irqfd); |
| return ret; |
| } |
| |
| /* |
| * Shut down any irqfds that match fd+gsi |
| */ |
| static int mshv_irqfd_deassign(struct mshv_partition *pt, |
| struct mshv_user_irqfd *args) |
| { |
| struct mshv_irqfd *irqfd; |
| struct hlist_node *n; |
| struct eventfd_ctx *eventfd; |
| |
| eventfd = eventfd_ctx_fdget(args->fd); |
| if (IS_ERR(eventfd)) |
| return PTR_ERR(eventfd); |
| |
| hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list, |
| irqfd_hnode) { |
| if (irqfd->irqfd_eventfd_ctx == eventfd && |
| irqfd->irqfd_irqnum == args->gsi) |
| mshv_irqfd_deactivate(irqfd); |
| } |
| |
| eventfd_ctx_put(eventfd); |
| |
| /* |
| * Block until we know all outstanding shutdown jobs have completed |
| * so that we guarantee there will not be any more interrupts on this |
| * gsi once this deassign function returns. |
| */ |
| flush_workqueue(irqfd_cleanup_wq); |
| |
| return 0; |
| } |
| |
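| /* Userspace entry point to assign or deassign an irqfd */ |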
| int mshv_set_unset_irqfd(struct mshv_partition *pt, |
| struct mshv_user_irqfd *args) |
| { |
| if (args->flags & ~MSHV_IRQFD_FLAGS_MASK) |
| return -EINVAL; |
| |
| if (args->flags & BIT(MSHV_IRQFD_BIT_DEASSIGN)) |
| return mshv_irqfd_deassign(pt, args); |
| |
| return mshv_irqfd_assign(pt, args); |
| } |
| |
| /* |
| * This function is called as the mshv VM fd is being released. |
| * Shut down all irqfds that still remain open. |
| */ |
| static void mshv_irqfd_release(struct mshv_partition *pt) |
| { |
| struct mshv_irqfd *irqfd; |
| struct hlist_node *n; |
| |
| spin_lock_irq(&pt->pt_irqfds_lock); |
| |
| hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list, irqfd_hnode) |
| mshv_irqfd_deactivate(irqfd); |
| |
| spin_unlock_irq(&pt->pt_irqfds_lock); |
| |
| /* |
| * Block until we know all outstanding shutdown jobs have completed |
| * since we do not take a mshv_partition* reference. |
| */ |
| flush_workqueue(irqfd_cleanup_wq); |
| } |
| |
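| /* |
| * Create the cleanup workqueue shared by all partitions; presumably this |
| * runs once at driver init, before any irqfd can be created. |
| */ |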
| int mshv_irqfd_wq_init(void) |
| { |
| irqfd_cleanup_wq = alloc_workqueue("mshv-irqfd-cleanup", 0, 0); |
| if (!irqfd_cleanup_wq) |
| return -ENOMEM; |
| |
| return 0; |
| } |
| |
| void mshv_irqfd_wq_cleanup(void) |
| { |
| destroy_workqueue(irqfd_cleanup_wq); |
| } |
| |
| /* |
| * -------------------------------------------------------------------- |
| * ioeventfd: translate an MMIO memory write into an eventfd signal. |
| * |
| * Userspace can register an MMIO address with an eventfd and will |
| * receive a notification when that memory is written. |
| * -------------------------------------------------------------------- |
| */ |
| |
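| /* Unregister the doorbell (if one was set up) and drop the eventfd */ |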
| static void ioeventfd_release(struct mshv_ioeventfd *p, u64 partition_id) |
| { |
| if (p->iovntfd_doorbell_id > 0) |
| mshv_unregister_doorbell(partition_id, p->iovntfd_doorbell_id); |
| eventfd_ctx_put(p->iovntfd_eventfd); |
| kfree(p); |
| } |
| |
| /* MMIO writes trigger an event if the addr/val match */ |
| static void ioeventfd_mmio_write(int doorbell_id, void *data) |
| { |
| struct mshv_partition *partition = (struct mshv_partition *)data; |
| struct mshv_ioeventfd *p; |
| |
| rcu_read_lock(); |
| hlist_for_each_entry_rcu(p, &partition->ioeventfds_list, iovntfd_hnode) |
| if (p->iovntfd_doorbell_id == doorbell_id) { |
| eventfd_signal(p->iovntfd_eventfd); |
| break; |
| } |
| |
| rcu_read_unlock(); |
| } |
| |
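| /* |
| * Two ioeventfds collide if they cover the same address and length and |
| * can fire on the same written value (either one is a wildcard, or the |
| * datamatch values are equal). |
| */ |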
| static bool ioeventfd_check_collision(struct mshv_partition *pt, |
| struct mshv_ioeventfd *p) |
| __must_hold(&pt->pt_mutex) |
| { |
| struct mshv_ioeventfd *_p; |
| |
| hlist_for_each_entry(_p, &pt->ioeventfds_list, iovntfd_hnode) |
| if (_p->iovntfd_addr == p->iovntfd_addr && |
| _p->iovntfd_length == p->iovntfd_length && |
| (_p->iovntfd_wildcard || p->iovntfd_wildcard || |
| _p->iovntfd_datamatch == p->iovntfd_datamatch)) |
| return true; |
| |
| return false; |
| } |
| |
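| /* |
| * Register an MMIO doorbell with the hypervisor and link the returned |
| * doorbell id to the eventfd so ioeventfd_mmio_write() can signal it. |
| */ |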
| static int mshv_assign_ioeventfd(struct mshv_partition *pt, |
| struct mshv_user_ioeventfd *args) |
| __must_hold(&pt->pt_mutex) |
| { |
| struct mshv_ioeventfd *p; |
| struct eventfd_ctx *eventfd; |
| u64 doorbell_flags = 0; |
| int ret; |
| |
| /* pt_mutex is currently protecting the partition's ioeventfds_list */ |
| WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex)); |
| |
| if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO)) |
| return -EOPNOTSUPP; |
| |
| /* length must be zero (match any size) or a power of two up to 8 bytes */ |
| switch (args->len) { |
| case 0: |
| doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY; |
| break; |
| case 1: |
| doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE; |
| break; |
| case 2: |
| doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD; |
| break; |
| case 4: |
| doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD; |
| break; |
| case 8: |
| doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD; |
| break; |
| default: |
| return -EINVAL; |
| } |
| |
| /* check for range overflow */ |
| if (args->addr + args->len < args->addr) |
| return -EINVAL; |
| |
| /* check for extra flags that we don't understand */ |
| if (args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK) |
| return -EINVAL; |
| |
| eventfd = eventfd_ctx_fdget(args->fd); |
| if (IS_ERR(eventfd)) |
| return PTR_ERR(eventfd); |
| |
| p = kzalloc(sizeof(*p), GFP_KERNEL); |
| if (!p) { |
| ret = -ENOMEM; |
| goto fail; |
| } |
| |
| p->iovntfd_addr = args->addr; |
| p->iovntfd_length = args->len; |
| p->iovntfd_eventfd = eventfd; |
| |
| /* The datamatch feature is optional; without it this is a wildcard match */ |
| if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH)) { |
| p->iovntfd_datamatch = args->datamatch; |
| } else { |
| p->iovntfd_wildcard = true; |
| doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE; |
| } |
| |
| if (ioeventfd_check_collision(pt, p)) { |
| ret = -EEXIST; |
| goto unlock_fail; |
| } |
| |
| ret = mshv_register_doorbell(pt->pt_id, ioeventfd_mmio_write, |
| (void *)pt, p->iovntfd_addr, |
| p->iovntfd_datamatch, doorbell_flags); |
| if (ret < 0) |
| goto unlock_fail; |
| |
| p->iovntfd_doorbell_id = ret; |
| |
| hlist_add_head_rcu(&p->iovntfd_hnode, &pt->ioeventfds_list); |
| |
| return 0; |
| |
| unlock_fail: |
| kfree(p); |
| |
| fail: |
| eventfd_ctx_put(eventfd); |
| |
| return ret; |
| } |
| |
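| /* Find the ioeventfd matching fd/addr/len/datamatch and tear it down */ |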
| static int mshv_deassign_ioeventfd(struct mshv_partition *pt, |
| struct mshv_user_ioeventfd *args) |
| __must_hold(&pt->pt_mutex) |
| { |
| struct mshv_ioeventfd *p; |
| struct eventfd_ctx *eventfd; |
| struct hlist_node *n; |
| int ret = -ENOENT; |
| |
| /* pt_mutex is currently protecting the partition's ioeventfds_list */ |
| WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex)); |
| |
| eventfd = eventfd_ctx_fdget(args->fd); |
| if (IS_ERR(eventfd)) |
| return PTR_ERR(eventfd); |
| |
| hlist_for_each_entry_safe(p, n, &pt->ioeventfds_list, iovntfd_hnode) { |
| bool wildcard = !(args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH)); |
| |
| if (p->iovntfd_eventfd != eventfd || |
| p->iovntfd_addr != args->addr || |
| p->iovntfd_length != args->len || |
| p->iovntfd_wildcard != wildcard) |
| continue; |
| |
| if (!p->iovntfd_wildcard && |
| p->iovntfd_datamatch != args->datamatch) |
| continue; |
| |
| hlist_del_rcu(&p->iovntfd_hnode); |
| synchronize_rcu(); |
| ioeventfd_release(p, pt->pt_id); |
| ret = 0; |
| break; |
| } |
| |
| eventfd_ctx_put(eventfd); |
| |
| return ret; |
| } |
| |
| int mshv_set_unset_ioeventfd(struct mshv_partition *pt, |
| struct mshv_user_ioeventfd *args) |
| __must_hold(&pt->pt_mutex) |
| { |
| if ((args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK) || |
| mshv_field_nonzero(*args, rsvd)) |
| return -EINVAL; |
| |
| /* PIO not yet implemented */ |
| if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO)) |
| return -EOPNOTSUPP; |
| |
| if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DEASSIGN)) |
| return mshv_deassign_ioeventfd(pt, args); |
| |
| return mshv_assign_ioeventfd(pt, args); |
| } |
| |
| void mshv_eventfd_init(struct mshv_partition *pt) |
| { |
| spin_lock_init(&pt->pt_irqfds_lock); |
| INIT_HLIST_HEAD(&pt->pt_irqfds_list); |
| |
| INIT_HLIST_HEAD(&pt->irqfds_resampler_list); |
| mutex_init(&pt->irqfds_resampler_lock); |
| |
| INIT_HLIST_HEAD(&pt->ioeventfds_list); |
| } |
| |
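| /* |
| * Partition teardown: move the ioeventfds list off the partition, wait |
| * for RCU readers (ioeventfd_mmio_write()) to drain, release each |
| * entry, then release all irqfds. |
| */ |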
| void mshv_eventfd_release(struct mshv_partition *pt) |
| { |
| struct hlist_head items; |
| struct hlist_node *n; |
| struct mshv_ioeventfd *p; |
| |
| hlist_move_list(&pt->ioeventfds_list, &items); |
| synchronize_rcu(); |
| |
| hlist_for_each_entry_safe(p, n, &items, iovntfd_hnode) { |
| hlist_del(&p->iovntfd_hnode); |
| ioeventfd_release(p, pt->pt_id); |
| } |
| |
| mshv_irqfd_release(pt); |
| } |