diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7b7c5269cf18..170a67aad983 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -245,34 +245,14 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) return ret; } -struct kvm_irqfd_pt { - struct kvm_kernel_irqfd *irqfd; - poll_table pt; -}; - -static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh, - poll_table *pt) -{ - struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt); - struct kvm_kernel_irqfd *irqfd = p->irqfd; - - /* - * Add the irqfd as a priority waiter on the eventfd, with a custom - * wake-up handler, so that KVM *and only KVM* is notified whenever the - * underlying eventfd is signaled. - */ - init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); - - add_wait_queue_priority(wqh, &irqfd->wait); -} - -/* Must be called under irqfds.lock */ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int n_entries; + lockdep_assert_held(&kvm->irqfds.lock); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -286,6 +266,49 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } +struct kvm_irqfd_pt { + struct kvm_kernel_irqfd *irqfd; + struct kvm *kvm; + poll_table pt; + int ret; +}; + +static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt); + struct kvm_kernel_irqfd *irqfd = p->irqfd; + struct kvm_kernel_irqfd *tmp; + struct kvm *kvm = p->kvm; + + spin_lock_irq(&kvm->irqfds.lock); + + list_for_each_entry(tmp, &kvm->irqfds.items, list) { + if (irqfd->eventfd != tmp->eventfd) + continue; + /* This fd is used for another irq already. */ + p->ret = -EBUSY; + spin_unlock_irq(&kvm->irqfds.lock); + return; + } + + irqfd_update(kvm, irqfd); + + list_add_tail(&irqfd->list, &kvm->irqfds.items); + + spin_unlock_irq(&kvm->irqfds.lock); + + /* + * Add the irqfd as a priority waiter on the eventfd, with a custom + * wake-up handler, so that KVM *and only KVM* is notified whenever the + * underlying eventfd is signaled. + */ + init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); + + add_wait_queue_priority(wqh, &irqfd->wait); + p->ret = 0; +} + #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) void __attribute__((weak)) kvm_arch_irq_bypass_stop( struct irq_bypass_consumer *cons) @@ -308,7 +331,7 @@ void __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_kernel_irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; struct kvm_irqfd_pt irqfd_pt; int ret; @@ -407,32 +430,22 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) */ idx = srcu_read_lock(&kvm->irq_srcu); - spin_lock_irq(&kvm->irqfds.lock); - - ret = 0; - list_for_each_entry(tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd != tmp->eventfd) - continue; - /* This fd is used for another irq already. */ - ret = -EBUSY; - goto fail_duplicate; - } - - irqfd_update(kvm, irqfd); - - list_add_tail(&irqfd->list, &kvm->irqfds.items); - - spin_unlock_irq(&kvm->irqfds.lock); - /* - * Register the irqfd with the eventfd by polling on the eventfd. If - * there was en event pending on the eventfd prior to registering, - * manually trigger IRQ injection. + * Register the irqfd with the eventfd by polling on the eventfd, and + * simultaneously and the irqfd to KVM's list. If there was en event + * pending on the eventfd prior to registering, manually trigger IRQ + * injection. */ irqfd_pt.irqfd = irqfd; + irqfd_pt.kvm = kvm; init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register); events = vfs_poll(fd_file(f), &irqfd_pt.pt); + + ret = irqfd_pt.ret; + if (ret) + goto fail_poll; + if (events & EPOLLIN) schedule_work(&irqfd->inject); @@ -452,8 +465,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) srcu_read_unlock(&kvm->irq_srcu, idx); return 0; -fail_duplicate: - spin_unlock_irq(&kvm->irqfds.lock); +fail_poll: srcu_read_unlock(&kvm->irq_srcu, idx); fail: if (irqfd->resampler)