diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c index 8dd22be2ca0b..b348928871c2 100644 --- a/drivers/hv/mshv_eventfd.c +++ b/drivers/hv/mshv_eventfd.c @@ -368,6 +368,14 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh, container_of(polltbl, struct mshv_irqfd, irqfd_polltbl); irqfd->irqfd_wqh = wqh; + + /* + * TODO: Ensure there isn't already an exclusive, priority waiter, e.g. + * that the irqfd isn't already bound to another partition. Only the + * first exclusive waiter encountered will be notified, and + * add_wait_queue_priority() doesn't enforce exclusivity. + */ + irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE; add_wait_queue_priority(wqh, &irqfd->irqfd_wait); } diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 13a10f3294a8..c08ec8a7d27c 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -957,6 +957,7 @@ irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) struct privcmd_kernel_irqfd *kirqfd = container_of(pt, struct privcmd_kernel_irqfd, pt); + kirqfd->wait.flags |= WQ_FLAG_EXCLUSIVE; add_wait_queue_priority(wqh, &kirqfd->wait); } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 51e38f5f4701..4ab3ab195277 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -40,7 +40,7 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ { unsigned long flags; - wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY; + wq_entry->flags |= WQ_FLAG_PRIORITY; spin_lock_irqsave(&wq_head->lock, flags); __add_wait_queue(wq_head, wq_entry); spin_unlock_irqrestore(&wq_head->lock, flags); @@ -64,7 +64,7 @@ EXPORT_SYMBOL(remove_wait_queue); * the non-exclusive tasks. Normally, exclusive tasks will be at the end of * the list and any non-exclusive tasks will be woken first. A priority task * may be at the head of the list, and can consume the event without any other - * tasks being woken. + * tasks being woken if it's also an exclusive task. * * There are circumstances in which we can try to wake a task which has already * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a9a13f919de8..f8c2486f95d5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -316,6 +316,7 @@ static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh, init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); spin_release(&kvm->irqfds.lock.dep_map, _RET_IP_); + irqfd->wait.flags |= WQ_FLAG_EXCLUSIVE; add_wait_queue_priority(wqh, &irqfd->wait); spin_acquire(&kvm->irqfds.lock.dep_map, 0, 0, _RET_IP_);