mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
sched/wait: Drop WQ_FLAG_EXCLUSIVE from add_wait_queue_priority()
Drop the setting of WQ_FLAG_EXCLUSIVE from add_wait_queue_priority() and instead have callers manually add the flag prior to adding their structure to the queue. Blindly setting WQ_FLAG_EXCLUSIVE is flawed, as the nature of exclusive, priority waiters means that only the first waiter added will ever receive notifications. Pushing the flawed behavior to callers will allow fixing the problem one hypervisor at a time (KVM added the flawed API, and then KVM's code was copy+pasted nearly verbatim by Xen and Hyper-V), and will also allow for adding an API that provides true exclusivity, i.e. that guarantees at most one priority waiter is in the queue. Opportunistically add a comment in Hyper-V to call out the mess. Xen privcmd's irqfd_wakeup() doesn't actually operate in exclusive mode, i.e. can be "fixed" simply by dropping WQ_FLAG_EXCLUSIVE. And KVM is primed to switch to the aforementioned fully exclusive API, i.e. won't be carrying the flawed code for long. No functional change intended. Tested-by: K Prateek Nayak <kprateek.nayak@amd.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20250522235223.3178519-7-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
parent
86e00cd162
commit
867347bb21
4 changed files with 12 additions and 2 deletions
|
|
@ -368,6 +368,14 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
|
|||
container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);
|
||||
|
||||
irqfd->irqfd_wqh = wqh;
|
||||
|
||||
/*
|
||||
* TODO: Ensure there isn't already an exclusive, priority waiter, e.g.
|
||||
* that the irqfd isn't already bound to another partition. Only the
|
||||
* first exclusive waiter encountered will be notified, and
|
||||
* add_wait_queue_priority() doesn't enforce exclusivity.
|
||||
*/
|
||||
irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE;
|
||||
add_wait_queue_priority(wqh, &irqfd->irqfd_wait);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -957,6 +957,7 @@ irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
|
|||
struct privcmd_kernel_irqfd *kirqfd =
|
||||
container_of(pt, struct privcmd_kernel_irqfd, pt);
|
||||
|
||||
kirqfd->wait.flags |= WQ_FLAG_EXCLUSIVE;
|
||||
add_wait_queue_priority(wqh, &kirqfd->wait);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_
|
|||
{
|
||||
unsigned long flags;
|
||||
|
||||
wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
|
||||
wq_entry->flags |= WQ_FLAG_PRIORITY;
|
||||
spin_lock_irqsave(&wq_head->lock, flags);
|
||||
__add_wait_queue(wq_head, wq_entry);
|
||||
spin_unlock_irqrestore(&wq_head->lock, flags);
|
||||
|
|
@ -64,7 +64,7 @@ EXPORT_SYMBOL(remove_wait_queue);
|
|||
* the non-exclusive tasks. Normally, exclusive tasks will be at the end of
|
||||
* the list and any non-exclusive tasks will be woken first. A priority task
|
||||
* may be at the head of the list, and can consume the event without any other
|
||||
* tasks being woken.
|
||||
* tasks being woken if it's also an exclusive task.
|
||||
*
|
||||
* There are circumstances in which we can try to wake a task which has already
|
||||
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
|
||||
|
|
|
|||
|
|
@ -316,6 +316,7 @@ static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
|
|||
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
|
||||
|
||||
spin_release(&kvm->irqfds.lock.dep_map, _RET_IP_);
|
||||
irqfd->wait.flags |= WQ_FLAG_EXCLUSIVE;
|
||||
add_wait_queue_priority(wqh, &irqfd->wait);
|
||||
spin_acquire(&kvm->irqfds.lock.dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue