From 5324953c06bd929c135d9e04be391ee2c11b5a19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Feb 2026 17:33:29 +0100 Subject: [PATCH] sched/core: Fix wakeup_preempt's next_class tracking Kernel test robot reported that tools/testing/selftests/kvm/hardware_disable_test was failing due to commit 704069649b5b ("sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()"). It turns out there were two related problems that could lead to a missed preemption: - when hitting newidle balance from the idle thread, it would elevate rq->next_class from &idle_sched_class to &fair_sched_class, causing later wakeup_preempt() calls to not hit the sched_class_above() case, and not issue resched_curr(). Notably, this modification pattern should only lower the next_class, and never raise it. Create two new helper functions to wrap this. - when doing schedule_idle(), it was possible to miss (re)setting rq->next_class to &idle_sched_class, leading to the very same problem. Cc: Sean Christopherson Fixes: 704069649b5b ("sched/core: Rework sched_class::wakeup_preempt() and rq_modified_*()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202602122157.4e861298-lkp@intel.com Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260218163329.GQ1395416@noisy.programming.kicks-ass.net --- kernel/sched/core.c | 1 + kernel/sched/ext.c | 4 ++-- kernel/sched/fair.c | 4 ++-- kernel/sched/sched.h | 11 +++++++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 759777694c78..b7f77c165a6e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6830,6 +6830,7 @@ static void __sched notrace __schedule(int sched_mode) /* SCX must consult the BPF scheduler to tell if rq is empty */ if (!rq->nr_running && !scx_enabled()) { next = prev; + rq->next_class = &idle_sched_class; goto picked; } } else if (!preempt && prev_state) { diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c 
index 62b1f3ac5630..06cc0a4aec66 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2460,7 +2460,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) /* see kick_cpus_irq_workfn() */ smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1); - rq->next_class = &ext_sched_class; + rq_modified_begin(rq, &ext_sched_class); rq_unpin_lock(rq, rf); balance_one(rq, prev); @@ -2475,7 +2475,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) * If @force_scx is true, always try to pick a SCHED_EXT task, * regardless of any higher-priority sched classes activity. */ - if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class)) + if (!force_scx && rq_modified_above(rq, &ext_sched_class)) return RETRY_TASK; keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f4446cbe8ffa..bf948db905ed 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12982,7 +12982,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) t0 = sched_clock_cpu(this_cpu); __sched_balance_update_blocked_averages(this_rq); - this_rq->next_class = &fair_sched_class; + rq_modified_begin(this_rq, &fair_sched_class); raw_spin_rq_unlock(this_rq); for_each_domain(this_cpu, sd) { @@ -13049,7 +13049,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) pulled_task = 1; /* If a higher prio class was modified, restart the pick */ - if (sched_class_above(this_rq->next_class, &fair_sched_class)) + if (rq_modified_above(this_rq, &fair_sched_class)) pulled_task = -1; out: diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b82fb70a9d54..43bbf0693cca 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2748,6 +2748,17 @@ static inline const struct sched_class *next_active_class(const struct sched_cla #define sched_class_above(_a, _b) ((_a) < (_b)) +static inline void rq_modified_begin(struct rq *rq, const struct sched_class 
*class) +{ + if (sched_class_above(rq->next_class, class)) + rq->next_class = class; +} + +static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class) +{ + return sched_class_above(rq->next_class, class); +} + static inline bool sched_stop_runnable(struct rq *rq) { return rq->stop && task_on_rq_queued(rq->stop);