mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:24:47 +01:00
sched_ext: Fix starvation of scx_enable() under fair-class saturation
During scx_enable(), the READY -> ENABLED task switching loop changes the
calling thread's sched_class from fair to ext. Since fair has higher
priority than ext, saturating fair-class workloads can indefinitely starve
the enable thread, hanging the system. This was introduced when the enable
path switched from preempt_disable() to scx_bypass() which doesn't protect
against fair-class starvation. Note that the original preempt_disable()
protection wasn't complete either - in partial switch modes, the calling
thread could still be starved after preempt_enable() as it may have been
switched to ext class.
Fix it by offloading the enable body to a dedicated system-wide RT
(SCHED_FIFO) kthread which cannot be starved by either fair or ext class
tasks. scx_enable() lazily creates the kthread on first use and passes the
ops pointer through a struct scx_enable_cmd containing the kthread_work,
then synchronously waits for completion.
The workfn runs on a different kthread from sch->helper (which runs
disable_work), so it can safely flush disable_work on the error path
without deadlock.
Fixes: 8c2090c504 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
1336b579f6
commit
b06ccbabe2
1 changed file with 56 additions and 10 deletions
|
|
@ -4975,20 +4975,30 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
/*
|
||||
* scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid
|
||||
* starvation. During the READY -> ENABLED task switching loop, the calling
|
||||
* thread's sched_class gets switched from fair to ext. As fair has higher
|
||||
* priority than ext, the calling thread can be indefinitely starved under
|
||||
* fair-class saturation, leading to a system hang.
|
||||
*/
|
||||
/* Request passed from scx_enable() to scx_enable_workfn() via the embedded work item. */
struct scx_enable_cmd {
|
||||
struct kthread_work work;	/* work item; scx_enable_workfn() recovers the cmd from it with container_of() */
|
||||
struct sched_ext_ops *ops;	/* ops to enable, filled in by scx_enable() before queueing */
|
||||
int ret;			/* result written by scx_enable_workfn() and returned by scx_enable() */
|
||||
};
|
||||
|
||||
static void scx_enable_workfn(struct kthread_work *work)
|
||||
{
|
||||
struct scx_enable_cmd *cmd =
|
||||
container_of(work, struct scx_enable_cmd, work);
|
||||
struct sched_ext_ops *ops = cmd->ops;
|
||||
struct scx_sched *sch;
|
||||
struct scx_task_iter sti;
|
||||
struct task_struct *p;
|
||||
unsigned long timeout;
|
||||
int i, cpu, ret;
|
||||
|
||||
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
|
||||
cpu_possible_mask)) {
|
||||
pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&scx_enable_mutex);
|
||||
|
||||
if (scx_enable_state() != SCX_DISABLED) {
|
||||
|
|
@ -5205,13 +5215,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||
|
||||
atomic_long_inc(&scx_enable_seq);
|
||||
|
||||
return 0;
|
||||
cmd->ret = 0;
|
||||
return;
|
||||
|
||||
err_free_ksyncs:
|
||||
free_kick_syncs();
|
||||
err_unlock:
|
||||
mutex_unlock(&scx_enable_mutex);
|
||||
return ret;
|
||||
cmd->ret = ret;
|
||||
return;
|
||||
|
||||
err_disable_unlock_all:
|
||||
scx_cgroup_unlock();
|
||||
|
|
@ -5230,7 +5242,41 @@ err_disable:
|
|||
*/
|
||||
scx_error(sch, "scx_enable() failed (%d)", ret);
|
||||
kthread_flush_work(&sch->disable_work);
|
||||
return 0;
|
||||
cmd->ret = 0;
|
||||
}
|
||||
|
||||
static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
||||
{
|
||||
static struct kthread_worker *helper;
|
||||
static DEFINE_MUTEX(helper_mutex);
|
||||
struct scx_enable_cmd cmd;
|
||||
|
||||
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
|
||||
cpu_possible_mask)) {
|
||||
pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!READ_ONCE(helper)) {
|
||||
mutex_lock(&helper_mutex);
|
||||
if (!helper) {
|
||||
helper = kthread_run_worker(0, "scx_enable_helper");
|
||||
if (IS_ERR_OR_NULL(helper)) {
|
||||
helper = NULL;
|
||||
mutex_unlock(&helper_mutex);
|
||||
return -ENOMEM;
|
||||
}
|
||||
sched_set_fifo(helper->task);
|
||||
}
|
||||
mutex_unlock(&helper_mutex);
|
||||
}
|
||||
|
||||
kthread_init_work(&cmd.work, scx_enable_workfn);
|
||||
cmd.ops = ops;
|
||||
|
||||
kthread_queue_work(READ_ONCE(helper), &cmd.work);
|
||||
kthread_flush_work(&cmd.work);
|
||||
return cmd.ret;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue